Diffstat (limited to 'third_party/rust/cranelift-codegen/src/isa/aarch64/inst')
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/args.rs            |  728
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit.rs            | 2359
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit_tests.rs      | 5143
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/imms.rs            | 1025
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/mod.rs             | 4057
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/regs.rs            |  351
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind.rs          |  201
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind/systemv.rs  |  158
8 files changed, 14022 insertions, 0 deletions
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/args.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/args.rs
new file mode 100644
index 0000000000..7bd181c86b
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/args.rs
@@ -0,0 +1,728 @@
+//! AArch64 ISA definitions: instruction arguments.
+
+// Some variants are never constructed, but we still want them as options in the future.
+#![allow(dead_code)]
+
+use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8};
+use crate::ir::Type;
+use crate::isa::aarch64::inst::*;
+use crate::machinst::{ty_bits, MachLabel};
+
+use regalloc::{PrettyPrint, RealRegUniverse, Reg, Writable};
+
+use core::convert::Into;
+use std::string::String;
+
+//=============================================================================
+// Instruction sub-components: shift and extend descriptors
+
+/// A shift operator for a register or immediate.
+#[derive(Clone, Copy, Debug)]
+#[repr(u8)]
+pub enum ShiftOp {
+ LSL = 0b00,
+ LSR = 0b01,
+ ASR = 0b10,
+ ROR = 0b11,
+}
+
+impl ShiftOp {
+ /// Get the encoding of this shift op.
+ pub fn bits(self) -> u8 {
+ self as u8
+ }
+}
+
+/// A shift operator amount.
+#[derive(Clone, Copy, Debug)]
+pub struct ShiftOpShiftImm(u8);
+
+impl ShiftOpShiftImm {
+ /// Maximum shift for shifted-register operands.
+ pub const MAX_SHIFT: u64 = 63;
+
+ /// Create a new shiftop shift amount, if possible.
+ pub fn maybe_from_shift(shift: u64) -> Option<ShiftOpShiftImm> {
+ if shift <= Self::MAX_SHIFT {
+ Some(ShiftOpShiftImm(shift as u8))
+ } else {
+ None
+ }
+ }
+
+ /// Return the shift amount.
+ pub fn value(self) -> u8 {
+ self.0
+ }
+
+ /// Mask down to a given number of bits.
+ pub fn mask(self, bits: u8) -> ShiftOpShiftImm {
+ ShiftOpShiftImm(self.0 & (bits - 1))
+ }
+}
+
+/// A shift operator with an amount, guaranteed to be within range.
+#[derive(Clone, Debug)]
+pub struct ShiftOpAndAmt {
+ op: ShiftOp,
+ shift: ShiftOpShiftImm,
+}
+
+impl ShiftOpAndAmt {
+ pub fn new(op: ShiftOp, shift: ShiftOpShiftImm) -> ShiftOpAndAmt {
+ ShiftOpAndAmt { op, shift }
+ }
+
+ /// Get the shift op.
+ pub fn op(&self) -> ShiftOp {
+ self.op
+ }
+
+ /// Get the shift amount.
+ pub fn amt(&self) -> ShiftOpShiftImm {
+ self.shift
+ }
+}
+
+/// An extend operator for a register.
+#[derive(Clone, Copy, Debug)]
+#[repr(u8)]
+pub enum ExtendOp {
+ UXTB = 0b000,
+ UXTH = 0b001,
+ UXTW = 0b010,
+ UXTX = 0b011,
+ SXTB = 0b100,
+ SXTH = 0b101,
+ SXTW = 0b110,
+ SXTX = 0b111,
+}
+
+impl ExtendOp {
+ /// Encoding of this op.
+ pub fn bits(self) -> u8 {
+ self as u8
+ }
+}
+
+//=============================================================================
+// Instruction sub-components (memory addresses): definitions
+
+/// A reference to some memory address.
+#[derive(Clone, Debug)]
+pub enum MemLabel {
+ /// An address in the code, a constant pool or jumptable, with relative
+ /// offset from this instruction. This form must be used at emission time;
+ /// see `memlabel_finalize()` for how other forms are lowered to this one.
+ PCRel(i32),
+}
+
+/// An addressing mode specified for a load/store operation.
+#[derive(Clone, Debug)]
+pub enum AMode {
+ //
+ // Real ARM64 addressing modes:
+ //
+ /// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation.
+ PostIndexed(Writable<Reg>, SImm9),
+ /// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation.
+ PreIndexed(Writable<Reg>, SImm9),
+
+ // N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to
+ // what the ISA calls the "register offset" addressing mode. We split out
+ // several options here for more ergonomic codegen.
+ /// Register plus register offset.
+ RegReg(Reg, Reg),
+
+ /// Register plus register offset, scaled by type's size.
+ RegScaled(Reg, Reg, Type),
+
+ /// Register plus register offset, scaled by type's size, with index sign- or zero-extended
+ /// first.
+ RegScaledExtended(Reg, Reg, Type, ExtendOp),
+
+ /// Register plus register offset, with index sign- or zero-extended first.
+ RegExtended(Reg, Reg, ExtendOp),
+
+ /// Unscaled signed 9-bit immediate offset from reg.
+ Unscaled(Reg, SImm9),
+
+ /// Scaled (by size of a type) unsigned 12-bit immediate offset from reg.
+ UnsignedOffset(Reg, UImm12Scaled),
+
+ //
+ // virtual addressing modes that are lowered at emission time:
+ //
+ /// Reference to a "label": e.g., a symbol.
+ Label(MemLabel),
+
+ /// Arbitrary offset from a register. Converted to generation of large
+ /// offsets with multiple instructions as necessary during code emission.
+ RegOffset(Reg, i64, Type),
+
+ /// Offset from the stack pointer.
+ SPOffset(i64, Type),
+
+ /// Offset from the frame pointer.
+ FPOffset(i64, Type),
+
+ /// Offset from the "nominal stack pointer", which is where the real SP is
+ /// just after stack and spill slots are allocated in the function prologue.
+ /// At emission time, this is converted to `SPOffset` with a fixup added to
+ /// the offset constant. The fixup is a running value that is tracked as
+ /// emission iterates through instructions in linear order, and can be
+ /// adjusted up and down with [Inst::VirtualSPOffsetAdj].
+ ///
+ /// The standard ABI is in charge of handling this (by emitting the
+ /// adjustment meta-instructions). It maintains the invariant that "nominal
+ /// SP" is where the actual SP is after the function prologue and before
+ /// clobber pushes. See the diagram in the documentation for
+ /// [the ABI module](crate::isa::aarch64::abi) for more details.
+ NominalSPOffset(i64, Type),
+}
+
+impl AMode {
+ /// Memory reference using an address in a register.
+ pub fn reg(reg: Reg) -> AMode {
+ // Use UnsignedOffset rather than Unscaled to use ldr rather than ldur.
+ // This also does not use PostIndexed / PreIndexed as they update the register.
+ AMode::UnsignedOffset(reg, UImm12Scaled::zero(I64))
+ }
+
+ /// Memory reference using the sum of two registers as an address.
+ pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> AMode {
+ AMode::RegReg(reg1, reg2)
+ }
+
+ /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address.
+ pub fn reg_plus_reg_scaled(reg1: Reg, reg2: Reg, ty: Type) -> AMode {
+ AMode::RegScaled(reg1, reg2, ty)
+ }
+
+ /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address, with `reg2` sign- or
+ /// zero-extended as per `op`.
+ pub fn reg_plus_reg_scaled_extended(reg1: Reg, reg2: Reg, ty: Type, op: ExtendOp) -> AMode {
+ AMode::RegScaledExtended(reg1, reg2, ty, op)
+ }
+
+ /// Memory reference to a label: a global function or value, or data in the constant pool.
+ pub fn label(label: MemLabel) -> AMode {
+ AMode::Label(label)
+ }
+}
+
+/// A memory argument to a load/store-pair.
+#[derive(Clone, Debug)]
+pub enum PairAMode {
+ SignedOffset(Reg, SImm7Scaled),
+ PreIndexed(Writable<Reg>, SImm7Scaled),
+ PostIndexed(Writable<Reg>, SImm7Scaled),
+}
+
+//=============================================================================
+// Instruction sub-components (conditions, branches and branch targets):
+// definitions
+
+/// Condition for conditional branches.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[repr(u8)]
+pub enum Cond {
+ Eq = 0,
+ Ne = 1,
+ Hs = 2,
+ Lo = 3,
+ Mi = 4,
+ Pl = 5,
+ Vs = 6,
+ Vc = 7,
+ Hi = 8,
+ Ls = 9,
+ Ge = 10,
+ Lt = 11,
+ Gt = 12,
+ Le = 13,
+ Al = 14,
+ Nv = 15,
+}
+
+impl Cond {
+ /// Return the inverted condition.
+ pub fn invert(self) -> Cond {
+ match self {
+ Cond::Eq => Cond::Ne,
+ Cond::Ne => Cond::Eq,
+
+ Cond::Hs => Cond::Lo,
+ Cond::Lo => Cond::Hs,
+
+ Cond::Mi => Cond::Pl,
+ Cond::Pl => Cond::Mi,
+
+ Cond::Vs => Cond::Vc,
+ Cond::Vc => Cond::Vs,
+
+ Cond::Hi => Cond::Ls,
+ Cond::Ls => Cond::Hi,
+
+ Cond::Ge => Cond::Lt,
+ Cond::Lt => Cond::Ge,
+
+ Cond::Gt => Cond::Le,
+ Cond::Le => Cond::Gt,
+
+ Cond::Al => Cond::Nv,
+ Cond::Nv => Cond::Al,
+ }
+ }
+
+ /// Return the machine encoding of this condition.
+ pub fn bits(self) -> u32 {
+ self as u32
+ }
+}
+
+/// The kind of conditional branch: the common-case-optimized "reg-is-zero" /
+/// "reg-is-nonzero" variants, or the generic one that tests the machine
+/// condition codes.
+#[derive(Clone, Copy, Debug)]
+pub enum CondBrKind {
+ /// Condition: given register is zero.
+ Zero(Reg),
+ /// Condition: given register is nonzero.
+ NotZero(Reg),
+ /// Condition: the given condition-code test is true.
+ Cond(Cond),
+}
+
+impl CondBrKind {
+ /// Return the inverted branch condition.
+ pub fn invert(self) -> CondBrKind {
+ match self {
+ CondBrKind::Zero(reg) => CondBrKind::NotZero(reg),
+ CondBrKind::NotZero(reg) => CondBrKind::Zero(reg),
+ CondBrKind::Cond(c) => CondBrKind::Cond(c.invert()),
+ }
+ }
+}
+
+/// A branch target. Either unresolved (basic-block index) or resolved (offset
+/// from end of current instruction).
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum BranchTarget {
+ /// An unresolved reference to a Label, as passed into
+ /// `lower_branch_group()`.
+ Label(MachLabel),
+ /// A fixed PC offset.
+ ResolvedOffset(i32),
+}
+
+impl BranchTarget {
+ /// Return the target's label, if it is a label-based target.
+ pub fn as_label(self) -> Option<MachLabel> {
+ match self {
+ BranchTarget::Label(l) => Some(l),
+ _ => None,
+ }
+ }
+
+ /// Return the target's offset, if specified, or zero if label-based.
+ pub fn as_offset19_or_zero(self) -> u32 {
+ let off = match self {
+ BranchTarget::ResolvedOffset(off) => off >> 2,
+ _ => 0,
+ };
+ assert!(off <= 0x3ffff);
+ assert!(off >= -0x40000);
+ (off as u32) & 0x7ffff
+ }
+
+ /// Return the target's offset, if specified, or zero if label-based.
+ pub fn as_offset26_or_zero(self) -> u32 {
+ let off = match self {
+ BranchTarget::ResolvedOffset(off) => off >> 2,
+ _ => 0,
+ };
+ assert!(off <= 0x1ffffff);
+ assert!(off >= -0x2000000);
+ (off as u32) & 0x3ffffff
+ }
+}
+
+impl PrettyPrint for ShiftOpAndAmt {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("{:?} {}", self.op(), self.amt().value())
+ }
+}
+
+impl PrettyPrint for ExtendOp {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("{:?}", self)
+ }
+}
+
+impl PrettyPrint for MemLabel {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ &MemLabel::PCRel(off) => format!("pc+{}", off),
+ }
+ }
+}
+
+fn shift_for_type(ty: Type) -> usize {
+ match ty.bytes() {
+ 1 => 0,
+ 2 => 1,
+ 4 => 2,
+ 8 => 3,
+ 16 => 4,
+ _ => panic!("unknown type: {}", ty),
+ }
+}
+
+impl PrettyPrint for AMode {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ &AMode::Unscaled(reg, simm9) => {
+ if simm9.value != 0 {
+ format!("[{}, {}]", reg.show_rru(mb_rru), simm9.show_rru(mb_rru))
+ } else {
+ format!("[{}]", reg.show_rru(mb_rru))
+ }
+ }
+ &AMode::UnsignedOffset(reg, uimm12) => {
+ if uimm12.value != 0 {
+ format!("[{}, {}]", reg.show_rru(mb_rru), uimm12.show_rru(mb_rru))
+ } else {
+ format!("[{}]", reg.show_rru(mb_rru))
+ }
+ }
+ &AMode::RegReg(r1, r2) => {
+ format!("[{}, {}]", r1.show_rru(mb_rru), r2.show_rru(mb_rru),)
+ }
+ &AMode::RegScaled(r1, r2, ty) => {
+ let shift = shift_for_type(ty);
+ format!(
+ "[{}, {}, LSL #{}]",
+ r1.show_rru(mb_rru),
+ r2.show_rru(mb_rru),
+ shift,
+ )
+ }
+ &AMode::RegScaledExtended(r1, r2, ty, op) => {
+ let shift = shift_for_type(ty);
+ let size = match op {
+ ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32,
+ _ => OperandSize::Size64,
+ };
+ let op = op.show_rru(mb_rru);
+ format!(
+ "[{}, {}, {} #{}]",
+ r1.show_rru(mb_rru),
+ show_ireg_sized(r2, mb_rru, size),
+ op,
+ shift
+ )
+ }
+ &AMode::RegExtended(r1, r2, op) => {
+ let size = match op {
+ ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32,
+ _ => OperandSize::Size64,
+ };
+ let op = op.show_rru(mb_rru);
+ format!(
+ "[{}, {}, {}]",
+ r1.show_rru(mb_rru),
+ show_ireg_sized(r2, mb_rru, size),
+ op,
+ )
+ }
+ &AMode::Label(ref label) => label.show_rru(mb_rru),
+ &AMode::PreIndexed(r, simm9) => format!(
+ "[{}, {}]!",
+ r.to_reg().show_rru(mb_rru),
+ simm9.show_rru(mb_rru)
+ ),
+ &AMode::PostIndexed(r, simm9) => format!(
+ "[{}], {}",
+ r.to_reg().show_rru(mb_rru),
+ simm9.show_rru(mb_rru)
+ ),
+ // Eliminated by `mem_finalize()`.
+ &AMode::SPOffset(..)
+ | &AMode::FPOffset(..)
+ | &AMode::NominalSPOffset(..)
+ | &AMode::RegOffset(..) => {
+ panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!")
+ }
+ }
+ }
+}
+
+impl PrettyPrint for PairAMode {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ &PairAMode::SignedOffset(reg, simm7) => {
+ if simm7.value != 0 {
+ format!("[{}, {}]", reg.show_rru(mb_rru), simm7.show_rru(mb_rru))
+ } else {
+ format!("[{}]", reg.show_rru(mb_rru))
+ }
+ }
+ &PairAMode::PreIndexed(reg, simm7) => format!(
+ "[{}, {}]!",
+ reg.to_reg().show_rru(mb_rru),
+ simm7.show_rru(mb_rru)
+ ),
+ &PairAMode::PostIndexed(reg, simm7) => format!(
+ "[{}], {}",
+ reg.to_reg().show_rru(mb_rru),
+ simm7.show_rru(mb_rru)
+ ),
+ }
+ }
+}
+
+impl PrettyPrint for Cond {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ let mut s = format!("{:?}", self);
+ s.make_ascii_lowercase();
+ s
+ }
+}
+
+impl PrettyPrint for BranchTarget {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ &BranchTarget::Label(label) => format!("label{:?}", label.get()),
+ &BranchTarget::ResolvedOffset(off) => format!("{}", off),
+ }
+ }
+}
+
+/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and
+/// 64-bit variants of many instructions (and integer registers).
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum OperandSize {
+ Size32,
+ Size64,
+}
+
+impl OperandSize {
+ /// 32-bit case?
+ pub fn is32(self) -> bool {
+ self == OperandSize::Size32
+ }
+ /// 64-bit case?
+ pub fn is64(self) -> bool {
+ self == OperandSize::Size64
+ }
+ /// Convert from an `is32` boolean flag to an `OperandSize`.
+ pub fn from_is32(is32: bool) -> OperandSize {
+ if is32 {
+ OperandSize::Size32
+ } else {
+ OperandSize::Size64
+ }
+ }
+ /// Convert from a needed width to the smallest size that fits.
+ pub fn from_bits<I: Into<usize>>(bits: I) -> OperandSize {
+ let bits: usize = bits.into();
+ assert!(bits <= 64);
+ if bits <= 32 {
+ OperandSize::Size32
+ } else {
+ OperandSize::Size64
+ }
+ }
+
+ /// Convert from an integer type into the smallest size that fits.
+ pub fn from_ty(ty: Type) -> OperandSize {
+ Self::from_bits(ty_bits(ty))
+ }
+
+ /// Convert to I32, I64, or I128.
+ pub fn to_ty(self) -> Type {
+ match self {
+ OperandSize::Size32 => I32,
+ OperandSize::Size64 => I64,
+ }
+ }
+
+ pub fn sf_bit(&self) -> u32 {
+ match self {
+ OperandSize::Size32 => 0,
+ OperandSize::Size64 => 1,
+ }
+ }
+}
+
+/// Type used to communicate the size of a scalar SIMD & FP operand.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum ScalarSize {
+ Size8,
+ Size16,
+ Size32,
+ Size64,
+ Size128,
+}
+
+impl ScalarSize {
+ /// Convert from a needed width to the smallest size that fits.
+ pub fn from_bits<I: Into<usize>>(bits: I) -> ScalarSize {
+ match bits.into().next_power_of_two() {
+ 8 => ScalarSize::Size8,
+ 16 => ScalarSize::Size16,
+ 32 => ScalarSize::Size32,
+ 64 => ScalarSize::Size64,
+ 128 => ScalarSize::Size128,
+ w => panic!("Unexpected type width: {}", w),
+ }
+ }
+
+ /// Convert to an integer operand size.
+ pub fn operand_size(&self) -> OperandSize {
+ match self {
+ ScalarSize::Size32 => OperandSize::Size32,
+ ScalarSize::Size64 => OperandSize::Size64,
+ _ => panic!("Unexpected operand_size request for: {:?}", self),
+ }
+ }
+
+ /// Convert from a type into the smallest size that fits.
+ pub fn from_ty(ty: Type) -> ScalarSize {
+ Self::from_bits(ty_bits(ty))
+ }
+
+ /// Return the encoding bits that are used by some scalar FP instructions
+ /// for a particular operand size.
+ pub fn ftype(&self) -> u32 {
+ match self {
+ ScalarSize::Size16 => 0b11,
+ ScalarSize::Size32 => 0b00,
+ ScalarSize::Size64 => 0b01,
+ _ => panic!("Unexpected scalar FP operand size: {:?}", self),
+ }
+ }
+}
+
+/// Type used to communicate the size of a vector operand.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum VectorSize {
+ Size8x8,
+ Size8x16,
+ Size16x4,
+ Size16x8,
+ Size32x2,
+ Size32x4,
+ Size64x2,
+}
+
+impl VectorSize {
+ /// Get the vector operand size with the given scalar size as lane size.
+ pub fn from_lane_size(size: ScalarSize, is_128bit: bool) -> VectorSize {
+ match (size, is_128bit) {
+ (ScalarSize::Size8, false) => VectorSize::Size8x8,
+ (ScalarSize::Size8, true) => VectorSize::Size8x16,
+ (ScalarSize::Size16, false) => VectorSize::Size16x4,
+ (ScalarSize::Size16, true) => VectorSize::Size16x8,
+ (ScalarSize::Size32, false) => VectorSize::Size32x2,
+ (ScalarSize::Size32, true) => VectorSize::Size32x4,
+ (ScalarSize::Size64, true) => VectorSize::Size64x2,
+ _ => panic!("Unexpected scalar FP operand size: {:?}", size),
+ }
+ }
+
+ /// Convert from a type into a vector operand size.
+ pub fn from_ty(ty: Type) -> VectorSize {
+ match ty {
+ B8X16 => VectorSize::Size8x16,
+ B16X8 => VectorSize::Size16x8,
+ B32X4 => VectorSize::Size32x4,
+ B64X2 => VectorSize::Size64x2,
+ F32X2 => VectorSize::Size32x2,
+ F32X4 => VectorSize::Size32x4,
+ F64X2 => VectorSize::Size64x2,
+ I8X8 => VectorSize::Size8x8,
+ I8X16 => VectorSize::Size8x16,
+ I16X4 => VectorSize::Size16x4,
+ I16X8 => VectorSize::Size16x8,
+ I32X2 => VectorSize::Size32x2,
+ I32X4 => VectorSize::Size32x4,
+ I64X2 => VectorSize::Size64x2,
+ _ => unimplemented!("Unsupported type: {}", ty),
+ }
+ }
+
+ /// Get the integer operand size that corresponds to a lane of a vector with a certain size.
+ pub fn operand_size(&self) -> OperandSize {
+ match self {
+ VectorSize::Size64x2 => OperandSize::Size64,
+ _ => OperandSize::Size32,
+ }
+ }
+
+ /// Get the scalar operand size that corresponds to a lane of a vector with a certain size.
+ pub fn lane_size(&self) -> ScalarSize {
+ match self {
+ VectorSize::Size8x8 => ScalarSize::Size8,
+ VectorSize::Size8x16 => ScalarSize::Size8,
+ VectorSize::Size16x4 => ScalarSize::Size16,
+ VectorSize::Size16x8 => ScalarSize::Size16,
+ VectorSize::Size32x2 => ScalarSize::Size32,
+ VectorSize::Size32x4 => ScalarSize::Size32,
+ VectorSize::Size64x2 => ScalarSize::Size64,
+ }
+ }
+
+ pub fn is_128bits(&self) -> bool {
+ match self {
+ VectorSize::Size8x8 => false,
+ VectorSize::Size8x16 => true,
+ VectorSize::Size16x4 => false,
+ VectorSize::Size16x8 => true,
+ VectorSize::Size32x2 => false,
+ VectorSize::Size32x4 => true,
+ VectorSize::Size64x2 => true,
+ }
+ }
+
+ /// Produces a `VectorSize` with lanes twice as wide. Note that if the resulting
+ /// size would exceed 128 bits, then the number of lanes is also halved, so as to
+ /// ensure that the result size is at most 128 bits.
+ pub fn widen(&self) -> VectorSize {
+ match self {
+ VectorSize::Size8x8 => VectorSize::Size16x8,
+ VectorSize::Size8x16 => VectorSize::Size16x8,
+ VectorSize::Size16x4 => VectorSize::Size32x4,
+ VectorSize::Size16x8 => VectorSize::Size32x4,
+ VectorSize::Size32x2 => VectorSize::Size64x2,
+ VectorSize::Size32x4 => VectorSize::Size64x2,
+ VectorSize::Size64x2 => unreachable!(),
+ }
+ }
+
+ /// Produces a `VectorSize` that has the same lane width, but half as many lanes.
+ pub fn halve(&self) -> VectorSize {
+ match self {
+ VectorSize::Size8x16 => VectorSize::Size8x8,
+ VectorSize::Size16x8 => VectorSize::Size16x4,
+ VectorSize::Size32x4 => VectorSize::Size32x2,
+ _ => *self,
+ }
+ }
+
+ /// Return the encoding bits that are used by some SIMD instructions
+ /// for a particular operand size.
+ pub fn enc_size(&self) -> (u32, u32) {
+ let q = self.is_128bits() as u32;
+ let size = match self.lane_size() {
+ ScalarSize::Size8 => 0b00,
+ ScalarSize::Size16 => 0b01,
+ ScalarSize::Size32 => 0b10,
+ ScalarSize::Size64 => 0b11,
+ _ => unreachable!(),
+ };
+
+ (q, size)
+ }
+}
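As an editorial aside (not part of the vendored diff): the conversion helpers in args.rs compose in a way that is easy to sanity-check. The sketch below is a unit test one might place at the bottom of args.rs; it assumes it is compiled inside this module so that the types defined above are in scope.

// Editorial sketch, not part of the vendored diff: a test module one might add
// at the bottom of args.rs. It uses only items defined above in this file.
#[cfg(test)]
mod args_sketch_tests {
    use super::*;

    #[test]
    fn size_conversions_and_offset_packing() {
        // OperandSize picks the smaller of the two integer widths that fits.
        assert_eq!(OperandSize::from_bits(20usize), OperandSize::Size32);
        assert!(OperandSize::from_bits(33usize).is64());

        // ScalarSize rounds the requested width up to a power of two.
        assert_eq!(ScalarSize::from_bits(12usize), ScalarSize::Size16);

        // Widening an 8x8 (64-bit) vector doubles the lane width; since the
        // result may not exceed 128 bits, the lane count stays at 8 (16x8).
        let v = VectorSize::from_lane_size(ScalarSize::Size8, false);
        assert_eq!(v.widen(), VectorSize::Size16x8);
        assert!(v.widen().is_128bits());

        // A resolved branch target is a byte offset; the 19-bit field stores
        // it in units of 4 bytes, two's-complement masked to 19 bits.
        assert_eq!(BranchTarget::ResolvedOffset(16).as_offset19_or_zero(), 4);
        assert_eq!(
            BranchTarget::ResolvedOffset(-8).as_offset19_or_zero(),
            0x7fffe
        );
    }
}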
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit.rs
new file mode 100644
index 0000000000..5d0270dade
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit.rs
@@ -0,0 +1,2359 @@
+//! AArch64 ISA: binary code emission.
+
+use crate::binemit::{CodeOffset, Reloc, StackMap};
+use crate::ir::constant::ConstantData;
+use crate::ir::types::*;
+use crate::ir::{MemFlags, TrapCode};
+use crate::isa::aarch64::inst::*;
+use crate::machinst::ty_bits;
+
+use regalloc::{Reg, RegClass, Writable};
+
+use core::convert::TryFrom;
+use log::debug;
+
+/// Memory label/reference finalization: convert a MemLabel to a PC-relative
+/// offset, possibly emitting relocation(s) as necessary.
+pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
+ match label {
+ &MemLabel::PCRel(rel) => rel,
+ }
+}
+
+/// Memory addressing mode finalization: convert "special" modes (e.g.,
+/// generic arbitrary stack offset) into real addressing modes, possibly by
+/// emitting some helper instructions that come immediately before the use
+/// of this amode.
+pub fn mem_finalize(
+ insn_off: CodeOffset,
+ mem: &AMode,
+ state: &EmitState,
+) -> (SmallVec<[Inst; 4]>, AMode) {
+ match mem {
+ &AMode::RegOffset(_, off, ty)
+ | &AMode::SPOffset(off, ty)
+ | &AMode::FPOffset(off, ty)
+ | &AMode::NominalSPOffset(off, ty) => {
+ let basereg = match mem {
+ &AMode::RegOffset(reg, _, _) => reg,
+ &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => stack_reg(),
+ &AMode::FPOffset(..) => fp_reg(),
+ _ => unreachable!(),
+ };
+ let adj = match mem {
+ &AMode::NominalSPOffset(..) => {
+ debug!(
+ "mem_finalize: nominal SP offset {} + adj {} -> {}",
+ off,
+ state.virtual_sp_offset,
+ off + state.virtual_sp_offset
+ );
+ state.virtual_sp_offset
+ }
+ _ => 0,
+ };
+ let off = off + adj;
+
+ if let Some(simm9) = SImm9::maybe_from_i64(off) {
+ let mem = AMode::Unscaled(basereg, simm9);
+ (smallvec![], mem)
+ } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) {
+ let mem = AMode::UnsignedOffset(basereg, uimm12s);
+ (smallvec![], mem)
+ } else {
+ let tmp = writable_spilltmp_reg();
+ let mut const_insts = Inst::load_constant(tmp, off as u64);
+ // N.B.: we must use AluRRRExtend because AluRRR uses the "shifted register" form
+ // (AluRRRShift) instead, which interprets register 31 as the zero reg, not SP. SP
+ // is a valid base (for SPOffset) which we must handle here.
+ // Also, SP needs to be the first arg, not second.
+ let add_inst = Inst::AluRRRExtend {
+ alu_op: ALUOp::Add64,
+ rd: tmp,
+ rn: basereg,
+ rm: tmp.to_reg(),
+ extendop: ExtendOp::UXTX,
+ };
+ const_insts.push(add_inst);
+ (const_insts, AMode::reg(tmp.to_reg()))
+ }
+ }
+
+ &AMode::Label(ref label) => {
+ let off = memlabel_finalize(insn_off, label);
+ (smallvec![], AMode::Label(MemLabel::PCRel(off)))
+ }
+
+ _ => (smallvec![], mem.clone()),
+ }
+}
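As an editorial aside (not part of the vendored diff): the sketch below restates mem_finalize's offset classification as a standalone function. The two range predicates are simplified stand-ins (assumptions) for SImm9::maybe_from_i64 and UImm12Scaled::maybe_from_i64 from imms.rs, using the architectural ranges of the unscaled (signed 9-bit) and unsigned-offset (scaled 12-bit) load/store forms.

// Editorial sketch, not part of the vendored diff: the decision procedure used
// by mem_finalize() above, restated standalone. The range checks below are
// simplified stand-ins (assumptions) for SImm9::maybe_from_i64 and
// UImm12Scaled::maybe_from_i64 in imms.rs.
fn classify_offset(off: i64, access_bytes: i64) -> &'static str {
    // AArch64 "unscaled" form: signed 9-bit byte offset.
    let fits_simm9 = off >= -256 && off <= 255;
    // AArch64 "unsigned offset" form: a multiple of the access size, 0..=4095 units.
    let fits_uimm12_scaled =
        off >= 0 && off % access_bytes == 0 && off / access_bytes <= 4095;
    if fits_simm9 {
        // -> AMode::Unscaled(base, simm9): ldur/stur, no extra instructions.
        "unscaled"
    } else if fits_uimm12_scaled {
        // -> AMode::UnsignedOffset(base, uimm12): ldr/str with a scaled immediate.
        "unsigned-offset"
    } else {
        // -> load the offset into the spill temp, `add tmp, base, tmp, UXTX`,
        //    and address through tmp (the else branch above).
        "constant+add"
    }
}

fn main() {
    // A NominalSPOffset is first rebased by the running virtual SP adjustment.
    let (off, virtual_sp_offset) = (40i64, 16i64);
    assert_eq!(classify_offset(off + virtual_sp_offset, 8), "unscaled");
    assert_eq!(classify_offset(4096, 8), "unsigned-offset");
    assert_eq!(classify_offset(4097, 8), "constant+add");
}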
+
+/// Helper: get a ConstantData from a u64.
+pub fn u64_constant(bits: u64) -> ConstantData {
+ let data = bits.to_le_bytes();
+ ConstantData::from(&data[..])
+}
+
+//=============================================================================
+// Instructions and subcomponents: emission
+
+fn machreg_to_gpr(m: Reg) -> u32 {
+ assert_eq!(m.get_class(), RegClass::I64);
+ u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+}
+
+fn machreg_to_vec(m: Reg) -> u32 {
+ assert_eq!(m.get_class(), RegClass::V128);
+ u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+}
+
+fn machreg_to_gpr_or_vec(m: Reg) -> u32 {
+ u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+}
+
+fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
+ (bits_31_21 << 21)
+ | (bits_15_10 << 10)
+ | machreg_to_gpr(rd.to_reg())
+ | (machreg_to_gpr(rn) << 5)
+ | (machreg_to_gpr(rm) << 16)
+}
+
+fn enc_arith_rr_imm12(
+ bits_31_24: u32,
+ immshift: u32,
+ imm12: u32,
+ rn: Reg,
+ rd: Writable<Reg>,
+) -> u32 {
+ (bits_31_24 << 24)
+ | (immshift << 22)
+ | (imm12 << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
+ (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
+ (top11 << 21)
+ | (machreg_to_gpr(rm) << 16)
+ | (bit15 << 15)
+ | (machreg_to_gpr(ra) << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 {
+ assert!(off_26_0 < (1 << 26));
+ (op_31_26 << 26) | off_26_0
+}
+
+fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 {
+ assert!(off_18_0 < (1 << 19));
+ (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg)
+}
+
+fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
+ assert!(off_18_0 < (1 << 19));
+ assert!(cond < (1 << 4));
+ (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
+}
+
+fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
+ match kind {
+ CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg),
+ CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg),
+ CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
+ }
+}
+
+const MOVE_WIDE_FIXED: u32 = 0x12800000;
+
+#[repr(u32)]
+enum MoveWideOpcode {
+ MOVN = 0b00,
+ MOVZ = 0b10,
+ MOVK = 0b11,
+}
+
+fn enc_move_wide(
+ op: MoveWideOpcode,
+ rd: Writable<Reg>,
+ imm: MoveWideConst,
+ size: OperandSize,
+) -> u32 {
+ assert!(imm.shift <= 0b11);
+ MOVE_WIDE_FIXED
+ | size.sf_bit() << 31
+ | (op as u32) << 29
+ | u32::from(imm.shift) << 21
+ | u32::from(imm.bits) << 5
+ | machreg_to_gpr(rd.to_reg())
+}
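As an editorial aside (not part of the vendored diff): the sketch below repeats the bit packing that enc_move_wide performs, using plain integers, and checks it against the architectural encoding of `movz x5, #0x1234, lsl #16`.

// Editorial sketch, not part of the vendored diff: the same packing as
// enc_move_wide() above, restated standalone. Field layout: sf at bit 31,
// opc at bits 30:29 (MOVZ = 0b10), the fixed move-wide pattern 0x12800000,
// hw (the 16-bit-chunk index) at bits 22:21, imm16 at bits 20:5, Rd at 4:0.
fn movz_word(rd: u32, imm16: u32, shift_chunks: u32, is64: bool) -> u32 {
    const MOVE_WIDE_FIXED: u32 = 0x12800000;
    const MOVZ: u32 = 0b10;
    assert!(rd < 32 && imm16 <= 0xffff && shift_chunks <= 0b11);
    MOVE_WIDE_FIXED
        | (is64 as u32) << 31
        | MOVZ << 29
        | shift_chunks << 21
        | imm16 << 5
        | rd
}

fn main() {
    // movz x5, #0x1234, lsl #16
    assert_eq!(movz_word(5, 0x1234, 1, true), 0xD2A24685);
}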
+
+fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
+ (op_31_22 << 22)
+ | (simm7.bits() << 15)
+ | (machreg_to_gpr(rt2) << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rt)
+}
+
+fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 {
+ (op_31_22 << 22)
+ | (simm9.bits() << 12)
+ | (op_11_10 << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr_or_vec(rd)
+}
+
+fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 {
+ (op_31_22 << 22)
+ | (0b1 << 24)
+ | (uimm12.bits() << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr_or_vec(rd)
+}
+
+fn enc_ldst_reg(
+ op_31_22: u32,
+ rn: Reg,
+ rm: Reg,
+ s_bit: bool,
+ extendop: Option<ExtendOp>,
+ rd: Reg,
+) -> u32 {
+ let s_bit = if s_bit { 1 } else { 0 };
+ let extend_bits = match extendop {
+ Some(ExtendOp::UXTW) => 0b010,
+ Some(ExtendOp::SXTW) => 0b110,
+ Some(ExtendOp::SXTX) => 0b111,
+ None => 0b011, // LSL
+ _ => panic!("bad extend mode for ld/st AMode"),
+ };
+ (op_31_22 << 22)
+ | (1 << 21)
+ | (machreg_to_gpr(rm) << 16)
+ | (extend_bits << 13)
+ | (s_bit << 12)
+ | (0b10 << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr_or_vec(rd)
+}
+
+fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {
+ (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
+}
+
+fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
+ debug_assert_eq!(q & 0b1, q);
+ debug_assert_eq!(size & 0b11, size);
+ 0b0_0_0011010_10_00000_110_0_00_00000_00000
+ | q << 30
+ | size << 10
+ | machreg_to_gpr(rn) << 5
+ | machreg_to_vec(rt.to_reg())
+}
+
+fn enc_extend(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ (top22 << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
+ (top11 << 21)
+ | (machreg_to_vec(rm) << 16)
+ | (bit15_10 << 10)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
+ (0b01011010110 << 21)
+ | size << 31
+ | opcode2 << 16
+ | opcode1 << 10
+ | machreg_to_gpr(rn) << 5
+ | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_br(rn: Reg) -> u32 {
+ 0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)
+}
+
+fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
+ let off = u32::try_from(off).unwrap();
+ let immlo = off & 3;
+ let immhi = (off >> 2) & ((1 << 19) - 1);
+ (0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
+}
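As an editorial aside (not part of the vendored diff): the sketch below restates the immlo/immhi split that enc_adr performs and checks it against the architectural encoding of `adr x1, #8`.

// Editorial sketch, not part of the vendored diff: ADR encodes a 21-bit signed
// byte offset with its low two bits (immlo) at bits 30:29 and the remaining
// 19 bits (immhi) at bits 23:5, as enc_adr() does above.
fn adr_word(rd: u32, off: u32) -> u32 {
    let immlo = off & 3;
    let immhi = (off >> 2) & ((1 << 19) - 1);
    (0b00010000 << 24) | (immlo << 29) | (immhi << 5) | rd
}

fn main() {
    // adr x1, #8
    assert_eq!(adr_word(1, 8), 0x10000041);
}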
+
+fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond) -> u32 {
+ 0b100_11010100_00000_0000_00_00000_00000
+ | (machreg_to_gpr(rm) << 16)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rd.to_reg())
+ | (cond.bits() << 12)
+}
+
+fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
+ 0b000_11110_00_1_00000_0000_11_00000_00000
+ | (size.ftype() << 22)
+ | (machreg_to_vec(rm) << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg())
+ | (cond.bits() << 12)
+}
+
+fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
+ 0b100_11010100_11111_0000_01_11111_00000
+ | machreg_to_gpr(rd.to_reg())
+ | (cond.invert().bits() << 12)
+}
+
+fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
+ 0b0_1_1_11010010_00000_0000_10_00000_0_0000
+ | size.sf_bit() << 31
+ | imm.bits() << 16
+ | cond.bits() << 12
+ | machreg_to_gpr(rn) << 5
+ | nzcv.bits()
+}
+
+fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
+ 0b00001110_101_00000_00011_1_00000_00000
+ | ((is_16b as u32) << 30)
+ | machreg_to_vec(rd.to_reg())
+ | (machreg_to_vec(rn) << 16)
+ | (machreg_to_vec(rn) << 5)
+}
+
+fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
+ (top22 << 10)
+ | (machreg_to_vec(rm) << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {
+ (top17 << 15)
+ | (machreg_to_vec(rm) << 16)
+ | (machreg_to_vec(ra) << 10)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
+ 0b000_11110_00_1_00000_00_1000_00000_00000
+ | (size.ftype() << 22)
+ | (machreg_to_vec(rm) << 16)
+ | (machreg_to_vec(rn) << 5)
+}
+
+fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ debug_assert_eq!(qu & 0b11, qu);
+ debug_assert_eq!(size & 0b11, size);
+ debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
+ let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
+ bits | qu << 29
+ | size << 22
+ | bits_12_16 << 12
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ debug_assert_eq!(q & 0b1, q);
+ debug_assert_eq!(u & 0b1, u);
+ debug_assert_eq!(size & 0b11, size);
+ debug_assert_eq!(opcode & 0b11111, opcode);
+ 0b0_0_0_01110_00_11000_0_0000_10_00000_00000
+ | q << 30
+ | u << 29
+ | size << 22
+ | opcode << 12
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
+ debug_assert_eq!(len & 0b11, len);
+ 0b0_1_001110_000_00000_0_00_0_00_00000_00000
+ | (machreg_to_vec(rm) << 16)
+ | len << 13
+ | (is_extension as u32) << 12
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_dmb_ish() -> u32 {
+ 0xD5033BBF
+}
+
+fn enc_ldxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
+ let sz = match ty {
+ I64 => 0b11,
+ I32 => 0b10,
+ I16 => 0b01,
+ I8 => 0b00,
+ _ => unreachable!(),
+ };
+ 0b00001000_01011111_01111100_00000000
+ | (sz << 30)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rt.to_reg())
+}
+
+fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
+ let sz = match ty {
+ I64 => 0b11,
+ I32 => 0b10,
+ I16 => 0b01,
+ I8 => 0b00,
+ _ => unreachable!(),
+ };
+ 0b00001000_00000000_01111100_00000000
+ | (sz << 30)
+ | (machreg_to_gpr(rs.to_reg()) << 16)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rt)
+}
+
+fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
+ let abc = (imm >> 5) as u32;
+ let defgh = (imm & 0b11111) as u32;
+
+ debug_assert_eq!(cmode & 0b1111, cmode);
+ debug_assert_eq!(q_op & 0b11, q_op);
+
+ 0b0_0_0_0111100000_000_0000_01_00000_00000
+ | (q_op << 29)
+ | (abc << 16)
+ | (cmode << 12)
+ | (defgh << 5)
+ | machreg_to_vec(rd.to_reg())
+}
+
+/// State carried between emissions of a sequence of instructions.
+#[derive(Default, Clone, Debug)]
+pub struct EmitState {
+ /// Addend to convert nominal-SP offsets to real-SP offsets at the current
+ /// program point.
+ pub(crate) virtual_sp_offset: i64,
+ /// Offset of FP from nominal-SP.
+ pub(crate) nominal_sp_to_fp: i64,
+ /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
+ stack_map: Option<StackMap>,
+ /// Current source-code location corresponding to instruction to be emitted.
+ cur_srcloc: SourceLoc,
+}
+
+impl MachInstEmitState<Inst> for EmitState {
+ fn new(abi: &dyn ABICallee<I = Inst>) -> Self {
+ EmitState {
+ virtual_sp_offset: 0,
+ nominal_sp_to_fp: abi.frame_size() as i64,
+ stack_map: None,
+ cur_srcloc: SourceLoc::default(),
+ }
+ }
+
+ fn pre_safepoint(&mut self, stack_map: StackMap) {
+ self.stack_map = Some(stack_map);
+ }
+
+ fn pre_sourceloc(&mut self, srcloc: SourceLoc) {
+ self.cur_srcloc = srcloc;
+ }
+}
+
+impl EmitState {
+ fn take_stack_map(&mut self) -> Option<StackMap> {
+ self.stack_map.take()
+ }
+
+ fn clear_post_insn(&mut self) {
+ self.stack_map = None;
+ }
+
+ fn cur_srcloc(&self) -> SourceLoc {
+ self.cur_srcloc
+ }
+}
+
+/// Constant state used during function compilation.
+pub struct EmitInfo(settings::Flags);
+
+impl EmitInfo {
+ pub(crate) fn new(flags: settings::Flags) -> Self {
+ Self(flags)
+ }
+}
+
+impl MachInstEmitInfo for EmitInfo {
+ fn flags(&self) -> &settings::Flags {
+ &self.0
+ }
+}
+
+impl MachInstEmit for Inst {
+ type State = EmitState;
+ type Info = EmitInfo;
+ type UnwindInfo = super::unwind::AArch64UnwindInfo;
+
+ fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
+ // N.B.: we *must* not exceed the "worst-case size" used to compute
+ // where to insert islands, except when islands are explicitly triggered
+ // (with an `EmitIsland`). We check this in debug builds. This is `mut`
+ // to allow disabling the check for `JTSequence`, which is always
+ // emitted following an `EmitIsland`.
+ let mut start_off = sink.cur_offset();
+
+ match self {
+ &Inst::AluRRR { alu_op, rd, rn, rm } => {
+ let top11 = match alu_op {
+ ALUOp::Add32 => 0b00001011_000,
+ ALUOp::Add64 => 0b10001011_000,
+ ALUOp::Sub32 => 0b01001011_000,
+ ALUOp::Sub64 => 0b11001011_000,
+ ALUOp::Orr32 => 0b00101010_000,
+ ALUOp::Orr64 => 0b10101010_000,
+ ALUOp::And32 => 0b00001010_000,
+ ALUOp::And64 => 0b10001010_000,
+ ALUOp::Eor32 => 0b01001010_000,
+ ALUOp::Eor64 => 0b11001010_000,
+ ALUOp::OrrNot32 => 0b00101010_001,
+ ALUOp::OrrNot64 => 0b10101010_001,
+ ALUOp::AndNot32 => 0b00001010_001,
+ ALUOp::AndNot64 => 0b10001010_001,
+ ALUOp::EorNot32 => 0b01001010_001,
+ ALUOp::EorNot64 => 0b11001010_001,
+ ALUOp::AddS32 => 0b00101011_000,
+ ALUOp::AddS64 => 0b10101011_000,
+ ALUOp::SubS32 => 0b01101011_000,
+ ALUOp::SubS64 => 0b11101011_000,
+ ALUOp::SDiv64 => 0b10011010_110,
+ ALUOp::UDiv64 => 0b10011010_110,
+ ALUOp::RotR32 | ALUOp::Lsr32 | ALUOp::Asr32 | ALUOp::Lsl32 => 0b00011010_110,
+ ALUOp::RotR64 | ALUOp::Lsr64 | ALUOp::Asr64 | ALUOp::Lsl64 => 0b10011010_110,
+ ALUOp::SMulH => 0b10011011_010,
+ ALUOp::UMulH => 0b10011011_110,
+ };
+ let bit15_10 = match alu_op {
+ ALUOp::SDiv64 => 0b000011,
+ ALUOp::UDiv64 => 0b000010,
+ ALUOp::RotR32 | ALUOp::RotR64 => 0b001011,
+ ALUOp::Lsr32 | ALUOp::Lsr64 => 0b001001,
+ ALUOp::Asr32 | ALUOp::Asr64 => 0b001010,
+ ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000,
+ ALUOp::SMulH | ALUOp::UMulH => 0b011111,
+ _ => 0b000000,
+ };
+ debug_assert_ne!(writable_stack_reg(), rd);
+ // The stack pointer is the zero register in this context, so this might be an
+ // indication that something is wrong.
+ debug_assert_ne!(stack_reg(), rn);
+ debug_assert_ne!(stack_reg(), rm);
+ sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
+ }
+ &Inst::AluRRRR {
+ alu_op,
+ rd,
+ rm,
+ rn,
+ ra,
+ } => {
+ let (top11, bit15) = match alu_op {
+ ALUOp3::MAdd32 => (0b0_00_11011_000, 0),
+ ALUOp3::MSub32 => (0b0_00_11011_000, 1),
+ ALUOp3::MAdd64 => (0b1_00_11011_000, 0),
+ ALUOp3::MSub64 => (0b1_00_11011_000, 1),
+ };
+ sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
+ }
+ &Inst::AluRRImm12 {
+ alu_op,
+ rd,
+ rn,
+ ref imm12,
+ } => {
+ let top8 = match alu_op {
+ ALUOp::Add32 => 0b000_10001,
+ ALUOp::Add64 => 0b100_10001,
+ ALUOp::Sub32 => 0b010_10001,
+ ALUOp::Sub64 => 0b110_10001,
+ ALUOp::AddS32 => 0b001_10001,
+ ALUOp::AddS64 => 0b101_10001,
+ ALUOp::SubS32 => 0b011_10001,
+ ALUOp::SubS64 => 0b111_10001,
+ _ => unimplemented!("{:?}", alu_op),
+ };
+ sink.put4(enc_arith_rr_imm12(
+ top8,
+ imm12.shift_bits(),
+ imm12.imm_bits(),
+ rn,
+ rd,
+ ));
+ }
+ &Inst::AluRRImmLogic {
+ alu_op,
+ rd,
+ rn,
+ ref imml,
+ } => {
+ let (top9, inv) = match alu_op {
+ ALUOp::Orr32 => (0b001_100100, false),
+ ALUOp::Orr64 => (0b101_100100, false),
+ ALUOp::And32 => (0b000_100100, false),
+ ALUOp::And64 => (0b100_100100, false),
+ ALUOp::Eor32 => (0b010_100100, false),
+ ALUOp::Eor64 => (0b110_100100, false),
+ ALUOp::OrrNot32 => (0b001_100100, true),
+ ALUOp::OrrNot64 => (0b101_100100, true),
+ ALUOp::AndNot32 => (0b000_100100, true),
+ ALUOp::AndNot64 => (0b100_100100, true),
+ ALUOp::EorNot32 => (0b010_100100, true),
+ ALUOp::EorNot64 => (0b110_100100, true),
+ _ => unimplemented!("{:?}", alu_op),
+ };
+ let imml = if inv { imml.invert() } else { imml.clone() };
+ sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd));
+ }
+
+ &Inst::AluRRImmShift {
+ alu_op,
+ rd,
+ rn,
+ ref immshift,
+ } => {
+ let amt = immshift.value();
+ let (top10, immr, imms) = match alu_op {
+ ALUOp::RotR32 => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),
+ ALUOp::RotR64 => (0b1001001111, machreg_to_gpr(rn), u32::from(amt)),
+ ALUOp::Lsr32 => (0b0101001100, u32::from(amt), 0b011111),
+ ALUOp::Lsr64 => (0b1101001101, u32::from(amt), 0b111111),
+ ALUOp::Asr32 => (0b0001001100, u32::from(amt), 0b011111),
+ ALUOp::Asr64 => (0b1001001101, u32::from(amt), 0b111111),
+ ALUOp::Lsl32 => (
+ 0b0101001100,
+ u32::from((32 - amt) % 32),
+ u32::from(31 - amt),
+ ),
+ ALUOp::Lsl64 => (
+ 0b1101001101,
+ u32::from((64 - amt) % 64),
+ u32::from(63 - amt),
+ ),
+ _ => unimplemented!("{:?}", alu_op),
+ };
+ sink.put4(
+ (top10 << 22)
+ | (immr << 16)
+ | (imms << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rd.to_reg()),
+ );
+ }
+
+ &Inst::AluRRRShift {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ref shiftop,
+ } => {
+ let top11: u32 = match alu_op {
+ ALUOp::Add32 => 0b000_01011000,
+ ALUOp::Add64 => 0b100_01011000,
+ ALUOp::AddS32 => 0b001_01011000,
+ ALUOp::AddS64 => 0b101_01011000,
+ ALUOp::Sub32 => 0b010_01011000,
+ ALUOp::Sub64 => 0b110_01011000,
+ ALUOp::SubS32 => 0b011_01011000,
+ ALUOp::SubS64 => 0b111_01011000,
+ ALUOp::Orr32 => 0b001_01010000,
+ ALUOp::Orr64 => 0b101_01010000,
+ ALUOp::And32 => 0b000_01010000,
+ ALUOp::And64 => 0b100_01010000,
+ ALUOp::Eor32 => 0b010_01010000,
+ ALUOp::Eor64 => 0b110_01010000,
+ ALUOp::OrrNot32 => 0b001_01010001,
+ ALUOp::OrrNot64 => 0b101_01010001,
+ ALUOp::EorNot32 => 0b010_01010001,
+ ALUOp::EorNot64 => 0b110_01010001,
+ ALUOp::AndNot32 => 0b000_01010001,
+ ALUOp::AndNot64 => 0b100_01010001,
+ _ => unimplemented!("{:?}", alu_op),
+ };
+ let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);
+ let bits_15_10 = u32::from(shiftop.amt().value());
+ sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
+ }
+
+ &Inst::AluRRRExtend {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ extendop,
+ } => {
+ let top11: u32 = match alu_op {
+ ALUOp::Add32 => 0b00001011001,
+ ALUOp::Add64 => 0b10001011001,
+ ALUOp::Sub32 => 0b01001011001,
+ ALUOp::Sub64 => 0b11001011001,
+ ALUOp::AddS32 => 0b00101011001,
+ ALUOp::AddS64 => 0b10101011001,
+ ALUOp::SubS32 => 0b01101011001,
+ ALUOp::SubS64 => 0b11101011001,
+ _ => unimplemented!("{:?}", alu_op),
+ };
+ let bits_15_10 = u32::from(extendop.bits()) << 3;
+ sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
+ }
+
+ &Inst::BitRR { op, rd, rn, .. } => {
+ let size = if op.operand_size().is32() { 0b0 } else { 0b1 };
+ let (op1, op2) = match op {
+ BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000),
+ BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100),
+ BitOp::Cls32 | BitOp::Cls64 => (0b00000, 0b000101),
+ };
+ sink.put4(enc_bit_rr(size, op1, op2, rn, rd))
+ }
+
+ &Inst::ULoad8 { rd, ref mem, flags }
+ | &Inst::SLoad8 { rd, ref mem, flags }
+ | &Inst::ULoad16 { rd, ref mem, flags }
+ | &Inst::SLoad16 { rd, ref mem, flags }
+ | &Inst::ULoad32 { rd, ref mem, flags }
+ | &Inst::SLoad32 { rd, ref mem, flags }
+ | &Inst::ULoad64 {
+ rd, ref mem, flags, ..
+ }
+ | &Inst::FpuLoad32 { rd, ref mem, flags }
+ | &Inst::FpuLoad64 { rd, ref mem, flags }
+ | &Inst::FpuLoad128 { rd, ref mem, flags } => {
+ let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
+
+ for inst in mem_insts.into_iter() {
+ inst.emit(sink, emit_info, state);
+ }
+
+ // ldst encoding helpers take Reg, not Writable<Reg>.
+ let rd = rd.to_reg();
+
+ // This is the base opcode (top 10 bits) for the "unscaled
+ // immediate" form (Unscaled). Other addressing modes will OR in
+ // other values for bits 24/25 (bits 1/2 of this constant).
+ let (op, bits) = match self {
+ &Inst::ULoad8 { .. } => (0b0011100001, 8),
+ &Inst::SLoad8 { .. } => (0b0011100010, 8),
+ &Inst::ULoad16 { .. } => (0b0111100001, 16),
+ &Inst::SLoad16 { .. } => (0b0111100010, 16),
+ &Inst::ULoad32 { .. } => (0b1011100001, 32),
+ &Inst::SLoad32 { .. } => (0b1011100010, 32),
+ &Inst::ULoad64 { .. } => (0b1111100001, 64),
+ &Inst::FpuLoad32 { .. } => (0b1011110001, 32),
+ &Inst::FpuLoad64 { .. } => (0b1111110001, 64),
+ &Inst::FpuLoad128 { .. } => (0b0011110011, 128),
+ _ => unreachable!(),
+ };
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() && !flags.notrap() {
+ // Register the offset at which the actual load instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+
+ match &mem {
+ &AMode::Unscaled(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
+ }
+ &AMode::UnsignedOffset(reg, uimm12scaled) => {
+ if uimm12scaled.value() != 0 {
+ assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
+ }
+ sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
+ }
+ &AMode::RegReg(r1, r2) => {
+ sink.put4(enc_ldst_reg(
+ op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
+ ));
+ }
+ &AMode::RegScaled(r1, r2, ty) | &AMode::RegScaledExtended(r1, r2, ty, _) => {
+ assert_eq!(bits, ty_bits(ty));
+ let extendop = match &mem {
+ &AMode::RegScaled(..) => None,
+ &AMode::RegScaledExtended(_, _, _, op) => Some(op),
+ _ => unreachable!(),
+ };
+ sink.put4(enc_ldst_reg(
+ op, r1, r2, /* scaled = */ true, extendop, rd,
+ ));
+ }
+ &AMode::RegExtended(r1, r2, extendop) => {
+ sink.put4(enc_ldst_reg(
+ op,
+ r1,
+ r2,
+ /* scaled = */ false,
+ Some(extendop),
+ rd,
+ ));
+ }
+ &AMode::Label(ref label) => {
+ let offset = match label {
+ // cast i32 to u32 (two's-complement)
+ &MemLabel::PCRel(off) => off as u32,
+ } / 4;
+ assert!(offset < (1 << 19));
+ match self {
+ &Inst::ULoad32 { .. } => {
+ sink.put4(enc_ldst_imm19(0b00011000, offset, rd));
+ }
+ &Inst::SLoad32 { .. } => {
+ sink.put4(enc_ldst_imm19(0b10011000, offset, rd));
+ }
+ &Inst::FpuLoad32 { .. } => {
+ sink.put4(enc_ldst_imm19(0b00011100, offset, rd));
+ }
+ &Inst::ULoad64 { .. } => {
+ sink.put4(enc_ldst_imm19(0b01011000, offset, rd));
+ }
+ &Inst::FpuLoad64 { .. } => {
+ sink.put4(enc_ldst_imm19(0b01011100, offset, rd));
+ }
+ &Inst::FpuLoad128 { .. } => {
+ sink.put4(enc_ldst_imm19(0b10011100, offset, rd));
+ }
+ _ => panic!("Unspported size for LDR from constant pool!"),
+ }
+ }
+ &AMode::PreIndexed(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd));
+ }
+ &AMode::PostIndexed(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
+ }
+ // Eliminated by `mem_finalize()` above.
+ &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => {
+ panic!("Should not see stack-offset here!")
+ }
+ &AMode::RegOffset(..) => panic!("SHould not see generic reg-offset here!"),
+ }
+ }
+
+ &Inst::Store8 { rd, ref mem, flags }
+ | &Inst::Store16 { rd, ref mem, flags }
+ | &Inst::Store32 { rd, ref mem, flags }
+ | &Inst::Store64 { rd, ref mem, flags }
+ | &Inst::FpuStore32 { rd, ref mem, flags }
+ | &Inst::FpuStore64 { rd, ref mem, flags }
+ | &Inst::FpuStore128 { rd, ref mem, flags } => {
+ let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
+
+ for inst in mem_insts.into_iter() {
+ inst.emit(sink, emit_info, state);
+ }
+
+ let (op, bits) = match self {
+ &Inst::Store8 { .. } => (0b0011100000, 8),
+ &Inst::Store16 { .. } => (0b0111100000, 16),
+ &Inst::Store32 { .. } => (0b1011100000, 32),
+ &Inst::Store64 { .. } => (0b1111100000, 64),
+ &Inst::FpuStore32 { .. } => (0b1011110000, 32),
+ &Inst::FpuStore64 { .. } => (0b1111110000, 64),
+ &Inst::FpuStore128 { .. } => (0b0011110010, 128),
+ _ => unreachable!(),
+ };
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() && !flags.notrap() {
+ // Register the offset at which the actual store instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+
+ match &mem {
+ &AMode::Unscaled(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
+ }
+ &AMode::UnsignedOffset(reg, uimm12scaled) => {
+ if uimm12scaled.value() != 0 {
+ assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
+ }
+ sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
+ }
+ &AMode::RegReg(r1, r2) => {
+ sink.put4(enc_ldst_reg(
+ op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
+ ));
+ }
+ &AMode::RegScaled(r1, r2, _ty) | &AMode::RegScaledExtended(r1, r2, _ty, _) => {
+ let extendop = match &mem {
+ &AMode::RegScaled(..) => None,
+ &AMode::RegScaledExtended(_, _, _, op) => Some(op),
+ _ => unreachable!(),
+ };
+ sink.put4(enc_ldst_reg(
+ op, r1, r2, /* scaled = */ true, extendop, rd,
+ ));
+ }
+ &AMode::RegExtended(r1, r2, extendop) => {
+ sink.put4(enc_ldst_reg(
+ op,
+ r1,
+ r2,
+ /* scaled = */ false,
+ Some(extendop),
+ rd,
+ ));
+ }
+ &AMode::Label(..) => {
+ panic!("Store to a MemLabel not implemented!");
+ }
+ &AMode::PreIndexed(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd));
+ }
+ &AMode::PostIndexed(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
+ }
+ // Eliminated by `mem_finalize()` above.
+ &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => {
+ panic!("Should not see stack-offset here!")
+ }
+ &AMode::RegOffset(..) => panic!("SHould not see generic reg-offset here!"),
+ }
+ }
+
+ &Inst::StoreP64 {
+ rt,
+ rt2,
+ ref mem,
+ flags,
+ } => {
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() && !flags.notrap() {
+ // Register the offset at which the actual store instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ match mem {
+ &PairAMode::SignedOffset(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
+ }
+ &PairAMode::PreIndexed(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100110, simm7, reg.to_reg(), rt, rt2));
+ }
+ &PairAMode::PostIndexed(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100010, simm7, reg.to_reg(), rt, rt2));
+ }
+ }
+ }
+ &Inst::LoadP64 {
+ rt,
+ rt2,
+ ref mem,
+ flags,
+ } => {
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() && !flags.notrap() {
+ // Register the offset at which the actual load instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+
+ let rt = rt.to_reg();
+ let rt2 = rt2.to_reg();
+ match mem {
+ &PairAMode::SignedOffset(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
+ }
+ &PairAMode::PreIndexed(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100111, simm7, reg.to_reg(), rt, rt2));
+ }
+ &PairAMode::PostIndexed(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100011, simm7, reg.to_reg(), rt, rt2));
+ }
+ }
+ }
+ &Inst::Mov64 { rd, rm } => {
+ assert!(rd.to_reg().get_class() == rm.get_class());
+ assert!(rm.get_class() == RegClass::I64);
+
+ // MOV to SP is interpreted as MOV to XZR instead. And our codegen
+ // should never MOV to XZR.
+ assert!(rd.to_reg() != stack_reg());
+
+ if rm == stack_reg() {
+ // We can't use ORR here, so use an `add rd, sp, #0` instead.
+ let imm12 = Imm12::maybe_from_u64(0).unwrap();
+ sink.put4(enc_arith_rr_imm12(
+ 0b100_10001,
+ imm12.shift_bits(),
+ imm12.imm_bits(),
+ rm,
+ rd,
+ ));
+ } else {
+ // Encoded as ORR rd, rm, zero.
+ sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
+ }
+ }
+ &Inst::Mov32 { rd, rm } => {
+ // MOV to SP is interpreted as MOV to XZR instead. And our codegen
+ // should never MOV to XZR.
+ assert!(machreg_to_gpr(rd.to_reg()) != 31);
+ // Encoded as ORR rd, rm, zero.
+ sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
+ }
+ &Inst::MovZ { rd, imm, size } => {
+ sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm, size))
+ }
+ &Inst::MovN { rd, imm, size } => {
+ sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm, size))
+ }
+ &Inst::MovK { rd, imm, size } => {
+ sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm, size))
+ }
+ &Inst::CSel { rd, rn, rm, cond } => {
+ sink.put4(enc_csel(rd, rn, rm, cond));
+ }
+ &Inst::CSet { rd, cond } => {
+ sink.put4(enc_cset(rd, cond));
+ }
+ &Inst::CCmpImm {
+ size,
+ rn,
+ imm,
+ nzcv,
+ cond,
+ } => {
+ sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
+ }
+ &Inst::AtomicRMW { ty, op } => {
+ /* Emit this:
+ dmb ish
+ again:
+ ldxr{,b,h} x/w27, [x25]
+ op x28, x27, x26 // op is add,sub,and,orr,eor
+ stxr{,b,h} w24, x/w28, [x25]
+ cbnz x24, again
+ dmb ish
+
+ Operand conventions:
+ IN: x25 (addr), x26 (2nd arg for op)
+ OUT: x27 (old value), x24 (trashed), x28 (trashed)
+
+ It is unfortunate that, per the ARM documentation, x28 cannot be used for
+ both the store-data and success-flag operands of stxr. This causes the
+ instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
+ instead for the success-flag.
+
+ In the case where the operation is 'xchg', the second insn is instead
+ mov x28, x26
+ so that we simply write in the destination, the "2nd arg for op".
+ */
+ let xzr = zero_reg();
+ let x24 = xreg(24);
+ let x25 = xreg(25);
+ let x26 = xreg(26);
+ let x27 = xreg(27);
+ let x28 = xreg(28);
+ let x24wr = writable_xreg(24);
+ let x27wr = writable_xreg(27);
+ let x28wr = writable_xreg(28);
+ let again_label = sink.get_label();
+
+ sink.put4(enc_dmb_ish()); // dmb ish
+
+ // again:
+ sink.bind_label(again_label);
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25]
+
+ if op == inst_common::AtomicRmwOp::Xchg {
+ // mov x28, x26
+ sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x28wr, xzr, x26))
+ } else {
+ // add/sub/and/orr/eor x28, x27, x26
+ let bits_31_21 = match op {
+ inst_common::AtomicRmwOp::Add => 0b100_01011_00_0,
+ inst_common::AtomicRmwOp::Sub => 0b110_01011_00_0,
+ inst_common::AtomicRmwOp::And => 0b100_01010_00_0,
+ inst_common::AtomicRmwOp::Or => 0b101_01010_00_0,
+ inst_common::AtomicRmwOp::Xor => 0b110_01010_00_0,
+ inst_common::AtomicRmwOp::Xchg => unreachable!(),
+ };
+ sink.put4(enc_arith_rrr(bits_31_21, 0b000000, x28wr, x27, x26));
+ }
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25]
+
+ // cbnz w24, again
+ // Note, we're actually testing x24, and relying on the default zero-high-half
+ // rule in the assignment that `stxr` does.
+ let br_offset = sink.cur_offset();
+ sink.put4(enc_conditional_br(
+ BranchTarget::Label(again_label),
+ CondBrKind::NotZero(x24),
+ ));
+ sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
+
+ sink.put4(enc_dmb_ish()); // dmb ish
+ }
+ &Inst::AtomicCAS { ty } => {
+ /* Emit this:
+ dmb ish
+ again:
+ ldxr{,b,h} x/w27, [x25]
+ and x24, x26, MASK (= 2^size_bits - 1)
+ cmp x27, x24
+ b.ne out
+ stxr{,b,h} w24, x/w28, [x25]
+ cbnz x24, again
+ out:
+ dmb ish
+
+ Operand conventions:
+ IN: x25 (addr), x26 (expected value), x28 (replacement value)
+ OUT: x27 (old value), x24 (trashed)
+ */
+ let xzr = zero_reg();
+ let x24 = xreg(24);
+ let x25 = xreg(25);
+ let x26 = xreg(26);
+ let x27 = xreg(27);
+ let x28 = xreg(28);
+ let xzrwr = writable_zero_reg();
+ let x24wr = writable_xreg(24);
+ let x27wr = writable_xreg(27);
+ let again_label = sink.get_label();
+ let out_label = sink.get_label();
+
+ sink.put4(enc_dmb_ish()); // dmb ish
+
+ // again:
+ sink.bind_label(again_label);
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25]
+
+ if ty == I64 {
+ // mov x24, x26
+ sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x24wr, xzr, x26))
+ } else {
+ // and x24, x26, 0xFF/0xFFFF/0xFFFFFFFF
+ let (mask, s) = match ty {
+ I8 => (0xFF, 7),
+ I16 => (0xFFFF, 15),
+ I32 => (0xFFFFFFFF, 31),
+ _ => unreachable!(),
+ };
+ sink.put4(enc_arith_rr_imml(
+ 0b100_100100,
+ ImmLogic::from_n_r_s(mask, true, 0, s, OperandSize::Size64).enc_bits(),
+ x26,
+ x24wr,
+ ))
+ }
+
+ // cmp x27, x24 (== subs xzr, x27, x24)
+ sink.put4(enc_arith_rrr(0b111_01011_00_0, 0b000000, xzrwr, x27, x24));
+
+ // b.ne out
+ let br_out_offset = sink.cur_offset();
+ sink.put4(enc_conditional_br(
+ BranchTarget::Label(out_label),
+ CondBrKind::Cond(Cond::Ne),
+ ));
+ sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25]
+
+ // cbnz w24, again.
+ // Note, we're actually testing x24, and relying on the default zero-high-half
+ // rule in the assignment that `stxr` does.
+ let br_again_offset = sink.cur_offset();
+ sink.put4(enc_conditional_br(
+ BranchTarget::Label(again_label),
+ CondBrKind::NotZero(x24),
+ ));
+ sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19);
+
+ // out:
+ sink.bind_label(out_label);
+ sink.put4(enc_dmb_ish()); // dmb ish
+ }
+ &Inst::AtomicLoad { ty, r_data, r_addr } => {
+ let op = match ty {
+ I8 => 0b0011100001,
+ I16 => 0b0111100001,
+ I32 => 0b1011100001,
+ I64 => 0b1111100001,
+ _ => unreachable!(),
+ };
+ sink.put4(enc_dmb_ish()); // dmb ish
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/);
+ sink.put4(enc_ldst_uimm12(
+ op,
+ uimm12scaled_zero,
+ r_addr,
+ r_data.to_reg(),
+ ));
+ }
+ &Inst::AtomicStore { ty, r_data, r_addr } => {
+ let op = match ty {
+ I8 => 0b0011100000,
+ I16 => 0b0111100000,
+ I32 => 0b1011100000,
+ I64 => 0b1111100000,
+ _ => unreachable!(),
+ };
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/);
+ sink.put4(enc_ldst_uimm12(op, uimm12scaled_zero, r_addr, r_data));
+ sink.put4(enc_dmb_ish()); // dmb ish
+ }
+ &Inst::Fence {} => {
+ sink.put4(enc_dmb_ish()); // dmb ish
+ }
+ &Inst::FpuMove64 { rd, rn } => {
+ sink.put4(enc_vecmov(/* 16b = */ false, rd, rn));
+ }
+ &Inst::FpuMove128 { rd, rn } => {
+ sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
+ }
+ &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
+ let (imm5, shift, mask) = match size.lane_size() {
+ ScalarSize::Size32 => (0b00100, 3, 0b011),
+ ScalarSize::Size64 => (0b01000, 4, 0b001),
+ _ => unimplemented!(),
+ };
+ debug_assert_eq!(idx & mask, idx);
+ let imm5 = imm5 | ((idx as u32) << shift);
+ sink.put4(
+ 0b010_11110000_00000_000001_00000_00000
+ | (imm5 << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::FpuRR { fpu_op, rd, rn } => {
+ let top22 = match fpu_op {
+ FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000,
+ FPUOp1::Abs64 => 0b000_11110_01_1_000001_10000,
+ FPUOp1::Neg32 => 0b000_11110_00_1_000010_10000,
+ FPUOp1::Neg64 => 0b000_11110_01_1_000010_10000,
+ FPUOp1::Sqrt32 => 0b000_11110_00_1_000011_10000,
+ FPUOp1::Sqrt64 => 0b000_11110_01_1_000011_10000,
+ FPUOp1::Cvt32To64 => 0b000_11110_00_1_000101_10000,
+ FPUOp1::Cvt64To32 => 0b000_11110_01_1_000100_10000,
+ };
+ sink.put4(enc_fpurr(top22, rd, rn));
+ }
+ &Inst::FpuRRR { fpu_op, rd, rn, rm } => {
+ let top22 = match fpu_op {
+ FPUOp2::Add32 => 0b000_11110_00_1_00000_001010,
+ FPUOp2::Add64 => 0b000_11110_01_1_00000_001010,
+ FPUOp2::Sub32 => 0b000_11110_00_1_00000_001110,
+ FPUOp2::Sub64 => 0b000_11110_01_1_00000_001110,
+ FPUOp2::Mul32 => 0b000_11110_00_1_00000_000010,
+ FPUOp2::Mul64 => 0b000_11110_01_1_00000_000010,
+ FPUOp2::Div32 => 0b000_11110_00_1_00000_000110,
+ FPUOp2::Div64 => 0b000_11110_01_1_00000_000110,
+ FPUOp2::Max32 => 0b000_11110_00_1_00000_010010,
+ FPUOp2::Max64 => 0b000_11110_01_1_00000_010010,
+ FPUOp2::Min32 => 0b000_11110_00_1_00000_010110,
+ FPUOp2::Min64 => 0b000_11110_01_1_00000_010110,
+ FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011,
+ FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011,
+ FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011,
+ FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011,
+ };
+ sink.put4(enc_fpurrr(top22, rd, rn, rm));
+ }
+ &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
+ FPUOpRI::UShr32(imm) => {
+ debug_assert_eq!(32, imm.lane_size_in_bits);
+ sink.put4(
+ 0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
+ | imm.enc() << 16
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg()),
+ )
+ }
+ FPUOpRI::UShr64(imm) => {
+ debug_assert_eq!(64, imm.lane_size_in_bits);
+ sink.put4(
+ 0b01_1_111110_0000000_00_0_0_0_1_00000_00000
+ | imm.enc() << 16
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg()),
+ )
+ }
+ FPUOpRI::Sli64(imm) => {
+ debug_assert_eq!(64, imm.lane_size_in_bits);
+ sink.put4(
+ 0b01_1_111110_0000000_010101_00000_00000
+ | imm.enc() << 16
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg()),
+ )
+ }
+ FPUOpRI::Sli32(imm) => {
+ debug_assert_eq!(32, imm.lane_size_in_bits);
+ sink.put4(
+ 0b0_0_1_011110_0000000_010101_00000_00000
+ | imm.enc() << 16
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg()),
+ )
+ }
+ },
+ &Inst::FpuRRRR {
+ fpu_op,
+ rd,
+ rn,
+ rm,
+ ra,
+ } => {
+ let top17 = match fpu_op {
+ FPUOp3::MAdd32 => 0b000_11111_00_0_00000_0,
+ FPUOp3::MAdd64 => 0b000_11111_01_0_00000_0,
+ };
+ sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
+ }
+ &Inst::VecMisc { op, rd, rn, size } => {
+ let (q, enc_size) = size.enc_size();
+ let (u, bits_12_16, size) = match op {
+ VecMisc2::Not => (0b1, 0b00101, 0b00),
+ VecMisc2::Neg => (0b1, 0b01011, enc_size),
+ VecMisc2::Abs => (0b0, 0b01011, enc_size),
+ VecMisc2::Fabs => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b01111, enc_size)
+ }
+ VecMisc2::Fneg => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b1, 0b01111, enc_size)
+ }
+ VecMisc2::Fsqrt => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b1, 0b11111, enc_size)
+ }
+ VecMisc2::Rev64 => {
+ debug_assert_ne!(VectorSize::Size64x2, size);
+ (0b0, 0b00000, enc_size)
+ }
+ VecMisc2::Shll => {
+ debug_assert_ne!(VectorSize::Size64x2, size);
+ debug_assert!(!size.is_128bits());
+ (0b1, 0b10011, enc_size)
+ }
+ VecMisc2::Fcvtzs => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11011, enc_size)
+ }
+ VecMisc2::Fcvtzu => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b1, 0b11011, enc_size)
+ }
+ VecMisc2::Scvtf => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11101, enc_size & 0b1)
+ }
+ VecMisc2::Ucvtf => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b1, 0b11101, enc_size & 0b1)
+ }
+ VecMisc2::Frintn => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11000, enc_size & 0b01)
+ }
+ VecMisc2::Frintz => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11001, enc_size | 0b10)
+ }
+ VecMisc2::Frintm => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11001, enc_size & 0b01)
+ }
+ VecMisc2::Frintp => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11000, enc_size | 0b10)
+ }
+ };
+ sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
+ }
+ &Inst::VecLanes { op, rd, rn, size } => {
+ let (q, size) = match size {
+ VectorSize::Size8x16 => (0b1, 0b00),
+ VectorSize::Size16x8 => (0b1, 0b01),
+ VectorSize::Size32x4 => (0b1, 0b10),
+ _ => unreachable!(),
+ };
+ let (u, opcode) = match op {
+ VecLanesOp::Uminv => (0b1, 0b11010),
+ VecLanesOp::Addv => (0b0, 0b11011),
+ };
+ sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
+ }
+ &Inst::VecShiftImm {
+ op,
+ rd,
+ rn,
+ size,
+ imm,
+ } => {
+ let (is_shr, template) = match op {
+ VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32),
+ VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32),
+ VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32),
+ };
+ let imm = imm as u32;
+ // Deal with the somewhat strange encoding scheme for, and limits on,
+ // the shift amount.
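+            // For right shifts the immh:immb field holds (2 * lane_bits - imm); for
+            // left shifts it holds (lane_bits + imm).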
+ let immh_immb = match (size, is_shr) {
+ (VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => {
+ 0b_1000_000_u32 | (64 - imm)
+ }
+ (VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => {
+ 0b_0100_000_u32 | (32 - imm)
+ }
+ (VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => {
+ 0b_0010_000_u32 | (16 - imm)
+ }
+ (VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => {
+ 0b_0001_000_u32 | (8 - imm)
+ }
+ (VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm,
+ (VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm,
+ (VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm,
+ (VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm,
+ _ => panic!(
+ "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
+ op, size, imm
+ ),
+ };
+ let rn_enc = machreg_to_vec(rn);
+ let rd_enc = machreg_to_vec(rd.to_reg());
+ sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
+ }
+ &Inst::VecExtract { rd, rn, rm, imm4 } => {
+ if imm4 < 16 {
+ let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
+ let rm_enc = machreg_to_vec(rm);
+ let rn_enc = machreg_to_vec(rn);
+ let rd_enc = machreg_to_vec(rd.to_reg());
+ sink.put4(
+ template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
+ );
+ } else {
+ panic!(
+ "aarch64: Inst::VecExtract: emit: invalid extract index {}",
+ imm4
+ );
+ }
+ }
+ &Inst::VecTbl {
+ rd,
+ rn,
+ rm,
+ is_extension,
+ } => {
+ sink.put4(enc_tbl(is_extension, 0b00, rd, rn, rm));
+ }
+ &Inst::VecTbl2 {
+ rd,
+ rn,
+ rn2,
+ rm,
+ is_extension,
+ } => {
+ assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
+ sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm));
+ }
+ &Inst::FpuCmp32 { rn, rm } => {
+ sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm));
+ }
+ &Inst::FpuCmp64 { rn, rm } => {
+ sink.put4(enc_fcmp(ScalarSize::Size64, rn, rm));
+ }
+ &Inst::FpuToInt { op, rd, rn } => {
+ let top16 = match op {
+ // FCVTZS (32/32-bit)
+ FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000,
+ // FCVTZU (32/32-bit)
+ FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001,
+ // FCVTZS (32/64-bit)
+ FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000,
+ // FCVTZU (32/64-bit)
+ FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001,
+ // FCVTZS (64/32-bit)
+ FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000,
+ // FCVTZU (64/32-bit)
+ FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001,
+ // FCVTZS (64/64-bit)
+ FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000,
+ // FCVTZU (64/64-bit)
+ FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001,
+ };
+ sink.put4(enc_fputoint(top16, rd, rn));
+ }
+ &Inst::IntToFpu { op, rd, rn } => {
+ let top16 = match op {
+ // SCVTF (32/32-bit)
+ IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010,
+ // UCVTF (32/32-bit)
+ IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011,
+ // SCVTF (64/32-bit)
+ IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010,
+ // UCVTF (64/32-bit)
+ IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011,
+ // SCVTF (32/64-bit)
+ IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010,
+ // UCVTF (32/64-bit)
+ IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011,
+ // SCVTF (64/64-bit)
+ IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010,
+ // UCVTF (64/64-bit)
+ IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011,
+ };
+ sink.put4(enc_inttofpu(top16, rd, rn));
+ }
+ &Inst::LoadFpuConst64 { rd, const_data } => {
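+                // Emit `ldr <rd>, pc+8`, an unconditional branch over the constant,
+                // and then the 8-byte constant itself immediately after the branch.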
+ let inst = Inst::FpuLoad64 {
+ rd,
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ };
+ inst.emit(sink, emit_info, state);
+ let inst = Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(12),
+ };
+ inst.emit(sink, emit_info, state);
+ sink.put8(const_data);
+ }
+ &Inst::LoadFpuConst128 { rd, const_data } => {
+ let inst = Inst::FpuLoad128 {
+ rd,
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ };
+ inst.emit(sink, emit_info, state);
+ let inst = Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(20),
+ };
+ inst.emit(sink, emit_info, state);
+
+ for i in const_data.to_le_bytes().iter() {
+ sink.put1(*i);
+ }
+ }
+ &Inst::FpuCSel32 { rd, rn, rm, cond } => {
+ sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
+ }
+ &Inst::FpuCSel64 { rd, rn, rm, cond } => {
+ sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
+ }
+ &Inst::FpuRound { op, rd, rn } => {
+ let top22 = match op {
+ FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000,
+ FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000,
+ FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000,
+ FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000,
+ FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000,
+ FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000,
+ FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000,
+ FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000,
+ };
+ sink.put4(enc_fround(top22, rd, rn));
+ }
+ &Inst::MovToFpu { rd, rn, size } => {
+ let template = match size {
+ ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
+ ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
+ _ => unreachable!(),
+ };
+ sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
+ }
+ &Inst::MovToVec { rd, rn, idx, size } => {
+ let (imm5, shift) = match size.lane_size() {
+ ScalarSize::Size8 => (0b00001, 1),
+ ScalarSize::Size16 => (0b00010, 2),
+ ScalarSize::Size32 => (0b00100, 3),
+ ScalarSize::Size64 => (0b01000, 4),
+ _ => unreachable!(),
+ };
+ debug_assert_eq!(idx & (0b11111 >> shift), idx);
+ let imm5 = imm5 | ((idx as u32) << shift);
+ sink.put4(
+ 0b010_01110000_00000_0_0011_1_00000_00000
+ | (imm5 << 16)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::MovFromVec { rd, rn, idx, size } => {
+ let (q, imm5, shift, mask) = match size {
+ VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111),
+ VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111),
+ VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011),
+ VectorSize::Size64x2 => (0b1, 0b01000, 4, 0b0001),
+ _ => unreachable!(),
+ };
+ debug_assert_eq!(idx & mask, idx);
+ let imm5 = imm5 | ((idx as u32) << shift);
+ sink.put4(
+ 0b000_01110000_00000_0_0111_1_00000_00000
+ | (q << 30)
+ | (imm5 << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_gpr(rd.to_reg()),
+ );
+ }
+ &Inst::MovFromVecSigned {
+ rd,
+ rn,
+ idx,
+ size,
+ scalar_size,
+ } => {
+ let (imm5, shift, half) = match size {
+ VectorSize::Size8x8 => (0b00001, 1, true),
+ VectorSize::Size8x16 => (0b00001, 1, false),
+ VectorSize::Size16x4 => (0b00010, 2, true),
+ VectorSize::Size16x8 => (0b00010, 2, false),
+ VectorSize::Size32x2 => {
+ debug_assert_ne!(scalar_size, OperandSize::Size32);
+ (0b00100, 3, true)
+ }
+ VectorSize::Size32x4 => {
+ debug_assert_ne!(scalar_size, OperandSize::Size32);
+ (0b00100, 3, false)
+ }
+ _ => panic!("Unexpected vector operand size"),
+ };
+ debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx);
+ let imm5 = imm5 | ((idx as u32) << shift);
+ sink.put4(
+ 0b000_01110000_00000_0_0101_1_00000_00000
+ | (scalar_size.is64() as u32) << 30
+ | (imm5 << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_gpr(rd.to_reg()),
+ );
+ }
+ &Inst::VecDup { rd, rn, size } => {
+ let imm5 = match size {
+ VectorSize::Size8x16 => 0b00001,
+ VectorSize::Size16x8 => 0b00010,
+ VectorSize::Size32x4 => 0b00100,
+ VectorSize::Size64x2 => 0b01000,
+ _ => unimplemented!(),
+ };
+ sink.put4(
+ 0b010_01110000_00000_000011_00000_00000
+ | (imm5 << 16)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::VecDupFromFpu { rd, rn, size } => {
+ let imm5 = match size {
+ VectorSize::Size32x4 => 0b00100,
+ VectorSize::Size64x2 => 0b01000,
+ _ => unimplemented!(),
+ };
+ sink.put4(
+ 0b010_01110000_00000_000001_00000_00000
+ | (imm5 << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::VecDupImm {
+ rd,
+ imm,
+ invert,
+ size,
+ } => {
+ let (imm, shift, shift_ones) = imm.value();
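+                // Map the lane size, the (optionally ones-filling) shift and the
+                // invert flag onto the `op` bit and `cmode` field of the ASIMD
+                // modified-immediate encoding.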
+ let (op, cmode) = match size.lane_size() {
+ ScalarSize::Size8 => {
+ assert!(!invert);
+ assert_eq!(shift, 0);
+
+ (0, 0b1110)
+ }
+ ScalarSize::Size16 => {
+ let s = shift & 8;
+
+ assert!(!shift_ones);
+ assert_eq!(s, shift);
+
+ (invert as u32, 0b1000 | (s >> 2))
+ }
+ ScalarSize::Size32 => {
+ if shift_ones {
+ assert!(shift == 8 || shift == 16);
+
+ (invert as u32, 0b1100 | (shift >> 4))
+ } else {
+ let s = shift & 24;
+
+ assert_eq!(s, shift);
+
+ (invert as u32, 0b0000 | (s >> 2))
+ }
+ }
+ ScalarSize::Size64 => {
+ assert!(!invert);
+ assert_eq!(shift, 0);
+
+ (1, 0b1110)
+ }
+ _ => unreachable!(),
+ };
+ let q_op = op | ((size.is_128bits() as u32) << 1);
+
+ sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
+ }
+ &Inst::VecExtend {
+ t,
+ rd,
+ rn,
+ high_half,
+ } => {
+ let (u, immh) = match t {
+ VecExtendOp::Sxtl8 => (0b0, 0b001),
+ VecExtendOp::Sxtl16 => (0b0, 0b010),
+ VecExtendOp::Sxtl32 => (0b0, 0b100),
+ VecExtendOp::Uxtl8 => (0b1, 0b001),
+ VecExtendOp::Uxtl16 => (0b1, 0b010),
+ VecExtendOp::Uxtl32 => (0b1, 0b100),
+ };
+ sink.put4(
+ 0b000_011110_0000_000_101001_00000_00000
+ | ((high_half as u32) << 30)
+ | (u << 29)
+ | (immh << 19)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::VecMiscNarrow {
+ op,
+ rd,
+ rn,
+ size,
+ high_half,
+ } => {
+ let size = match size.lane_size() {
+ ScalarSize::Size8 => 0b00,
+ ScalarSize::Size16 => 0b01,
+ ScalarSize::Size32 => 0b10,
+ _ => panic!("Unexpected vector operand lane size!"),
+ };
+ let (u, bits_12_16) = match op {
+ VecMiscNarrowOp::Xtn => (0b0, 0b10010),
+ VecMiscNarrowOp::Sqxtn => (0b0, 0b10100),
+ VecMiscNarrowOp::Sqxtun => (0b1, 0b10010),
+ };
+ sink.put4(enc_vec_rr_misc(
+ ((high_half as u32) << 1) | u,
+ size,
+ bits_12_16,
+ rd,
+ rn,
+ ));
+ }
+ &Inst::VecMovElement {
+ rd,
+ rn,
+ dest_idx,
+ src_idx,
+ size,
+ } => {
+ let (imm5, shift) = match size.lane_size() {
+ ScalarSize::Size8 => (0b00001, 1),
+ ScalarSize::Size16 => (0b00010, 2),
+ ScalarSize::Size32 => (0b00100, 3),
+ ScalarSize::Size64 => (0b01000, 4),
+ _ => unreachable!(),
+ };
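+            // INS (element): imm5 carries the element size and the destination index,
+            // imm4 the source index.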
+ let mask = 0b11111 >> shift;
+ debug_assert_eq!(dest_idx & mask, dest_idx);
+ debug_assert_eq!(src_idx & mask, src_idx);
+ let imm4 = (src_idx as u32) << (shift - 1);
+ let imm5 = imm5 | ((dest_idx as u32) << shift);
+ sink.put4(
+ 0b011_01110000_00000_0_0000_1_00000_00000
+ | (imm5 << 16)
+ | (imm4 << 11)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::VecRRR {
+ rd,
+ rn,
+ rm,
+ alu_op,
+ size,
+ } => {
+ let (q, enc_size) = size.enc_size();
+ let is_float = match alu_op {
+ VecALUOp::Fcmeq
+ | VecALUOp::Fcmgt
+ | VecALUOp::Fcmge
+ | VecALUOp::Fadd
+ | VecALUOp::Fsub
+ | VecALUOp::Fdiv
+ | VecALUOp::Fmax
+ | VecALUOp::Fmin
+ | VecALUOp::Fmul => true,
+ _ => false,
+ };
+ let enc_float_size = match (is_float, size) {
+ (true, VectorSize::Size32x2) => 0b0,
+ (true, VectorSize::Size32x4) => 0b0,
+ (true, VectorSize::Size64x2) => 0b1,
+ (true, _) => unimplemented!(),
+ _ => 0,
+ };
+
+ let (top11, bit15_10) = match alu_op {
+ VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
+ VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),
+ VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),
+ VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),
+ VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),
+ VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),
+ VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),
+ VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),
+ VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),
+ VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001),
+ VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),
+ VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),
+                // The following logical instructions operate on bytes, so they are not
+                // encoded differently for the different vector types.
+ VecALUOp::And => (0b000_01110_00_1, 0b000111),
+ VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
+ VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
+ VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
+ VecALUOp::Bsl => (0b001_01110_01_1, 0b000111),
+ VecALUOp::Umaxp => (0b001_01110_00_1 | enc_size << 1, 0b101001),
+ VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
+ VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
+ VecALUOp::Mul => {
+ debug_assert_ne!(size, VectorSize::Size64x2);
+ (0b000_01110_00_1 | enc_size << 1, 0b100111)
+ }
+ VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),
+ VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),
+ VecALUOp::Umin => (0b001_01110_00_1 | enc_size << 1, 0b011011),
+ VecALUOp::Smin => (0b000_01110_00_1 | enc_size << 1, 0b011011),
+ VecALUOp::Umax => (0b001_01110_00_1 | enc_size << 1, 0b011001),
+ VecALUOp::Smax => (0b000_01110_00_1 | enc_size << 1, 0b011001),
+ VecALUOp::Urhadd => (0b001_01110_00_1 | enc_size << 1, 0b000101),
+ VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),
+ VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),
+ VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),
+ VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
+ VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
+ VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
+ VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
+ VecALUOp::Umlal => {
+ debug_assert!(!size.is_128bits());
+ (0b001_01110_00_1 | enc_size << 1, 0b100000)
+ }
+ VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
+ VecALUOp::Smull => (0b000_01110_00_1 | enc_size << 1, 0b110000),
+ VecALUOp::Smull2 => (0b010_01110_00_1 | enc_size << 1, 0b110000),
+ };
+ let top11 = match alu_op {
+ VecALUOp::Smull | VecALUOp::Smull2 => top11,
+ _ if is_float => top11 | (q << 9) | enc_float_size << 1,
+ _ => top11 | (q << 9),
+ };
+ sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
+ }
+ &Inst::VecLoadReplicate { rd, rn, size } => {
+ let (q, size) = size.enc_size();
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ // Register the offset at which the actual load instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+
+ sink.put4(enc_ldst_vec(q, size, rn, rd));
+ }
+ &Inst::VecCSel { rd, rn, rm, cond } => {
+ /* Emit this:
+ b.cond else
+ mov rd, rm
+ b out
+ else:
+ mov rd, rn
+ out:
+
+ Note, we could do better in the cases where rd == rn or rd == rm.
+ */
+ let else_label = sink.get_label();
+ let out_label = sink.get_label();
+
+ // b.cond else
+ let br_else_offset = sink.cur_offset();
+ sink.put4(enc_conditional_br(
+ BranchTarget::Label(else_label),
+ CondBrKind::Cond(cond),
+ ));
+ sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);
+
+ // mov rd, rm
+ sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));
+
+ // b out
+ let b_out_offset = sink.cur_offset();
+ sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);
+ sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);
+ sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));
+
+ // else:
+ sink.bind_label(else_label);
+
+ // mov rd, rn
+ sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
+
+ // out:
+ sink.bind_label(out_label);
+ }
+ &Inst::MovToNZCV { rn } => {
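+                // msr nzcv, <rn>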
+ sink.put4(0xd51b4200 | machreg_to_gpr(rn));
+ }
+ &Inst::MovFromNZCV { rd } => {
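+                // mrs <rd>, nzcv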
+ sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg()));
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ } if from_bits >= 8 => {
+ let top22 = match (signed, from_bits, to_bits) {
+ (false, 8, 32) => 0b010_100110_0_000000_000111, // UXTB (32)
+ (false, 16, 32) => 0b010_100110_0_000000_001111, // UXTH (32)
+ (true, 8, 32) => 0b000_100110_0_000000_000111, // SXTB (32)
+ (true, 16, 32) => 0b000_100110_0_000000_001111, // SXTH (32)
+ // The 64-bit unsigned variants are the same as the 32-bit ones,
+ // because writes to Wn zero out the top 32 bits of Xn
+ (false, 8, 64) => 0b010_100110_0_000000_000111, // UXTB (64)
+ (false, 16, 64) => 0b010_100110_0_000000_001111, // UXTH (64)
+ (true, 8, 64) => 0b100_100110_1_000000_000111, // SXTB (64)
+ (true, 16, 64) => 0b100_100110_1_000000_001111, // SXTH (64)
+ // 32-to-64: the unsigned case is a 'mov' (special-cased below).
+ (false, 32, 64) => 0, // MOV
+ (true, 32, 64) => 0b100_100110_1_000000_011111, // SXTW (64)
+ _ => panic!(
+ "Unsupported extend combination: signed = {}, from_bits = {}, to_bits = {}",
+ signed, from_bits, to_bits
+ ),
+ };
+ if top22 != 0 {
+ sink.put4(enc_extend(top22, rd, rn));
+ } else {
+ Inst::mov32(rd, rn).emit(sink, emit_info, state);
+ }
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ } if from_bits == 1 && signed => {
+ assert!(to_bits <= 64);
+ // Reduce sign-extend-from-1-bit to:
+ // - and rd, rn, #1
+ // - sub rd, zr, rd
+
+ // We don't have ImmLogic yet, so we just hardcode this. FIXME.
+ sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()));
+ let sub_inst = Inst::AluRRR {
+ alu_op: ALUOp::Sub64,
+ rd,
+ rn: zero_reg(),
+ rm: rd.to_reg(),
+ };
+ sub_inst.emit(sink, emit_info, state);
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ } if from_bits == 1 && !signed => {
+ assert!(to_bits <= 64);
+ // Reduce zero-extend-from-1-bit to:
+ // - and rd, rn, #1
+
+ // We don't have ImmLogic yet, so we just hardcode this. FIXME.
+ sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()));
+ }
+ &Inst::Extend { .. } => {
+ panic!("Unsupported extend variant");
+ }
+ &Inst::Jump { ref dest } => {
+ let off = sink.cur_offset();
+            // If the jump target is a label, record the label use so that a fixup can
+            // occur later.
+ if let Some(l) = dest.as_label() {
+ sink.use_label_at_offset(off, l, LabelUse::Branch26);
+ sink.add_uncond_branch(off, off + 4, l);
+ }
+ // Emit the jump itself.
+ sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
+ }
+ &Inst::Ret => {
+ sink.put4(0xd65f03c0);
+ }
+ &Inst::EpiloguePlaceholder => {
+ // Noop; this is just a placeholder for epilogues.
+ }
+ &Inst::Call { ref info } => {
+ if let Some(s) = state.take_stack_map() {
+ sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
+ }
+ let loc = state.cur_srcloc();
+ sink.add_reloc(loc, Reloc::Arm64Call, &info.dest, 0);
+ sink.put4(enc_jump26(0b100101, 0));
+ if info.opcode.is_call() {
+ sink.add_call_site(loc, info.opcode);
+ }
+ }
+ &Inst::CallInd { ref info } => {
+ if let Some(s) = state.take_stack_map() {
+ sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
+ }
+ sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5));
+ let loc = state.cur_srcloc();
+ if info.opcode.is_call() {
+ sink.add_call_site(loc, info.opcode);
+ }
+ }
+ &Inst::CondBr {
+ taken,
+ not_taken,
+ kind,
+ } => {
+ // Conditional part first.
+ let cond_off = sink.cur_offset();
+ if let Some(l) = taken.as_label() {
+ sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
+ let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
+ sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
+ }
+ sink.put4(enc_conditional_br(taken, kind));
+
+ // Unconditional part next.
+ let uncond_off = sink.cur_offset();
+ if let Some(l) = not_taken.as_label() {
+ sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
+ sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
+ }
+ sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
+ }
+ &Inst::TrapIf { kind, trap_code } => {
+ // condbr KIND, LABEL
+ let off = sink.cur_offset();
+ let label = sink.get_label();
+ sink.put4(enc_conditional_br(
+ BranchTarget::Label(label),
+ kind.invert(),
+ ));
+ sink.use_label_at_offset(off, label, LabelUse::Branch19);
+ // udf
+ let trap = Inst::Udf { trap_code };
+ trap.emit(sink, emit_info, state);
+ // LABEL:
+ sink.bind_label(label);
+ }
+ &Inst::IndirectBr { rn, .. } => {
+ sink.put4(enc_br(rn));
+ }
+ &Inst::Nop0 => {}
+ &Inst::Nop4 => {
+ sink.put4(0xd503201f);
+ }
+ &Inst::Brk => {
+ sink.put4(0xd4200000);
+ }
+ &Inst::Udf { trap_code } => {
+ let srcloc = state.cur_srcloc();
+ sink.add_trap(srcloc, trap_code);
+ if let Some(s) = state.take_stack_map() {
+ sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
+ }
+ sink.put4(0xd4a00000);
+ }
+ &Inst::Adr { rd, off } => {
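+                // ADR takes a 21-bit signed byte offset, i.e. a range of +/- 1MiB.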
+ assert!(off > -(1 << 20));
+ assert!(off < (1 << 20));
+ sink.put4(enc_adr(off, rd));
+ }
+ &Inst::Word4 { data } => {
+ sink.put4(data);
+ }
+ &Inst::Word8 { data } => {
+ sink.put8(data);
+ }
+ &Inst::JTSequence {
+ ridx,
+ rtmp1,
+ rtmp2,
+ ref info,
+ ..
+ } => {
+ // This sequence is *one* instruction in the vcode, and is expanded only here at
+ // emission time, because we cannot allow the regalloc to insert spills/reloads in
+ // the middle; we depend on hardcoded PC-rel addressing below.
+
+ // Branch to default when condition code from prior comparison indicates.
+ let br = enc_conditional_br(info.default_target, CondBrKind::Cond(Cond::Hs));
+ // No need to inform the sink's branch folding logic about this branch, because it
+ // will not be merged with any other branch, flipped, or elided (it is not preceded
+ // or succeeded by any other branch). Just emit it with the label use.
+ let default_br_offset = sink.cur_offset();
+ if let BranchTarget::Label(l) = info.default_target {
+ sink.use_label_at_offset(default_br_offset, l, LabelUse::Branch19);
+ }
+ sink.put4(br);
+
+            // Save the index in a tmp (the live range of ridx only extends to the start
+            // of this sequence; rtmp1 or rtmp2 may overwrite it).
+ let inst = Inst::gen_move(rtmp2, ridx, I64);
+ inst.emit(sink, emit_info, state);
+ // Load address of jump table
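+            // (the table starts 16 bytes past this point: after the adr, ldrsw, add
+            // and br below)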
+ let inst = Inst::Adr { rd: rtmp1, off: 16 };
+ inst.emit(sink, emit_info, state);
+ // Load value out of jump table
+ let inst = Inst::SLoad32 {
+ rd: rtmp2,
+ mem: AMode::reg_plus_reg_scaled_extended(
+ rtmp1.to_reg(),
+ rtmp2.to_reg(),
+ I32,
+ ExtendOp::UXTW,
+ ),
+ flags: MemFlags::trusted(),
+ };
+ inst.emit(sink, emit_info, state);
+ // Add base of jump table to jump-table-sourced block offset
+ let inst = Inst::AluRRR {
+ alu_op: ALUOp::Add64,
+ rd: rtmp1,
+ rn: rtmp1.to_reg(),
+ rm: rtmp2.to_reg(),
+ };
+ inst.emit(sink, emit_info, state);
+ // Branch to computed address. (`targets` here is only used for successor queries
+ // and is not needed for emission.)
+ let inst = Inst::IndirectBr {
+ rn: rtmp1.to_reg(),
+ targets: vec![],
+ };
+ inst.emit(sink, emit_info, state);
+ // Emit jump table (table of 32-bit offsets).
+ let jt_off = sink.cur_offset();
+ for &target in info.targets.iter() {
+ let word_off = sink.cur_offset();
+                // `off_into_table` is an addend embedded in the label use and patched
+                // in at the end of codegen. The offset is initially relative to this
+                // jump table entry; with the extra addend, it becomes relative to the
+                // jump table's start after patching.
+ let off_into_table = word_off - jt_off;
+ sink.use_label_at_offset(
+ word_off,
+ target.as_label().unwrap(),
+ LabelUse::PCRel32,
+ );
+ sink.put4(off_into_table);
+ }
+
+ // Lowering produces an EmitIsland before using a JTSequence, so we can safely
+ // disable the worst-case-size check in this case.
+ start_off = sink.cur_offset();
+ }
+ &Inst::LoadExtName {
+ rd,
+ ref name,
+ offset,
+ } => {
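+            // Emit `ldr <rd>, pc+8`, a branch over the next 8 bytes, then an 8-byte
+            // slot that the Abs8 relocation fills in with the symbol's address.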
+ let inst = Inst::ULoad64 {
+ rd,
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ };
+ inst.emit(sink, emit_info, state);
+ let inst = Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(12),
+ };
+ inst.emit(sink, emit_info, state);
+ let srcloc = state.cur_srcloc();
+ sink.add_reloc(srcloc, Reloc::Abs8, name, offset);
+ if emit_info.flags().emit_all_ones_funcaddrs() {
+ sink.put8(u64::max_value());
+ } else {
+ sink.put8(0);
+ }
+ }
+ &Inst::LoadAddr { rd, ref mem } => {
+ let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
+ for inst in mem_insts.into_iter() {
+ inst.emit(sink, emit_info, state);
+ }
+
+ let (reg, index_reg, offset) = match mem {
+ AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0),
+ AMode::Unscaled(r, simm9) => (r, None, simm9.value()),
+ AMode::UnsignedOffset(r, uimm12scaled) => {
+ (r, None, uimm12scaled.value() as i32)
+ }
+ _ => panic!("Unsupported case for LoadAddr: {:?}", mem),
+ };
+ let abs_offset = if offset < 0 {
+ -offset as u64
+ } else {
+ offset as u64
+ };
+ let alu_op = if offset < 0 {
+ ALUOp::Sub64
+ } else {
+ ALUOp::Add64
+ };
+
+ if let Some((idx, extendop)) = index_reg {
+ let add = Inst::AluRRRExtend {
+ alu_op: ALUOp::Add64,
+ rd,
+ rn: reg,
+ rm: idx,
+ extendop,
+ };
+
+ add.emit(sink, emit_info, state);
+ } else if offset == 0 {
+ if reg != rd.to_reg() {
+ let mov = Inst::mov(rd, reg);
+
+ mov.emit(sink, emit_info, state);
+ }
+ } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
+ let add = Inst::AluRRImm12 {
+ alu_op,
+ rd,
+ rn: reg,
+ imm12,
+ };
+ add.emit(sink, emit_info, state);
+ } else {
+ // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction
+ // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
+ // that no other instructions will be inserted here (we're emitting directly),
+ // and a live range of `tmp2` should not span this instruction, so this use
+ // should otherwise be correct.
+ debug_assert!(rd.to_reg() != tmp2_reg());
+ debug_assert!(reg != tmp2_reg());
+ let tmp = writable_tmp2_reg();
+ for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
+ insn.emit(sink, emit_info, state);
+ }
+ let add = Inst::AluRRR {
+ alu_op,
+ rd,
+ rn: reg,
+ rm: tmp.to_reg(),
+ };
+ add.emit(sink, emit_info, state);
+ }
+ }
+ &Inst::VirtualSPOffsetAdj { offset } => {
+ debug!(
+ "virtual sp offset adjusted by {} -> {}",
+ offset,
+ state.virtual_sp_offset + offset,
+ );
+ state.virtual_sp_offset += offset;
+ }
+ &Inst::EmitIsland { needed_space } => {
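+                // The extra 4 bytes account for the branch emitted below to jump over
+                // the island.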
+ if sink.island_needed(needed_space + 4) {
+ let jump_around_label = sink.get_label();
+ let jmp = Inst::Jump {
+ dest: BranchTarget::Label(jump_around_label),
+ };
+ jmp.emit(sink, emit_info, state);
+ sink.emit_island();
+ sink.bind_label(jump_around_label);
+ }
+ }
+ }
+
+ let end_off = sink.cur_offset();
+ debug_assert!((end_off - start_off) <= Inst::worst_case_size());
+
+ state.clear_post_insn();
+ }
+
+ fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
+ self.print_with_state(mb_rru, state)
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit_tests.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit_tests.rs
new file mode 100644
index 0000000000..eb31963b5d
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -0,0 +1,5143 @@
+use crate::ir::types::*;
+use crate::isa::aarch64::inst::*;
+use crate::isa::test_utils;
+use crate::isa::CallConv;
+use crate::settings;
+
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+
+#[test]
+fn test_aarch64_binemit() {
+ let mut insns = Vec::<(Inst, &str, &str)>::new();
+
+ // N.B.: the architecture is little-endian, so when transcribing the 32-bit
+ // hex instructions from e.g. objdump disassembly, one must swap the bytes
+ // seen below. (E.g., a `ret` is normally written as the u32 `D65F03C0`,
+ // but we write it here as C0035FD6.)
+
+ // Useful helper script to produce the encodings from the text:
+ //
+ // #!/bin/sh
+ // tmp=`mktemp /tmp/XXXXXXXX.o`
+ // aarch64-linux-gnu-as /dev/stdin -o $tmp
+ // aarch64-linux-gnu-objdump -d $tmp
+ // rm -f $tmp
+ //
+ // Then:
+ //
+ // $ echo "mov x1, x2" | aarch64inst.sh
+ insns.push((Inst::Ret, "C0035FD6", "ret"));
+ insns.push((Inst::Nop0, "", "nop-zero-len"));
+ insns.push((Inst::Nop4, "1F2003D5", "nop"));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Add32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100030B",
+ "add w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Add64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400068B",
+ "add x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Sub32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100034B",
+ "sub w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Sub64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40006CB",
+ "sub x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Orr32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100032A",
+ "orr w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Orr64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40006AA",
+ "orr x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::And32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100030A",
+ "and w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::And64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400068A",
+ "and x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::SubS32,
+ rd: writable_zero_reg(),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "5F00036B",
+ // TODO: Display as cmp
+ "subs wzr, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::SubS32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100036B",
+ "subs w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::SubS64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40006EB",
+ "subs x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::AddS32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100032B",
+ "adds w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::AddS64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40006AB",
+ "adds x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::AddS64,
+ rd: writable_zero_reg(),
+ rn: xreg(5),
+ imm12: Imm12::maybe_from_u64(1).unwrap(),
+ },
+ "BF0400B1",
+ // TODO: Display as cmn.
+ "adds xzr, x5, #1",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::SDiv64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40CC69A",
+ "sdiv x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::UDiv64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A408C69A",
+ "udiv x4, x5, x6",
+ ));
+
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Eor32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400064A",
+ "eor w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Eor64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40006CA",
+ "eor x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::AndNot32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400260A",
+ "bic w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::AndNot64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400268A",
+ "bic x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::OrrNot32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400262A",
+ "orn w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::OrrNot64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40026AA",
+ "orn x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::EorNot32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400264A",
+ "eon w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::EorNot64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40026CA",
+ "eon x4, x5, x6",
+ ));
+
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::RotR32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A42CC61A",
+ "ror w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::RotR64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A42CC69A",
+ "ror x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsr32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A424C61A",
+ "lsr w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsr64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A424C69A",
+ "lsr x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Asr32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A428C61A",
+ "asr w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Asr64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A428C69A",
+ "asr x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsl32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A420C61A",
+ "lsl w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsl64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A420C69A",
+ "lsl x4, x5, x6",
+ ));
+
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Add32,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D0411",
+ "add w7, w8, #291",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Add32,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: true,
+ },
+ },
+ "078D4411",
+ "add w7, w8, #1191936",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Add64,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D0491",
+ "add x7, x8, #291",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Sub32,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D0451",
+ "sub w7, w8, #291",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Sub64,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D04D1",
+ "sub x7, x8, #291",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::SubS32,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D0471",
+ "subs w7, w8, #291",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::SubS64,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D04F1",
+ "subs x7, x8, #291",
+ ));
+
+ insns.push((
+ Inst::AluRRRExtend {
+ alu_op: ALUOp::Add32,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ rm: xreg(9),
+ extendop: ExtendOp::SXTB,
+ },
+ "0781290B",
+ "add w7, w8, w9, SXTB",
+ ));
+
+ insns.push((
+ Inst::AluRRRExtend {
+ alu_op: ALUOp::Add64,
+ rd: writable_xreg(15),
+ rn: xreg(16),
+ rm: xreg(17),
+ extendop: ExtendOp::UXTB,
+ },
+ "0F02318B",
+ "add x15, x16, x17, UXTB",
+ ));
+
+ insns.push((
+ Inst::AluRRRExtend {
+ alu_op: ALUOp::Sub32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ extendop: ExtendOp::SXTH,
+ },
+ "41A0234B",
+ "sub w1, w2, w3, SXTH",
+ ));
+
+ insns.push((
+ Inst::AluRRRExtend {
+ alu_op: ALUOp::Sub64,
+ rd: writable_xreg(20),
+ rn: xreg(21),
+ rm: xreg(22),
+ extendop: ExtendOp::UXTW,
+ },
+ "B44236CB",
+ "sub x20, x21, x22, UXTW",
+ ));
+
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Add32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(20).unwrap(),
+ ),
+ },
+ "6A510C0B",
+ "add w10, w11, w12, LSL 20",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Add64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::ASR,
+ ShiftOpShiftImm::maybe_from_shift(42).unwrap(),
+ ),
+ },
+ "6AA98C8B",
+ "add x10, x11, x12, ASR 42",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Sub32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C4B",
+ "sub w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Sub64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0CCB",
+ "sub x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Orr32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C2A",
+ "orr w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Orr64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0CAA",
+ "orr x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::And32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C0A",
+ "and w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::And64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C8A",
+ "and x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Eor32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C4A",
+ "eor w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Eor64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0CCA",
+ "eor x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::OrrNot32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2C2A",
+ "orn w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::OrrNot64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2CAA",
+ "orn x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::AndNot32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2C0A",
+ "bic w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::AndNot64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2C8A",
+ "bic x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::EorNot32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2C4A",
+ "eon w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::EorNot64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2CCA",
+ "eon x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::AddS32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C2B",
+ "adds w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::AddS64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0CAB",
+ "adds x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::SubS32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C6B",
+ "subs w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::SubS64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0CEB",
+ "subs x10, x11, x12, LSL 23",
+ ));
+
+ insns.push((
+ Inst::AluRRRExtend {
+ alu_op: ALUOp::SubS64,
+ rd: writable_zero_reg(),
+ rn: stack_reg(),
+ rm: xreg(12),
+ extendop: ExtendOp::UXTX,
+ },
+ "FF632CEB",
+ "subs xzr, sp, x12, UXTX",
+ ));
+
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp3::MAdd32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ ra: xreg(4),
+ },
+ "4110031B",
+ "madd w1, w2, w3, w4",
+ ));
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp3::MAdd64,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ ra: xreg(4),
+ },
+ "4110039B",
+ "madd x1, x2, x3, x4",
+ ));
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp3::MSub32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ ra: xreg(4),
+ },
+ "4190031B",
+ "msub w1, w2, w3, w4",
+ ));
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp3::MSub64,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ ra: xreg(4),
+ },
+ "4190039B",
+ "msub x1, x2, x3, x4",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::SMulH,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "417C439B",
+ "smulh x1, x2, x3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::UMulH,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "417CC39B",
+ "umulh x1, x2, x3",
+ ));
+
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::RotR32,
+ rd: writable_xreg(20),
+ rn: xreg(21),
+ immshift: ImmShift::maybe_from_u64(19).unwrap(),
+ },
+ "B44E9513",
+ "ror w20, w21, #19",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::RotR64,
+ rd: writable_xreg(20),
+ rn: xreg(21),
+ immshift: ImmShift::maybe_from_u64(42).unwrap(),
+ },
+ "B4AAD593",
+ "ror x20, x21, #42",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsr32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ immshift: ImmShift::maybe_from_u64(13).unwrap(),
+ },
+ "6A7D0D53",
+ "lsr w10, w11, #13",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsr64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ immshift: ImmShift::maybe_from_u64(57).unwrap(),
+ },
+ "6AFD79D3",
+ "lsr x10, x11, #57",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Asr32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ immshift: ImmShift::maybe_from_u64(7).unwrap(),
+ },
+ "A47C0713",
+ "asr w4, w5, #7",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Asr64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ immshift: ImmShift::maybe_from_u64(35).unwrap(),
+ },
+ "A4FC6393",
+ "asr x4, x5, #35",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsl32,
+ rd: writable_xreg(8),
+ rn: xreg(9),
+ immshift: ImmShift::maybe_from_u64(24).unwrap(),
+ },
+ "281D0853",
+ "lsl w8, w9, #24",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsl64,
+ rd: writable_xreg(8),
+ rn: xreg(9),
+ immshift: ImmShift::maybe_from_u64(63).unwrap(),
+ },
+ "280141D3",
+ "lsl x8, x9, #63",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsl32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ immshift: ImmShift::maybe_from_u64(0).unwrap(),
+ },
+ "6A7D0053",
+ "lsl w10, w11, #0",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsl64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ immshift: ImmShift::maybe_from_u64(0).unwrap(),
+ },
+ "6AFD40D3",
+ "lsl x10, x11, #0",
+ ));
+
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::And32,
+ rd: writable_xreg(21),
+ rn: xreg(27),
+ imml: ImmLogic::maybe_from_u64(0x80003fff, I32).unwrap(),
+ },
+ "753B0112",
+ "and w21, w27, #2147500031",
+ ));
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::And64,
+ rd: writable_xreg(7),
+ rn: xreg(6),
+ imml: ImmLogic::maybe_from_u64(0x3fff80003fff800, I64).unwrap(),
+ },
+ "C7381592",
+ "and x7, x6, #288221580125796352",
+ ));
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::Orr32,
+ rd: writable_xreg(1),
+ rn: xreg(5),
+ imml: ImmLogic::maybe_from_u64(0x100000, I32).unwrap(),
+ },
+ "A1000C32",
+ "orr w1, w5, #1048576",
+ ));
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::Orr64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ imml: ImmLogic::maybe_from_u64(0x8181818181818181, I64).unwrap(),
+ },
+ "A4C401B2",
+ "orr x4, x5, #9331882296111890817",
+ ));
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::Eor32,
+ rd: writable_xreg(1),
+ rn: xreg(5),
+ imml: ImmLogic::maybe_from_u64(0x00007fff, I32).unwrap(),
+ },
+ "A1380052",
+ "eor w1, w5, #32767",
+ ));
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::Eor64,
+ rd: writable_xreg(10),
+ rn: xreg(8),
+ imml: ImmLogic::maybe_from_u64(0x8181818181818181, I64).unwrap(),
+ },
+ "0AC501D2",
+ "eor x10, x8, #9331882296111890817",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::RBit32,
+ rd: writable_xreg(1),
+ rn: xreg(10),
+ },
+ "4101C05A",
+ "rbit w1, w10",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::RBit64,
+ rd: writable_xreg(1),
+ rn: xreg(10),
+ },
+ "4101C0DA",
+ "rbit x1, x10",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::Clz32,
+ rd: writable_xreg(15),
+ rn: xreg(3),
+ },
+ "6F10C05A",
+ "clz w15, w3",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::Clz64,
+ rd: writable_xreg(15),
+ rn: xreg(3),
+ },
+ "6F10C0DA",
+ "clz x15, x3",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::Cls32,
+ rd: writable_xreg(21),
+ rn: xreg(16),
+ },
+ "1516C05A",
+ "cls w21, w16",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::Cls64,
+ rd: writable_xreg(21),
+ rn: xreg(16),
+ },
+ "1516C0DA",
+ "cls x21, x16",
+ ));
+
+ insns.push((
+ Inst::ULoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "41004038",
+ "ldurb w1, [x2]",
+ ));
+ insns.push((
+ Inst::ULoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::zero(I8)),
+ flags: MemFlags::trusted(),
+ },
+ "41004039",
+ "ldrb w1, [x2]",
+ ));
+ insns.push((
+ Inst::ULoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::RegReg(xreg(2), xreg(5)),
+ flags: MemFlags::trusted(),
+ },
+ "41686538",
+ "ldrb w1, [x2, x5]",
+ ));
+ insns.push((
+ Inst::SLoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "41008038",
+ "ldursb x1, [x2]",
+ ));
+ insns.push((
+ Inst::SLoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(63, I8).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC8039",
+ "ldrsb x1, [x2, #63]",
+ ));
+ insns.push((
+ Inst::SLoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::RegReg(xreg(2), xreg(5)),
+ flags: MemFlags::trusted(),
+ },
+ "4168A538",
+ "ldrsb x1, [x2, x5]",
+ ));
+ insns.push((
+ Inst::ULoad16 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::maybe_from_i64(5).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41504078",
+ "ldurh w1, [x2, #5]",
+ ));
+ insns.push((
+ Inst::ULoad16 {
+ rd: writable_xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(8, I16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41104079",
+ "ldrh w1, [x2, #8]",
+ ));
+ insns.push((
+ Inst::ULoad16 {
+ rd: writable_xreg(1),
+ mem: AMode::RegScaled(xreg(2), xreg(3), I16),
+ flags: MemFlags::trusted(),
+ },
+ "41786378",
+ "ldrh w1, [x2, x3, LSL #1]",
+ ));
+ insns.push((
+ Inst::SLoad16 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "41008078",
+ "ldursh x1, [x2]",
+ ));
+ insns.push((
+ Inst::SLoad16 {
+ rd: writable_xreg(28),
+ mem: AMode::UnsignedOffset(xreg(20), UImm12Scaled::maybe_from_i64(24, I16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "9C328079",
+ "ldrsh x28, [x20, #24]",
+ ));
+ insns.push((
+ Inst::SLoad16 {
+ rd: writable_xreg(28),
+ mem: AMode::RegScaled(xreg(20), xreg(20), I16),
+ flags: MemFlags::trusted(),
+ },
+ "9C7AB478",
+ "ldrsh x28, [x20, x20, LSL #1]",
+ ));
+ insns.push((
+ Inst::ULoad32 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "410040B8",
+ "ldur w1, [x2]",
+ ));
+ insns.push((
+ Inst::ULoad32 {
+ rd: writable_xreg(12),
+ mem: AMode::UnsignedOffset(xreg(0), UImm12Scaled::maybe_from_i64(204, I32).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "0CCC40B9",
+ "ldr w12, [x0, #204]",
+ ));
+ insns.push((
+ Inst::ULoad32 {
+ rd: writable_xreg(1),
+ mem: AMode::RegScaled(xreg(2), xreg(12), I32),
+ flags: MemFlags::trusted(),
+ },
+ "41786CB8",
+ "ldr w1, [x2, x12, LSL #2]",
+ ));
+ insns.push((
+ Inst::SLoad32 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "410080B8",
+ "ldursw x1, [x2]",
+ ));
+ insns.push((
+ Inst::SLoad32 {
+ rd: writable_xreg(12),
+ mem: AMode::UnsignedOffset(xreg(1), UImm12Scaled::maybe_from_i64(16380, I32).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "2CFCBFB9",
+ "ldrsw x12, [x1, #16380]",
+ ));
+ insns.push((
+ Inst::SLoad32 {
+ rd: writable_xreg(1),
+ mem: AMode::RegScaled(xreg(5), xreg(1), I32),
+ flags: MemFlags::trusted(),
+ },
+ "A178A1B8",
+ "ldrsw x1, [x5, x1, LSL #2]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "410040F8",
+ "ldur x1, [x2]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::maybe_from_i64(-256).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "410050F8",
+ "ldur x1, [x2, #-256]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::maybe_from_i64(255).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41F04FF8",
+ "ldur x1, [x2, #255]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(32760, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC7FF9",
+ "ldr x1, [x2, #32760]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegReg(xreg(2), xreg(3)),
+ flags: MemFlags::trusted(),
+ },
+ "416863F8",
+ "ldr x1, [x2, x3]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegScaled(xreg(2), xreg(3), I64),
+ flags: MemFlags::trusted(),
+ },
+ "417863F8",
+ "ldr x1, [x2, x3, LSL #3]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::SXTW),
+ flags: MemFlags::trusted(),
+ },
+ "41D863F8",
+ "ldr x1, [x2, w3, SXTW #3]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegExtended(xreg(2), xreg(3), ExtendOp::SXTW),
+ flags: MemFlags::trusted(),
+ },
+ "41C863F8",
+ "ldr x1, [x2, w3, SXTW]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::Label(MemLabel::PCRel(64)),
+ flags: MemFlags::trusted(),
+ },
+ "01020058",
+ "ldr x1, pc+64",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "410C41F8",
+ "ldr x1, [x2, #16]!",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "410441F8",
+ "ldr x1, [x2], #16",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::FPOffset(32768, I8),
+ flags: MemFlags::trusted(),
+ },
+ "100090D2B063308B010240F9",
+ "movz x16, #32768 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::FPOffset(-32768, I8),
+ flags: MemFlags::trusted(),
+ },
+ "F0FF8F92B063308B010240F9",
+ "movn x16, #32767 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::FPOffset(1048576, I8), // 2^20
+ flags: MemFlags::trusted(),
+ },
+ "1002A0D2B063308B010240F9",
+ "movz x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::FPOffset(1048576 + 1, I8), // 2^20 + 1
+ flags: MemFlags::trusted(),
+ },
+ "300080521002A072B063308B010240F9",
+ "movz w16, #1 ; movk w16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
+ ));
+
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegOffset(xreg(7), 8, I64),
+ flags: MemFlags::trusted(),
+ },
+ "E18040F8",
+ "ldur x1, [x7, #8]",
+ ));
+
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegOffset(xreg(7), 1024, I64),
+ flags: MemFlags::trusted(),
+ },
+ "E10042F9",
+ "ldr x1, [x7, #1024]",
+ ));
+
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegOffset(xreg(7), 1048576, I64),
+ flags: MemFlags::trusted(),
+ },
+ "1002A0D2F060308B010240F9",
+ "movz x16, #16, LSL #16 ; add x16, x7, x16, UXTX ; ldr x1, [x16]",
+ ));
+
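+ // Scalar integer stores (strb/strh/str and unscaled stur variants) across the addressing modes.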
+ insns.push((
+ Inst::Store8 {
+ rd: xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "41000038",
+ "sturb w1, [x2]",
+ ));
+ insns.push((
+ Inst::Store8 {
+ rd: xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(4095, I8).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC3F39",
+ "strb w1, [x2, #4095]",
+ ));
+ insns.push((
+ Inst::Store16 {
+ rd: xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "41000078",
+ "sturh w1, [x2]",
+ ));
+ insns.push((
+ Inst::Store16 {
+ rd: xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(8190, I16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC3F79",
+ "strh w1, [x2, #8190]",
+ ));
+ insns.push((
+ Inst::Store32 {
+ rd: xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "410000B8",
+ "stur w1, [x2]",
+ ));
+ insns.push((
+ Inst::Store32 {
+ rd: xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(16380, I32).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC3FB9",
+ "str w1, [x2, #16380]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "410000F8",
+ "stur x1, [x2]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(32760, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC3FF9",
+ "str x1, [x2, #32760]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::RegReg(xreg(2), xreg(3)),
+ flags: MemFlags::trusted(),
+ },
+ "416823F8",
+ "str x1, [x2, x3]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::RegScaled(xreg(2), xreg(3), I64),
+ flags: MemFlags::trusted(),
+ },
+ "417823F8",
+ "str x1, [x2, x3, LSL #3]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::UXTW),
+ flags: MemFlags::trusted(),
+ },
+ "415823F8",
+ "str x1, [x2, w3, UXTW #3]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::RegExtended(xreg(2), xreg(3), ExtendOp::UXTW),
+ flags: MemFlags::trusted(),
+ },
+ "414823F8",
+ "str x1, [x2, w3, UXTW]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "410C01F8",
+ "str x1, [x2, #16]!",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "410401F8",
+ "str x1, [x2], #16",
+ ));
+
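+ // Register-pair stores and loads (stp/ldp), including pre- and post-indexed forms.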
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(8),
+ rt2: xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::zero(I64)),
+ flags: MemFlags::trusted(),
+ },
+ "482500A9",
+ "stp x8, x9, [x10]",
+ ));
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(8),
+ rt2: xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(504, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "48A51FA9",
+ "stp x8, x9, [x10, #504]",
+ ));
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(8),
+ rt2: xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-64, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "48253CA9",
+ "stp x8, x9, [x10, #-64]",
+ ));
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(21),
+ rt2: xreg(28),
+ mem: PairAMode::SignedOffset(xreg(1), SImm7Scaled::maybe_from_i64(-512, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "357020A9",
+ "stp x21, x28, [x1, #-512]",
+ ));
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(8),
+ rt2: xreg(9),
+ mem: PairAMode::PreIndexed(
+ writable_xreg(10),
+ SImm7Scaled::maybe_from_i64(-64, I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ },
+ "4825BCA9",
+ "stp x8, x9, [x10, #-64]!",
+ ));
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(15),
+ rt2: xreg(16),
+ mem: PairAMode::PostIndexed(
+ writable_xreg(20),
+ SImm7Scaled::maybe_from_i64(504, I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ },
+ "8FC29FA8",
+ "stp x15, x16, [x20], #504",
+ ));
+
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::zero(I64)),
+ flags: MemFlags::trusted(),
+ },
+ "482540A9",
+ "ldp x8, x9, [x10]",
+ ));
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(504, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "48A55FA9",
+ "ldp x8, x9, [x10, #504]",
+ ));
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-64, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "48257CA9",
+ "ldp x8, x9, [x10, #-64]",
+ ));
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-512, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "482560A9",
+ "ldp x8, x9, [x10, #-512]",
+ ));
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(9),
+ mem: PairAMode::PreIndexed(
+ writable_xreg(10),
+ SImm7Scaled::maybe_from_i64(-64, I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ },
+ "4825FCA9",
+ "ldp x8, x9, [x10, #-64]!",
+ ));
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(25),
+ mem: PairAMode::PostIndexed(
+ writable_xreg(12),
+ SImm7Scaled::maybe_from_i64(504, I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ },
+ "88E5DFA8",
+ "ldp x8, x25, [x12], #504",
+ ));
+
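+ // Register moves and move-wide immediates (mov, movz, movn, movk).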
+ insns.push((
+ Inst::Mov64 {
+ rd: writable_xreg(8),
+ rm: xreg(9),
+ },
+ "E80309AA",
+ "mov x8, x9",
+ ));
+ insns.push((
+ Inst::Mov32 {
+ rd: writable_xreg(8),
+ rm: xreg(9),
+ },
+ "E803092A",
+ "mov w8, w9",
+ ));
+
+ insns.push((
+ Inst::MovZ {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FF9FD2",
+ "movz x8, #65535",
+ ));
+ insns.push((
+ Inst::MovZ {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFBFD2",
+ "movz x8, #65535, LSL #16",
+ ));
+ insns.push((
+ Inst::MovZ {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFDFD2",
+ "movz x8, #65535, LSL #32",
+ ));
+ insns.push((
+ Inst::MovZ {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFFFD2",
+ "movz x8, #65535, LSL #48",
+ ));
+ insns.push((
+ Inst::MovZ {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+ size: OperandSize::Size32,
+ },
+ "E8FFBF52",
+ "movz w8, #65535, LSL #16",
+ ));
+
+ insns.push((
+ Inst::MovN {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FF9F92",
+ "movn x8, #65535",
+ ));
+ insns.push((
+ Inst::MovN {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFBF92",
+ "movn x8, #65535, LSL #16",
+ ));
+ insns.push((
+ Inst::MovN {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFDF92",
+ "movn x8, #65535, LSL #32",
+ ));
+ insns.push((
+ Inst::MovN {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFFF92",
+ "movn x8, #65535, LSL #48",
+ ));
+ insns.push((
+ Inst::MovN {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+ size: OperandSize::Size32,
+ },
+ "E8FF9F12",
+ "movn w8, #65535",
+ ));
+
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(12),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "0C0080F2",
+ "movk x12, #0",
+ ));
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(19),
+ imm: MoveWideConst::maybe_with_shift(0x0000, 16).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "1300A0F2",
+ "movk x19, #0, LSL #16",
+ ));
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(3),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E3FF9FF2",
+ "movk x3, #65535",
+ ));
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFBFF2",
+ "movk x8, #65535, LSL #16",
+ ));
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFDFF2",
+ "movk x8, #65535, LSL #32",
+ ));
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFFFF2",
+ "movk x8, #65535, LSL #48",
+ ));
+
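+ // Conditional select, conditional set, and conditional compare with immediate.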
+ insns.push((
+ Inst::CSel {
+ rd: writable_xreg(10),
+ rn: xreg(12),
+ rm: xreg(14),
+ cond: Cond::Hs,
+ },
+ "8A218E9A",
+ "csel x10, x12, x14, hs",
+ ));
+ insns.push((
+ Inst::CSet {
+ rd: writable_xreg(15),
+ cond: Cond::Ge,
+ },
+ "EFB79F9A",
+ "cset x15, ge",
+ ));
+ insns.push((
+ Inst::CCmpImm {
+ size: OperandSize::Size64,
+ rn: xreg(22),
+ imm: UImm5::maybe_from_u8(5).unwrap(),
+ nzcv: NZCV::new(false, false, true, true),
+ cond: Cond::Eq,
+ },
+ "C30A45FA",
+ "ccmp x22, #5, #nzCV, eq",
+ ));
+ insns.push((
+ Inst::CCmpImm {
+ size: OperandSize::Size32,
+ rn: xreg(3),
+ imm: UImm5::maybe_from_u8(30).unwrap(),
+ nzcv: NZCV::new(true, true, true, true),
+ cond: Cond::Gt,
+ },
+ "6FC85E7A",
+ "ccmp w3, #30, #NZCV, gt",
+ ));
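+ // Moves between general-purpose and FP/vector registers, plus NZCV flag transfers.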
+ insns.push((
+ Inst::MovToFpu {
+ rd: writable_vreg(31),
+ rn: xreg(0),
+ size: ScalarSize::Size64,
+ },
+ "1F00679E",
+ "fmov d31, x0",
+ ));
+ insns.push((
+ Inst::MovToFpu {
+ rd: writable_vreg(1),
+ rn: xreg(28),
+ size: ScalarSize::Size32,
+ },
+ "8103271E",
+ "fmov s1, w28",
+ ));
+ insns.push((
+ Inst::MovToVec {
+ rd: writable_vreg(0),
+ rn: xreg(0),
+ idx: 7,
+ size: VectorSize::Size8x8,
+ },
+ "001C0F4E",
+ "mov v0.b[7], w0",
+ ));
+ insns.push((
+ Inst::MovToVec {
+ rd: writable_vreg(20),
+ rn: xreg(21),
+ idx: 0,
+ size: VectorSize::Size64x2,
+ },
+ "B41E084E",
+ "mov v20.d[0], x21",
+ ));
+ insns.push((
+ Inst::MovFromVec {
+ rd: writable_xreg(3),
+ rn: vreg(27),
+ idx: 14,
+ size: VectorSize::Size8x16,
+ },
+ "633F1D0E",
+ "umov w3, v27.b[14]",
+ ));
+ insns.push((
+ Inst::MovFromVec {
+ rd: writable_xreg(24),
+ rn: vreg(5),
+ idx: 3,
+ size: VectorSize::Size16x8,
+ },
+ "B83C0E0E",
+ "umov w24, v5.h[3]",
+ ));
+ insns.push((
+ Inst::MovFromVec {
+ rd: writable_xreg(12),
+ rn: vreg(17),
+ idx: 1,
+ size: VectorSize::Size32x4,
+ },
+ "2C3E0C0E",
+ "mov w12, v17.s[1]",
+ ));
+ insns.push((
+ Inst::MovFromVec {
+ rd: writable_xreg(21),
+ rn: vreg(20),
+ idx: 0,
+ size: VectorSize::Size64x2,
+ },
+ "953E084E",
+ "mov x21, v20.d[0]",
+ ));
+ insns.push((
+ Inst::MovFromVecSigned {
+ rd: writable_xreg(0),
+ rn: vreg(0),
+ idx: 15,
+ size: VectorSize::Size8x16,
+ scalar_size: OperandSize::Size32,
+ },
+ "002C1F0E",
+ "smov w0, v0.b[15]",
+ ));
+ insns.push((
+ Inst::MovFromVecSigned {
+ rd: writable_xreg(12),
+ rn: vreg(13),
+ idx: 7,
+ size: VectorSize::Size8x8,
+ scalar_size: OperandSize::Size64,
+ },
+ "AC2D0F4E",
+ "smov x12, v13.b[7]",
+ ));
+ insns.push((
+ Inst::MovFromVecSigned {
+ rd: writable_xreg(23),
+ rn: vreg(31),
+ idx: 7,
+ size: VectorSize::Size16x8,
+ scalar_size: OperandSize::Size32,
+ },
+ "F72F1E0E",
+ "smov w23, v31.h[7]",
+ ));
+ insns.push((
+ Inst::MovFromVecSigned {
+ rd: writable_xreg(24),
+ rn: vreg(5),
+ idx: 1,
+ size: VectorSize::Size32x2,
+ scalar_size: OperandSize::Size64,
+ },
+ "B82C0C4E",
+ "smov x24, v5.s[1]",
+ ));
+ insns.push((
+ Inst::MovToNZCV { rn: xreg(13) },
+ "0D421BD5",
+ "msr nzcv, x13",
+ ));
+ insns.push((
+ Inst::MovFromNZCV {
+ rd: writable_xreg(27),
+ },
+ "1B423BD5",
+ "mrs x27, nzcv",
+ ));
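+ // Vector duplication (dup/movi/mvni) and vector widening (sxtl/uxtl).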
+ insns.push((
+ Inst::VecDup {
+ rd: writable_vreg(25),
+ rn: xreg(7),
+ size: VectorSize::Size8x16,
+ },
+ "F90C014E",
+ "dup v25.16b, w7",
+ ));
+ insns.push((
+ Inst::VecDup {
+ rd: writable_vreg(2),
+ rn: xreg(23),
+ size: VectorSize::Size16x8,
+ },
+ "E20E024E",
+ "dup v2.8h, w23",
+ ));
+ insns.push((
+ Inst::VecDup {
+ rd: writable_vreg(0),
+ rn: xreg(28),
+ size: VectorSize::Size32x4,
+ },
+ "800F044E",
+ "dup v0.4s, w28",
+ ));
+ insns.push((
+ Inst::VecDup {
+ rd: writable_vreg(31),
+ rn: xreg(5),
+ size: VectorSize::Size64x2,
+ },
+ "BF0C084E",
+ "dup v31.2d, x5",
+ ));
+ insns.push((
+ Inst::VecDupFromFpu {
+ rd: writable_vreg(14),
+ rn: vreg(19),
+ size: VectorSize::Size32x4,
+ },
+ "6E06044E",
+ "dup v14.4s, v19.s[0]",
+ ));
+ insns.push((
+ Inst::VecDupFromFpu {
+ rd: writable_vreg(18),
+ rn: vreg(10),
+ size: VectorSize::Size64x2,
+ },
+ "5205084E",
+ "dup v18.2d, v10.d[0]",
+ ));
+ insns.push((
+ Inst::VecDupImm {
+ rd: writable_vreg(31),
+ imm: ASIMDMovModImm::maybe_from_u64(255, ScalarSize::Size8).unwrap(),
+ invert: false,
+ size: VectorSize::Size8x16,
+ },
+ "FFE7074F",
+ "movi v31.16b, #255",
+ ));
+ insns.push((
+ Inst::VecDupImm {
+ rd: writable_vreg(0),
+ imm: ASIMDMovModImm::zero(),
+ invert: true,
+ size: VectorSize::Size16x4,
+ },
+ "0084002F",
+ "mvni v0.4h, #0",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Sxtl8,
+ rd: writable_vreg(4),
+ rn: vreg(27),
+ high_half: false,
+ },
+ "64A7080F",
+ "sxtl v4.8h, v27.8b",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Sxtl16,
+ rd: writable_vreg(17),
+ rn: vreg(19),
+ high_half: true,
+ },
+ "71A6104F",
+ "sxtl2 v17.4s, v19.8h",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Sxtl32,
+ rd: writable_vreg(30),
+ rn: vreg(6),
+ high_half: false,
+ },
+ "DEA4200F",
+ "sxtl v30.2d, v6.2s",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Uxtl8,
+ rd: writable_vreg(3),
+ rn: vreg(29),
+ high_half: true,
+ },
+ "A3A7086F",
+ "uxtl2 v3.8h, v29.16b",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Uxtl16,
+ rd: writable_vreg(15),
+ rn: vreg(12),
+ high_half: false,
+ },
+ "8FA5102F",
+ "uxtl v15.4s, v12.4h",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Uxtl32,
+ rd: writable_vreg(28),
+ rn: vreg(2),
+ high_half: true,
+ },
+ "5CA4206F",
+ "uxtl2 v28.2d, v2.4s",
+ ));
+
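+ // Vector element moves and narrowing conversions (xtn/sqxtn/sqxtun).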
+ insns.push((
+ Inst::VecMovElement {
+ rd: writable_vreg(0),
+ rn: vreg(31),
+ dest_idx: 7,
+ src_idx: 7,
+ size: VectorSize::Size16x8,
+ },
+ "E0771E6E",
+ "mov v0.h[7], v31.h[7]",
+ ));
+
+ insns.push((
+ Inst::VecMovElement {
+ rd: writable_vreg(31),
+ rn: vreg(16),
+ dest_idx: 1,
+ src_idx: 0,
+ size: VectorSize::Size32x2,
+ },
+ "1F060C6E",
+ "mov v31.s[1], v16.s[0]",
+ ));
+
+ insns.push((
+ Inst::VecMiscNarrow {
+ op: VecMiscNarrowOp::Xtn,
+ rd: writable_vreg(22),
+ rn: vreg(8),
+ size: VectorSize::Size32x2,
+ high_half: false,
+ },
+ "1629A10E",
+ "xtn v22.2s, v8.2d",
+ ));
+
+ insns.push((
+ Inst::VecMiscNarrow {
+ op: VecMiscNarrowOp::Sqxtn,
+ rd: writable_vreg(31),
+ rn: vreg(0),
+ size: VectorSize::Size16x8,
+ high_half: true,
+ },
+ "1F48614E",
+ "sqxtn2 v31.8h, v0.4s",
+ ));
+
+ insns.push((
+ Inst::VecMiscNarrow {
+ op: VecMiscNarrowOp::Sqxtun,
+ rd: writable_vreg(16),
+ rn: vreg(23),
+ size: VectorSize::Size8x16,
+ high_half: false,
+ },
+ "F02A212E",
+ "sqxtun v16.8b, v23.8h",
+ ));
+
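+ // Three-register vector ALU operations (VecRRR), one case per supported lane arrangement.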
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqadd,
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "410C284E",
+ "sqadd v1.16b, v2.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqadd,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(28),
+ size: VectorSize::Size16x8,
+ },
+ "810D7C4E",
+ "sqadd v1.8h, v12.8h, v28.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqadd,
+ rd: writable_vreg(12),
+ rn: vreg(2),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "4C0CA64E",
+ "sqadd v12.4s, v2.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqadd,
+ rd: writable_vreg(20),
+ rn: vreg(7),
+ rm: vreg(13),
+ size: VectorSize::Size64x2,
+ },
+ "F40CED4E",
+ "sqadd v20.2d, v7.2d, v13.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqsub,
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "412C284E",
+ "sqsub v1.16b, v2.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqsub,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(28),
+ size: VectorSize::Size16x8,
+ },
+ "812D7C4E",
+ "sqsub v1.8h, v12.8h, v28.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqsub,
+ rd: writable_vreg(12),
+ rn: vreg(2),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "4C2CA64E",
+ "sqsub v12.4s, v2.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqsub,
+ rd: writable_vreg(20),
+ rn: vreg(7),
+ rm: vreg(13),
+ size: VectorSize::Size64x2,
+ },
+ "F42CED4E",
+ "sqsub v20.2d, v7.2d, v13.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqadd,
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "410C286E",
+ "uqadd v1.16b, v2.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqadd,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(28),
+ size: VectorSize::Size16x8,
+ },
+ "810D7C6E",
+ "uqadd v1.8h, v12.8h, v28.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqadd,
+ rd: writable_vreg(12),
+ rn: vreg(2),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "4C0CA66E",
+ "uqadd v12.4s, v2.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqadd,
+ rd: writable_vreg(20),
+ rn: vreg(7),
+ rm: vreg(13),
+ size: VectorSize::Size64x2,
+ },
+ "F40CED6E",
+ "uqadd v20.2d, v7.2d, v13.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqsub,
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "412C286E",
+ "uqsub v1.16b, v2.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqsub,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(28),
+ size: VectorSize::Size16x8,
+ },
+ "812D7C6E",
+ "uqsub v1.8h, v12.8h, v28.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqsub,
+ rd: writable_vreg(12),
+ rn: vreg(2),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "4C2CA66E",
+ "uqsub v12.4s, v2.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqsub,
+ rd: writable_vreg(20),
+ rn: vreg(7),
+ rm: vreg(13),
+ size: VectorSize::Size64x2,
+ },
+ "F42CED6E",
+ "uqsub v20.2d, v7.2d, v13.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmeq,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size8x16,
+ },
+ "E38E386E",
+ "cmeq v3.16b, v23.16b, v24.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmgt,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size8x16,
+ },
+ "E336384E",
+ "cmgt v3.16b, v23.16b, v24.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmge,
+ rd: writable_vreg(23),
+ rn: vreg(9),
+ rm: vreg(12),
+ size: VectorSize::Size8x16,
+ },
+ "373D2C4E",
+ "cmge v23.16b, v9.16b, v12.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhi,
+ rd: writable_vreg(5),
+ rn: vreg(1),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "2534216E",
+ "cmhi v5.16b, v1.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhs,
+ rd: writable_vreg(8),
+ rn: vreg(2),
+ rm: vreg(15),
+ size: VectorSize::Size8x16,
+ },
+ "483C2F6E",
+ "cmhs v8.16b, v2.16b, v15.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmeq,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size16x8,
+ },
+ "E38E786E",
+ "cmeq v3.8h, v23.8h, v24.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmgt,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size16x8,
+ },
+ "E336784E",
+ "cmgt v3.8h, v23.8h, v24.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmge,
+ rd: writable_vreg(23),
+ rn: vreg(9),
+ rm: vreg(12),
+ size: VectorSize::Size16x8,
+ },
+ "373D6C4E",
+ "cmge v23.8h, v9.8h, v12.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhi,
+ rd: writable_vreg(5),
+ rn: vreg(1),
+ rm: vreg(1),
+ size: VectorSize::Size16x8,
+ },
+ "2534616E",
+ "cmhi v5.8h, v1.8h, v1.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhs,
+ rd: writable_vreg(8),
+ rn: vreg(2),
+ rm: vreg(15),
+ size: VectorSize::Size16x8,
+ },
+ "483C6F6E",
+ "cmhs v8.8h, v2.8h, v15.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmeq,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size32x4,
+ },
+ "E38EB86E",
+ "cmeq v3.4s, v23.4s, v24.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmgt,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size32x4,
+ },
+ "E336B84E",
+ "cmgt v3.4s, v23.4s, v24.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmge,
+ rd: writable_vreg(23),
+ rn: vreg(9),
+ rm: vreg(12),
+ size: VectorSize::Size32x4,
+ },
+ "373DAC4E",
+ "cmge v23.4s, v9.4s, v12.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhi,
+ rd: writable_vreg(5),
+ rn: vreg(1),
+ rm: vreg(1),
+ size: VectorSize::Size32x4,
+ },
+ "2534A16E",
+ "cmhi v5.4s, v1.4s, v1.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhs,
+ rd: writable_vreg(8),
+ rn: vreg(2),
+ rm: vreg(15),
+ size: VectorSize::Size32x4,
+ },
+ "483CAF6E",
+ "cmhs v8.4s, v2.4s, v15.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fcmeq,
+ rd: writable_vreg(28),
+ rn: vreg(12),
+ rm: vreg(4),
+ size: VectorSize::Size32x2,
+ },
+ "9CE5240E",
+ "fcmeq v28.2s, v12.2s, v4.2s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fcmgt,
+ rd: writable_vreg(3),
+ rn: vreg(16),
+ rm: vreg(31),
+ size: VectorSize::Size64x2,
+ },
+ "03E6FF6E",
+ "fcmgt v3.2d, v16.2d, v31.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fcmge,
+ rd: writable_vreg(18),
+ rn: vreg(23),
+ rm: vreg(0),
+ size: VectorSize::Size64x2,
+ },
+ "F2E6606E",
+ "fcmge v18.2d, v23.2d, v0.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::And,
+ rd: writable_vreg(20),
+ rn: vreg(19),
+ rm: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "741E324E",
+ "and v20.16b, v19.16b, v18.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Bic,
+ rd: writable_vreg(8),
+ rn: vreg(11),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "681D614E",
+ "bic v8.16b, v11.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Orr,
+ rd: writable_vreg(15),
+ rn: vreg(2),
+ rm: vreg(12),
+ size: VectorSize::Size16x8,
+ },
+ "4F1CAC4E",
+ "orr v15.16b, v2.16b, v12.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Eor,
+ rd: writable_vreg(18),
+ rn: vreg(3),
+ rm: vreg(22),
+ size: VectorSize::Size8x16,
+ },
+ "721C366E",
+ "eor v18.16b, v3.16b, v22.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Bsl,
+ rd: writable_vreg(8),
+ rn: vreg(9),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "281D616E",
+ "bsl v8.16b, v9.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umaxp,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "88A5216E",
+ "umaxp v8.16b, v12.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umaxp,
+ rd: writable_vreg(1),
+ rn: vreg(6),
+ rm: vreg(1),
+ size: VectorSize::Size16x8,
+ },
+ "C1A4616E",
+ "umaxp v1.8h, v6.8h, v1.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umaxp,
+ rd: writable_vreg(1),
+ rn: vreg(20),
+ rm: vreg(16),
+ size: VectorSize::Size32x4,
+ },
+ "81A6B06E",
+ "umaxp v1.4s, v20.4s, v16.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Add,
+ rd: writable_vreg(5),
+ rn: vreg(1),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "2584214E",
+ "add v5.16b, v1.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Add,
+ rd: writable_vreg(7),
+ rn: vreg(13),
+ rm: vreg(2),
+ size: VectorSize::Size16x8,
+ },
+ "A785624E",
+ "add v7.8h, v13.8h, v2.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Add,
+ rd: writable_vreg(18),
+ rn: vreg(9),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "3285A64E",
+ "add v18.4s, v9.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Add,
+ rd: writable_vreg(1),
+ rn: vreg(3),
+ rm: vreg(2),
+ size: VectorSize::Size64x2,
+ },
+ "6184E24E",
+ "add v1.2d, v3.2d, v2.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sub,
+ rd: writable_vreg(5),
+ rn: vreg(1),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "2584216E",
+ "sub v5.16b, v1.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sub,
+ rd: writable_vreg(7),
+ rn: vreg(13),
+ rm: vreg(2),
+ size: VectorSize::Size16x8,
+ },
+ "A785626E",
+ "sub v7.8h, v13.8h, v2.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sub,
+ rd: writable_vreg(18),
+ rn: vreg(9),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "3285A66E",
+ "sub v18.4s, v9.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sub,
+ rd: writable_vreg(18),
+ rn: vreg(0),
+ rm: vreg(8),
+ size: VectorSize::Size64x2,
+ },
+ "1284E86E",
+ "sub v18.2d, v0.2d, v8.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Mul,
+ rd: writable_vreg(25),
+ rn: vreg(9),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "399D284E",
+ "mul v25.16b, v9.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Mul,
+ rd: writable_vreg(30),
+ rn: vreg(30),
+ rm: vreg(12),
+ size: VectorSize::Size16x8,
+ },
+ "DE9F6C4E",
+ "mul v30.8h, v30.8h, v12.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Mul,
+ rd: writable_vreg(18),
+ rn: vreg(18),
+ rm: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "529EB24E",
+ "mul v18.4s, v18.4s, v18.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Ushl,
+ rd: writable_vreg(18),
+ rn: vreg(18),
+ rm: vreg(18),
+ size: VectorSize::Size8x16,
+ },
+ "5246326E",
+ "ushl v18.16b, v18.16b, v18.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Ushl,
+ rd: writable_vreg(18),
+ rn: vreg(18),
+ rm: vreg(18),
+ size: VectorSize::Size16x8,
+ },
+ "5246726E",
+ "ushl v18.8h, v18.8h, v18.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Ushl,
+ rd: writable_vreg(18),
+ rn: vreg(1),
+ rm: vreg(21),
+ size: VectorSize::Size32x4,
+ },
+ "3244B56E",
+ "ushl v18.4s, v1.4s, v21.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Ushl,
+ rd: writable_vreg(5),
+ rn: vreg(7),
+ rm: vreg(19),
+ size: VectorSize::Size64x2,
+ },
+ "E544F36E",
+ "ushl v5.2d, v7.2d, v19.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sshl,
+ rd: writable_vreg(18),
+ rn: vreg(18),
+ rm: vreg(18),
+ size: VectorSize::Size8x16,
+ },
+ "5246324E",
+ "sshl v18.16b, v18.16b, v18.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sshl,
+ rd: writable_vreg(30),
+ rn: vreg(1),
+ rm: vreg(29),
+ size: VectorSize::Size16x8,
+ },
+ "3E447D4E",
+ "sshl v30.8h, v1.8h, v29.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sshl,
+ rd: writable_vreg(8),
+ rn: vreg(22),
+ rm: vreg(21),
+ size: VectorSize::Size32x4,
+ },
+ "C846B54E",
+ "sshl v8.4s, v22.4s, v21.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sshl,
+ rd: writable_vreg(8),
+ rn: vreg(22),
+ rm: vreg(2),
+ size: VectorSize::Size64x2,
+ },
+ "C846E24E",
+ "sshl v8.2d, v22.2d, v2.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umin,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(3),
+ size: VectorSize::Size8x16,
+ },
+ "816D236E",
+ "umin v1.16b, v12.16b, v3.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umin,
+ rd: writable_vreg(30),
+ rn: vreg(20),
+ rm: vreg(10),
+ size: VectorSize::Size16x8,
+ },
+ "9E6E6A6E",
+ "umin v30.8h, v20.8h, v10.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umin,
+ rd: writable_vreg(8),
+ rn: vreg(22),
+ rm: vreg(21),
+ size: VectorSize::Size32x4,
+ },
+ "C86EB56E",
+ "umin v8.4s, v22.4s, v21.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smin,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(3),
+ size: VectorSize::Size8x16,
+ },
+ "816D234E",
+ "smin v1.16b, v12.16b, v3.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smin,
+ rd: writable_vreg(30),
+ rn: vreg(20),
+ rm: vreg(10),
+ size: VectorSize::Size16x8,
+ },
+ "9E6E6A4E",
+ "smin v30.8h, v20.8h, v10.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smin,
+ rd: writable_vreg(8),
+ rn: vreg(22),
+ rm: vreg(21),
+ size: VectorSize::Size32x4,
+ },
+ "C86EB54E",
+ "smin v8.4s, v22.4s, v21.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umax,
+ rd: writable_vreg(6),
+ rn: vreg(9),
+ rm: vreg(8),
+ size: VectorSize::Size8x8,
+ },
+ "2665282E",
+ "umax v6.8b, v9.8b, v8.8b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umax,
+ rd: writable_vreg(11),
+ rn: vreg(13),
+ rm: vreg(2),
+ size: VectorSize::Size16x8,
+ },
+ "AB65626E",
+ "umax v11.8h, v13.8h, v2.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umax,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "8865AE6E",
+ "umax v8.4s, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smax,
+ rd: writable_vreg(6),
+ rn: vreg(9),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "2665284E",
+ "smax v6.16b, v9.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smax,
+ rd: writable_vreg(11),
+ rn: vreg(13),
+ rm: vreg(2),
+ size: VectorSize::Size16x8,
+ },
+ "AB65624E",
+ "smax v11.8h, v13.8h, v2.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smax,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "8865AE4E",
+ "smax v8.4s, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Urhadd,
+ rd: writable_vreg(8),
+ rn: vreg(1),
+ rm: vreg(3),
+ size: VectorSize::Size8x16,
+ },
+ "2814236E",
+ "urhadd v8.16b, v1.16b, v3.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Urhadd,
+ rd: writable_vreg(2),
+ rn: vreg(13),
+ rm: vreg(6),
+ size: VectorSize::Size16x8,
+ },
+ "A215666E",
+ "urhadd v2.8h, v13.8h, v6.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Urhadd,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "8815AE6E",
+ "urhadd v8.4s, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fadd,
+ rd: writable_vreg(31),
+ rn: vreg(0),
+ rm: vreg(16),
+ size: VectorSize::Size32x4,
+ },
+ "1FD4304E",
+ "fadd v31.4s, v0.4s, v16.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fsub,
+ rd: writable_vreg(8),
+ rn: vreg(7),
+ rm: vreg(15),
+ size: VectorSize::Size64x2,
+ },
+ "E8D4EF4E",
+ "fsub v8.2d, v7.2d, v15.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fdiv,
+ rd: writable_vreg(1),
+ rn: vreg(3),
+ rm: vreg(4),
+ size: VectorSize::Size32x4,
+ },
+ "61FC246E",
+ "fdiv v1.4s, v3.4s, v4.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fmax,
+ rd: writable_vreg(31),
+ rn: vreg(16),
+ rm: vreg(0),
+ size: VectorSize::Size64x2,
+ },
+ "1FF6604E",
+ "fmax v31.2d, v16.2d, v0.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fmin,
+ rd: writable_vreg(5),
+ rn: vreg(19),
+ rm: vreg(26),
+ size: VectorSize::Size32x4,
+ },
+ "65F6BA4E",
+ "fmin v5.4s, v19.4s, v26.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fmul,
+ rd: writable_vreg(2),
+ rn: vreg(0),
+ rm: vreg(5),
+ size: VectorSize::Size64x2,
+ },
+ "02DC656E",
+ "fmul v2.2d, v0.2d, v5.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Addp,
+ rd: writable_vreg(16),
+ rn: vreg(12),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "90BD214E",
+ "addp v16.16b, v12.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Addp,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "88BDAE4E",
+ "addp v8.4s, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umlal,
+ rd: writable_vreg(9),
+ rn: vreg(20),
+ rm: vreg(17),
+ size: VectorSize::Size32x2,
+ },
+ "8982B12E",
+ "umlal v9.2d, v20.2s, v17.2s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Zip1,
+ rd: writable_vreg(16),
+ rn: vreg(12),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "9039014E",
+ "zip1 v16.16b, v12.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Zip1,
+ rd: writable_vreg(2),
+ rn: vreg(13),
+ rm: vreg(6),
+ size: VectorSize::Size16x8,
+ },
+ "A239464E",
+ "zip1 v2.8h, v13.8h, v6.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Zip1,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "88398E4E",
+ "zip1 v8.4s, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Zip1,
+ rd: writable_vreg(9),
+ rn: vreg(20),
+ rm: vreg(17),
+ size: VectorSize::Size64x2,
+ },
+ "893AD14E",
+ "zip1 v9.2d, v20.2d, v17.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull,
+ rd: writable_vreg(16),
+ rn: vreg(12),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "90C1210E",
+ "smull v16.8h, v12.8b, v1.8b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull,
+ rd: writable_vreg(2),
+ rn: vreg(13),
+ rm: vreg(6),
+ size: VectorSize::Size16x8,
+ },
+ "A2C1660E",
+ "smull v2.4s, v13.4h, v6.4h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "88C1AE0E",
+ "smull v8.2d, v12.2s, v14.2s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull2,
+ rd: writable_vreg(16),
+ rn: vreg(12),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "90C1214E",
+ "smull2 v16.8h, v12.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull2,
+ rd: writable_vreg(2),
+ rn: vreg(13),
+ rm: vreg(6),
+ size: VectorSize::Size16x8,
+ },
+ "A2C1664E",
+ "smull2 v2.4s, v13.8h, v6.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull2,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "88C1AE4E",
+ "smull2 v8.2d, v12.4s, v14.4s",
+ ));
+
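+ // Two-register vector operations (VecMisc): not, neg, abs, and FP conversions/rounding.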
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Not,
+ rd: writable_vreg(20),
+ rn: vreg(17),
+ size: VectorSize::Size8x8,
+ },
+ "345A202E",
+ "mvn v20.8b, v17.8b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Not,
+ rd: writable_vreg(2),
+ rn: vreg(1),
+ size: VectorSize::Size32x4,
+ },
+ "2258206E",
+ "mvn v2.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd: writable_vreg(3),
+ rn: vreg(7),
+ size: VectorSize::Size8x8,
+ },
+ "E3B8202E",
+ "neg v3.8b, v7.8b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ size: VectorSize::Size8x16,
+ },
+ "88B9206E",
+ "neg v8.16b, v12.16b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd: writable_vreg(0),
+ rn: vreg(31),
+ size: VectorSize::Size16x8,
+ },
+ "E0BB606E",
+ "neg v0.8h, v31.8h",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd: writable_vreg(2),
+ rn: vreg(3),
+ size: VectorSize::Size32x4,
+ },
+ "62B8A06E",
+ "neg v2.4s, v3.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd: writable_vreg(10),
+ rn: vreg(8),
+ size: VectorSize::Size64x2,
+ },
+ "0AB9E06E",
+ "neg v10.2d, v8.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd: writable_vreg(3),
+ rn: vreg(1),
+ size: VectorSize::Size8x8,
+ },
+ "23B8200E",
+ "abs v3.8b, v1.8b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd: writable_vreg(1),
+ rn: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "21B8204E",
+ "abs v1.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd: writable_vreg(29),
+ rn: vreg(28),
+ size: VectorSize::Size16x8,
+ },
+ "9DBB604E",
+ "abs v29.8h, v28.8h",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd: writable_vreg(7),
+ rn: vreg(8),
+ size: VectorSize::Size32x4,
+ },
+ "07B9A04E",
+ "abs v7.4s, v8.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd: writable_vreg(1),
+ rn: vreg(10),
+ size: VectorSize::Size64x2,
+ },
+ "41B9E04E",
+ "abs v1.2d, v10.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Fabs,
+ rd: writable_vreg(15),
+ rn: vreg(16),
+ size: VectorSize::Size32x4,
+ },
+ "0FFAA04E",
+ "fabs v15.4s, v16.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Fneg,
+ rd: writable_vreg(31),
+ rn: vreg(0),
+ size: VectorSize::Size32x4,
+ },
+ "1FF8A06E",
+ "fneg v31.4s, v0.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Fsqrt,
+ rd: writable_vreg(7),
+ rn: vreg(18),
+ size: VectorSize::Size64x2,
+ },
+ "47FAE16E",
+ "fsqrt v7.2d, v18.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Rev64,
+ rd: writable_vreg(1),
+ rn: vreg(10),
+ size: VectorSize::Size32x4,
+ },
+ "4109A04E",
+ "rev64 v1.4s, v10.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Shll,
+ rd: writable_vreg(12),
+ rn: vreg(5),
+ size: VectorSize::Size8x8,
+ },
+ "AC38212E",
+ "shll v12.8h, v5.8b, #8",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Shll,
+ rd: writable_vreg(9),
+ rn: vreg(1),
+ size: VectorSize::Size16x4,
+ },
+ "2938612E",
+ "shll v9.4s, v1.4h, #16",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Shll,
+ rd: writable_vreg(1),
+ rn: vreg(10),
+ size: VectorSize::Size32x2,
+ },
+ "4139A12E",
+ "shll v1.2d, v10.2s, #32",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Fcvtzs,
+ rd: writable_vreg(4),
+ rn: vreg(22),
+ size: VectorSize::Size32x4,
+ },
+ "C4BAA14E",
+ "fcvtzs v4.4s, v22.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Fcvtzu,
+ rd: writable_vreg(29),
+ rn: vreg(15),
+ size: VectorSize::Size64x2,
+ },
+ "FDB9E16E",
+ "fcvtzu v29.2d, v15.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Scvtf,
+ rd: writable_vreg(20),
+ rn: vreg(8),
+ size: VectorSize::Size32x4,
+ },
+ "14D9214E",
+ "scvtf v20.4s, v8.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Ucvtf,
+ rd: writable_vreg(10),
+ rn: vreg(19),
+ size: VectorSize::Size64x2,
+ },
+ "6ADA616E",
+ "ucvtf v10.2d, v19.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintn,
+ rd: writable_vreg(11),
+ rn: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "4B8A214E",
+ "frintn v11.4s, v18.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintn,
+ rd: writable_vreg(12),
+ rn: vreg(17),
+ size: VectorSize::Size64x2,
+ },
+ "2C8A614E",
+ "frintn v12.2d, v17.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintz,
+ rd: writable_vreg(11),
+ rn: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "4B9AA14E",
+ "frintz v11.4s, v18.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintz,
+ rd: writable_vreg(12),
+ rn: vreg(17),
+ size: VectorSize::Size64x2,
+ },
+ "2C9AE14E",
+ "frintz v12.2d, v17.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintm,
+ rd: writable_vreg(11),
+ rn: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "4B9A214E",
+ "frintm v11.4s, v18.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintm,
+ rd: writable_vreg(12),
+ rn: vreg(17),
+ size: VectorSize::Size64x2,
+ },
+ "2C9A614E",
+ "frintm v12.2d, v17.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintp,
+ rd: writable_vreg(11),
+ rn: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "4B8AA14E",
+ "frintp v11.4s, v18.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintp,
+ rd: writable_vreg(12),
+ rn: vreg(17),
+ size: VectorSize::Size64x2,
+ },
+ "2C8AE14E",
+ "frintp v12.2d, v17.2d",
+ ));
+
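+ // Across-lanes reductions (uminv/addv) and vector shifts by immediate.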
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Uminv,
+ rd: writable_vreg(2),
+ rn: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "22A8316E",
+ "uminv b2, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Uminv,
+ rd: writable_vreg(3),
+ rn: vreg(11),
+ size: VectorSize::Size16x8,
+ },
+ "63A9716E",
+ "uminv h3, v11.8h",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Uminv,
+ rd: writable_vreg(18),
+ rn: vreg(4),
+ size: VectorSize::Size32x4,
+ },
+ "92A8B16E",
+ "uminv s18, v4.4s",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Addv,
+ rd: writable_vreg(2),
+ rn: vreg(29),
+ size: VectorSize::Size8x16,
+ },
+ "A2BB314E",
+ "addv b2, v29.16b",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Addv,
+ rd: writable_vreg(3),
+ rn: vreg(21),
+ size: VectorSize::Size16x8,
+ },
+ "A3BA714E",
+ "addv h3, v21.8h",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Addv,
+ rd: writable_vreg(18),
+ rn: vreg(5),
+ size: VectorSize::Size32x4,
+ },
+ "B2B8B14E",
+ "addv s18, v5.4s",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Shl,
+ rd: writable_vreg(27),
+ rn: vreg(5),
+ imm: 7,
+ size: VectorSize::Size8x16,
+ },
+ "BB540F4F",
+ "shl v27.16b, v5.16b, #7",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Shl,
+ rd: writable_vreg(1),
+ rn: vreg(30),
+ imm: 0,
+ size: VectorSize::Size8x16,
+ },
+ "C157084F",
+ "shl v1.16b, v30.16b, #0",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Sshr,
+ rd: writable_vreg(26),
+ rn: vreg(6),
+ imm: 16,
+ size: VectorSize::Size16x8,
+ },
+ "DA04104F",
+ "sshr v26.8h, v6.8h, #16",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Sshr,
+ rd: writable_vreg(3),
+ rn: vreg(19),
+ imm: 1,
+ size: VectorSize::Size16x8,
+ },
+ "63061F4F",
+ "sshr v3.8h, v19.8h, #1",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Ushr,
+ rd: writable_vreg(25),
+ rn: vreg(6),
+ imm: 32,
+ size: VectorSize::Size32x4,
+ },
+ "D904206F",
+ "ushr v25.4s, v6.4s, #32",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Ushr,
+ rd: writable_vreg(5),
+ rn: vreg(21),
+ imm: 1,
+ size: VectorSize::Size32x4,
+ },
+ "A5063F6F",
+ "ushr v5.4s, v21.4s, #1",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Shl,
+ rd: writable_vreg(22),
+ rn: vreg(13),
+ imm: 63,
+ size: VectorSize::Size64x2,
+ },
+ "B6557F4F",
+ "shl v22.2d, v13.2d, #63",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Shl,
+ rd: writable_vreg(23),
+ rn: vreg(9),
+ imm: 0,
+ size: VectorSize::Size64x2,
+ },
+ "3755404F",
+ "shl v23.2d, v9.2d, #0",
+ ));
+
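+ // Vector extract, table lookups (tbl/tbx), load-and-replicate, and vector conditional select.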
+ insns.push((
+ Inst::VecExtract {
+ rd: writable_vreg(1),
+ rn: vreg(30),
+ rm: vreg(17),
+ imm4: 0,
+ },
+ "C103116E",
+ "ext v1.16b, v30.16b, v17.16b, #0",
+ ));
+
+ insns.push((
+ Inst::VecExtract {
+ rd: writable_vreg(1),
+ rn: vreg(30),
+ rm: vreg(17),
+ imm4: 8,
+ },
+ "C143116E",
+ "ext v1.16b, v30.16b, v17.16b, #8",
+ ));
+
+ insns.push((
+ Inst::VecExtract {
+ rd: writable_vreg(1),
+ rn: vreg(30),
+ rm: vreg(17),
+ imm4: 15,
+ },
+ "C17B116E",
+ "ext v1.16b, v30.16b, v17.16b, #15",
+ ));
+
+ insns.push((
+ Inst::VecTbl {
+ rd: writable_vreg(0),
+ rn: vreg(31),
+ rm: vreg(16),
+ is_extension: false,
+ },
+ "E003104E",
+ "tbl v0.16b, { v31.16b }, v16.16b",
+ ));
+
+ insns.push((
+ Inst::VecTbl {
+ rd: writable_vreg(4),
+ rn: vreg(12),
+ rm: vreg(23),
+ is_extension: true,
+ },
+ "8411174E",
+ "tbx v4.16b, { v12.16b }, v23.16b",
+ ));
+
+ insns.push((
+ Inst::VecTbl2 {
+ rd: writable_vreg(16),
+ rn: vreg(31),
+ rn2: vreg(0),
+ rm: vreg(26),
+ is_extension: false,
+ },
+ "F0231A4E",
+ "tbl v16.16b, { v31.16b, v0.16b }, v26.16b",
+ ));
+
+ insns.push((
+ Inst::VecTbl2 {
+ rd: writable_vreg(3),
+ rn: vreg(11),
+ rn2: vreg(12),
+ rm: vreg(19),
+ is_extension: true,
+ },
+ "6331134E",
+ "tbx v3.16b, { v11.16b, v12.16b }, v19.16b",
+ ));
+
+ insns.push((
+ Inst::VecLoadReplicate {
+ rd: writable_vreg(31),
+ rn: xreg(0),
+ size: VectorSize::Size64x2,
+ },
+ "1FCC404D",
+ "ld1r { v31.2d }, [x0]",
+ ));
+
+ insns.push((
+ Inst::VecLoadReplicate {
+ rd: writable_vreg(0),
+ rn: xreg(25),
+ size: VectorSize::Size8x8,
+ },
+ "20C3400D",
+ "ld1r { v0.8b }, [x25]",
+ ));
+
+ insns.push((
+ Inst::VecCSel {
+ rd: writable_vreg(5),
+ rn: vreg(10),
+ rm: vreg(19),
+ cond: Cond::Gt,
+ },
+ "6C000054651EB34E02000014451DAA4E",
+ "vcsel v5.16b, v10.16b, v19.16b, gt (if-then-else diamond)",
+ ));
+
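+ // Scalar integer sign- and zero-extensions.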
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: false,
+ from_bits: 8,
+ to_bits: 32,
+ },
+ "411C0053",
+ "uxtb w1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: true,
+ from_bits: 8,
+ to_bits: 32,
+ },
+ "411C0013",
+ "sxtb w1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: false,
+ from_bits: 16,
+ to_bits: 32,
+ },
+ "413C0053",
+ "uxth w1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: true,
+ from_bits: 16,
+ to_bits: 32,
+ },
+ "413C0013",
+ "sxth w1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: false,
+ from_bits: 8,
+ to_bits: 64,
+ },
+ "411C0053",
+ "uxtb x1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: true,
+ from_bits: 8,
+ to_bits: 64,
+ },
+ "411C4093",
+ "sxtb x1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: false,
+ from_bits: 16,
+ to_bits: 64,
+ },
+ "413C0053",
+ "uxth x1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: true,
+ from_bits: 16,
+ to_bits: 64,
+ },
+ "413C4093",
+ "sxth x1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: false,
+ from_bits: 32,
+ to_bits: 64,
+ },
+ "E103022A",
+ "mov w1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: true,
+ from_bits: 32,
+ to_bits: 64,
+ },
+ "417C4093",
+ "sxtw x1, w2",
+ ));
+
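+ // Control flow: branches, conditional traps, calls, indirect branches, brk, and adr.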
+ insns.push((
+ Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(64),
+ },
+ "10000014",
+ "b 64",
+ ));
+
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::NotZero(xreg(8)),
+ },
+ "480000B40000A0D4",
+ "cbz x8, 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Zero(xreg(8)),
+ },
+ "480000B50000A0D4",
+ "cbnz x8, 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Ne),
+ },
+ "400000540000A0D4",
+ "b.eq 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Eq),
+ },
+ "410000540000A0D4",
+ "b.ne 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Lo),
+ },
+ "420000540000A0D4",
+ "b.hs 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Hs),
+ },
+ "430000540000A0D4",
+ "b.lo 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Pl),
+ },
+ "440000540000A0D4",
+ "b.mi 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Mi),
+ },
+ "450000540000A0D4",
+ "b.pl 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Vc),
+ },
+ "460000540000A0D4",
+ "b.vs 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Vs),
+ },
+ "470000540000A0D4",
+ "b.vc 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Ls),
+ },
+ "480000540000A0D4",
+ "b.hi 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Hi),
+ },
+ "490000540000A0D4",
+ "b.ls 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Lt),
+ },
+ "4A0000540000A0D4",
+ "b.ge 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Ge),
+ },
+ "4B0000540000A0D4",
+ "b.lt 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Le),
+ },
+ "4C0000540000A0D4",
+ "b.gt 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Gt),
+ },
+ "4D0000540000A0D4",
+ "b.le 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Nv),
+ },
+ "4E0000540000A0D4",
+ "b.al 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Al),
+ },
+ "4F0000540000A0D4",
+ "b.nv 8 ; udf",
+ ));
+
+ insns.push((
+ Inst::CondBr {
+ taken: BranchTarget::ResolvedOffset(64),
+ not_taken: BranchTarget::ResolvedOffset(128),
+ kind: CondBrKind::Cond(Cond::Le),
+ },
+ "0D02005420000014",
+ "b.le 64 ; b 128",
+ ));
+
+ insns.push((
+ Inst::Call {
+ info: Box::new(CallInfo {
+ dest: ExternalName::testcase("test0"),
+ uses: Vec::new(),
+ defs: Vec::new(),
+ opcode: Opcode::Call,
+ caller_callconv: CallConv::SystemV,
+ callee_callconv: CallConv::SystemV,
+ }),
+ },
+ "00000094",
+ "bl 0",
+ ));
+
+ insns.push((
+ Inst::CallInd {
+ info: Box::new(CallIndInfo {
+ rn: xreg(10),
+ uses: Vec::new(),
+ defs: Vec::new(),
+ opcode: Opcode::CallIndirect,
+ caller_callconv: CallConv::SystemV,
+ callee_callconv: CallConv::SystemV,
+ }),
+ },
+ "40013FD6",
+ "blr x10",
+ ));
+
+ insns.push((
+ Inst::IndirectBr {
+ rn: xreg(3),
+ targets: vec![],
+ },
+ "60001FD6",
+ "br x3",
+ ));
+
+ insns.push((Inst::Brk, "000020D4", "brk #0"));
+
+ insns.push((
+ Inst::Adr {
+ rd: writable_xreg(15),
+ off: (1 << 20) - 4,
+ },
+ "EFFF7F10",
+ "adr x15, pc+1048572",
+ ));
+
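+ // Scalar FP moves and FP arithmetic (one-, two-, and three-operand forms, plus shifts by immediate).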
+ insns.push((
+ Inst::FpuMove64 {
+ rd: writable_vreg(8),
+ rn: vreg(4),
+ },
+ "881CA40E",
+ "mov v8.8b, v4.8b",
+ ));
+
+ insns.push((
+ Inst::FpuMove128 {
+ rd: writable_vreg(17),
+ rn: vreg(26),
+ },
+ "511FBA4E",
+ "mov v17.16b, v26.16b",
+ ));
+
+ insns.push((
+ Inst::FpuMoveFromVec {
+ rd: writable_vreg(1),
+ rn: vreg(30),
+ idx: 2,
+ size: VectorSize::Size32x4,
+ },
+ "C107145E",
+ "mov s1, v30.s[2]",
+ ));
+
+ insns.push((
+ Inst::FpuMoveFromVec {
+ rd: writable_vreg(23),
+ rn: vreg(11),
+ idx: 0,
+ size: VectorSize::Size64x2,
+ },
+ "7705085E",
+ "mov d23, v11.d[0]",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Abs32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CFC3201E",
+ "fabs s15, s30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Abs64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CFC3601E",
+ "fabs d15, d30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Neg32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CF43211E",
+ "fneg s15, s30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Neg64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CF43611E",
+ "fneg d15, d30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Sqrt32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CFC3211E",
+ "fsqrt s15, s30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Sqrt64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CFC3611E",
+ "fsqrt d15, d30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Cvt32To64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CFC3221E",
+ "fcvt d15, s30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Cvt64To32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CF43621E",
+ "fcvt s15, d30",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Add32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF2B3F1E",
+ "fadd s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Add64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF2B7F1E",
+ "fadd d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Sub32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF3B3F1E",
+ "fsub s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Sub64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF3B7F1E",
+ "fsub d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Mul32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF0B3F1E",
+ "fmul s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Mul64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF0B7F1E",
+ "fmul d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Div32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF1B3F1E",
+ "fdiv s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Div64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF1B7F1E",
+ "fdiv d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Max32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF4B3F1E",
+ "fmax s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Max64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF4B7F1E",
+ "fmax d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Min32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF5B3F1E",
+ "fmin s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Min64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF5B7F1E",
+ "fmin d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Uqadd64,
+ rd: writable_vreg(21),
+ rn: vreg(22),
+ rm: vreg(23),
+ },
+ "D50EF77E",
+ "uqadd d21, d22, d23",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Sqadd64,
+ rd: writable_vreg(21),
+ rn: vreg(22),
+ rm: vreg(23),
+ },
+ "D50EF75E",
+ "sqadd d21, d22, d23",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Uqsub64,
+ rd: writable_vreg(21),
+ rn: vreg(22),
+ rm: vreg(23),
+ },
+ "D52EF77E",
+ "uqsub d21, d22, d23",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Sqsub64,
+ rd: writable_vreg(21),
+ rn: vreg(22),
+ rm: vreg(23),
+ },
+ "D52EF75E",
+ "sqsub d21, d22, d23",
+ ));
+
+ insns.push((
+ Inst::FpuRRRR {
+ fpu_op: FPUOp3::MAdd32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ ra: vreg(1),
+ },
+ "CF071F1F",
+ "fmadd s15, s30, s31, s1",
+ ));
+
+ insns.push((
+ Inst::FpuRRRR {
+ fpu_op: FPUOp3::MAdd64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ ra: vreg(1),
+ },
+ "CF075F1F",
+ "fmadd d15, d30, d31, d1",
+ ));
+
+ insns.push((
+ Inst::FpuRRI {
+ fpu_op: FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(32, 32).unwrap()),
+ rd: writable_vreg(2),
+ rn: vreg(5),
+ },
+ "A204202F",
+ "ushr v2.2s, v5.2s, #32",
+ ));
+
+ insns.push((
+ Inst::FpuRRI {
+ fpu_op: FPUOpRI::UShr64(FPURightShiftImm::maybe_from_u8(63, 64).unwrap()),
+ rd: writable_vreg(2),
+ rn: vreg(5),
+ },
+ "A204417F",
+ "ushr d2, d5, #63",
+ ));
+
+ insns.push((
+ Inst::FpuRRI {
+ fpu_op: FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(31, 32).unwrap()),
+ rd: writable_vreg(4),
+ rn: vreg(10),
+ },
+ "44553F2F",
+ "sli v4.2s, v10.2s, #31",
+ ));
+
+ insns.push((
+ Inst::FpuRRI {
+ fpu_op: FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(63, 64).unwrap()),
+ rd: writable_vreg(4),
+ rn: vreg(10),
+ },
+ "44557F7F",
+ "sli d4, d10, #63",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F32ToU32,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100391E",
+ "fcvtzu w1, s4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F32ToU64,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100399E",
+ "fcvtzu x1, s4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F32ToI32,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100381E",
+ "fcvtzs w1, s4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F32ToI64,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100389E",
+ "fcvtzs x1, s4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F64ToU32,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100791E",
+ "fcvtzu w1, d4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F64ToU64,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100799E",
+ "fcvtzu x1, d4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F64ToI32,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100781E",
+ "fcvtzs w1, d4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F64ToI64,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100789E",
+ "fcvtzs x1, d4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::U32ToF32,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100231E",
+ "ucvtf s1, w4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::I32ToF32,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100221E",
+ "scvtf s1, w4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::U32ToF64,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100631E",
+ "ucvtf d1, w4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::I32ToF64,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100621E",
+ "scvtf d1, w4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::U64ToF32,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100239E",
+ "ucvtf s1, x4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::I64ToF32,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100229E",
+ "scvtf s1, x4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::U64ToF64,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100639E",
+ "ucvtf d1, x4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::I64ToF64,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100629E",
+ "scvtf d1, x4",
+ ));
+
+ insns.push((
+ Inst::FpuCmp32 {
+ rn: vreg(23),
+ rm: vreg(24),
+ },
+ "E022381E",
+ "fcmp s23, s24",
+ ));
+
+ insns.push((
+ Inst::FpuCmp64 {
+ rn: vreg(23),
+ rm: vreg(24),
+ },
+ "E022781E",
+ "fcmp d23, d24",
+ ));
+
+ insns.push((
+ Inst::FpuLoad32 {
+ rd: writable_vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), F32),
+ flags: MemFlags::trusted(),
+ },
+ "107969BC",
+ "ldr s16, [x8, x9, LSL #2]",
+ ));
+
+ insns.push((
+ Inst::FpuLoad64 {
+ rd: writable_vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), F64),
+ flags: MemFlags::trusted(),
+ },
+ "107969FC",
+ "ldr d16, [x8, x9, LSL #3]",
+ ));
+
+ insns.push((
+ Inst::FpuLoad128 {
+ rd: writable_vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), I128),
+ flags: MemFlags::trusted(),
+ },
+ "1079E93C",
+ "ldr q16, [x8, x9, LSL #4]",
+ ));
+
+ insns.push((
+ Inst::FpuLoad32 {
+ rd: writable_vreg(16),
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ },
+ "5000001C",
+ "ldr s16, pc+8",
+ ));
+
+ insns.push((
+ Inst::FpuLoad64 {
+ rd: writable_vreg(16),
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ },
+ "5000005C",
+ "ldr d16, pc+8",
+ ));
+
+ insns.push((
+ Inst::FpuLoad128 {
+ rd: writable_vreg(16),
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ },
+ "5000009C",
+ "ldr q16, pc+8",
+ ));
+
+ insns.push((
+ Inst::FpuStore32 {
+ rd: vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), F32),
+ flags: MemFlags::trusted(),
+ },
+ "107929BC",
+ "str s16, [x8, x9, LSL #2]",
+ ));
+
+ insns.push((
+ Inst::FpuStore64 {
+ rd: vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), F64),
+ flags: MemFlags::trusted(),
+ },
+ "107929FC",
+ "str d16, [x8, x9, LSL #3]",
+ ));
+
+ insns.push((
+ Inst::FpuStore128 {
+ rd: vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), I128),
+ flags: MemFlags::trusted(),
+ },
+ "1079A93C",
+ "str q16, [x8, x9, LSL #4]",
+ ));
+
+ insns.push((
+ Inst::LoadFpuConst64 {
+ rd: writable_vreg(16),
+ const_data: 1.0_f64.to_bits(),
+ },
+ "5000005C03000014000000000000F03F",
+ "ldr d16, pc+8 ; b 12 ; data.f64 1",
+ ));
+
+ insns.push((
+ Inst::LoadFpuConst128 {
+ rd: writable_vreg(5),
+ const_data: 0x0f0e0d0c0b0a09080706050403020100,
+ },
+ "4500009C05000014000102030405060708090A0B0C0D0E0F",
+ "ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100",
+ ));
+
+ insns.push((
+ Inst::FpuCSel32 {
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(3),
+ cond: Cond::Hi,
+ },
+ "418C231E",
+ "fcsel s1, s2, s3, hi",
+ ));
+
+ insns.push((
+ Inst::FpuCSel64 {
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(3),
+ cond: Cond::Eq,
+ },
+ "410C631E",
+ "fcsel d1, d2, d3, eq",
+ ));
+
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Minus32,
+ },
+ "1743251E",
+ "frintm s23, s24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Minus64,
+ },
+ "1743651E",
+ "frintm d23, d24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Plus32,
+ },
+ "17C3241E",
+ "frintp s23, s24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Plus64,
+ },
+ "17C3641E",
+ "frintp d23, d24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Zero32,
+ },
+ "17C3251E",
+ "frintz s23, s24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Zero64,
+ },
+ "17C3651E",
+ "frintz d23, d24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Nearest32,
+ },
+ "1743241E",
+ "frintn s23, s24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Nearest64,
+ },
+ "1743641E",
+ "frintn d23, d24",
+ ));
+
+ insns.push((
+ Inst::AtomicRMW {
+ ty: I16,
+ op: inst_common::AtomicRmwOp::Xor,
+ },
+ "BF3B03D53B7F5F487C031ACA3C7F1848B8FFFFB5BF3B03D5",
+ "atomically { 16_bits_at_[x25]) Xor= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
+ ));
+
+ insns.push((
+ Inst::AtomicRMW {
+ ty: I32,
+ op: inst_common::AtomicRmwOp::Xchg,
+ },
+ "BF3B03D53B7F5F88FC031AAA3C7F1888B8FFFFB5BF3B03D5",
+ "atomically { 32_bits_at_[x25]) Xchg= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
+ ));
+
+ insns.push((
+ Inst::AtomicCAS {
+ ty: I8,
+ },
+ "BF3B03D53B7F5F08581F40927F0318EB610000543C7F180878FFFFB5BF3B03D5",
+ "atomically { compare-and-swap(8_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
+ ));
+
+ insns.push((
+ Inst::AtomicCAS {
+ ty: I64,
+ },
+ "BF3B03D53B7F5FC8F8031AAA7F0318EB610000543C7F18C878FFFFB5BF3B03D5",
+ "atomically { compare-and-swap(64_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
+ ));
+
+ insns.push((
+ Inst::AtomicLoad {
+ ty: I8,
+ r_data: writable_xreg(7),
+ r_addr: xreg(28),
+ },
+ "BF3B03D587034039",
+ "atomically { x7 = zero_extend_8_bits_at[x28] }",
+ ));
+
+ insns.push((
+ Inst::AtomicLoad {
+ ty: I64,
+ r_data: writable_xreg(28),
+ r_addr: xreg(7),
+ },
+ "BF3B03D5FC0040F9",
+ "atomically { x28 = zero_extend_64_bits_at[x7] }",
+ ));
+
+ insns.push((
+ Inst::AtomicStore {
+ ty: I16,
+ r_data: xreg(17),
+ r_addr: xreg(8),
+ },
+ "11010079BF3B03D5",
+ "atomically { 16_bits_at[x8] = x17 }",
+ ));
+
+ insns.push((
+ Inst::AtomicStore {
+ ty: I32,
+ r_data: xreg(18),
+ r_addr: xreg(7),
+ },
+ "F20000B9BF3B03D5",
+ "atomically { 32_bits_at[x7] = x18 }",
+ ));
+
+ insns.push((Inst::Fence {}, "BF3B03D5", "dmb ish"));
+
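+    // A note on the entries above (editorial summary): each tuple is
+    // (instruction, expected encoding, expected printing). The encoding string
+    // is the instruction's bytes in memory order as uppercase hex; for example
+    // "BF3B03D5" is the little-endian encoding of `dmb ish` (0xD5033BBF).
+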
+ let flags = settings::Flags::new(settings::builder());
+ let rru = create_reg_universe(&flags);
+ let emit_info = EmitInfo::new(flags);
+ for (insn, expected_encoding, expected_printing) in insns {
+ println!(
+ "AArch64: {:?}, {}, {}",
+ insn, expected_encoding, expected_printing
+ );
+
+ // Check the printed text is as expected.
+ let actual_printing = insn.show_rru(Some(&rru));
+ assert_eq!(expected_printing, actual_printing);
+
+ let mut sink = test_utils::TestCodeSink::new();
+ let mut buffer = MachBuffer::new();
+ insn.emit(&mut buffer, &emit_info, &mut Default::default());
+ let buffer = buffer.finish();
+ buffer.emit(&mut sink);
+ let actual_encoding = &sink.stringify();
+ assert_eq!(expected_encoding, actual_encoding);
+ }
+}
+
+#[test]
+fn test_cond_invert() {
+ for cond in vec![
+ Cond::Eq,
+ Cond::Ne,
+ Cond::Hs,
+ Cond::Lo,
+ Cond::Mi,
+ Cond::Pl,
+ Cond::Vs,
+ Cond::Vc,
+ Cond::Hi,
+ Cond::Ls,
+ Cond::Ge,
+ Cond::Lt,
+ Cond::Gt,
+ Cond::Le,
+ Cond::Al,
+ Cond::Nv,
+ ]
+ .into_iter()
+ {
+ assert_eq!(cond.invert().invert(), cond);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/imms.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/imms.rs
new file mode 100644
index 0000000000..b6da0402bc
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/imms.rs
@@ -0,0 +1,1025 @@
+//! AArch64 ISA definitions: immediate constants.
+
+// Some variants are never constructed, but we still want them as options in the future.
+#![allow(dead_code)]
+use crate::ir::types::*;
+use crate::ir::Type;
+use crate::isa::aarch64::inst::{OperandSize, ScalarSize};
+
+use regalloc::{PrettyPrint, RealRegUniverse};
+
+use core::convert::TryFrom;
+use std::string::String;
+
+/// An immediate that represents the NZCV flags.
+#[derive(Clone, Copy, Debug)]
+pub struct NZCV {
+ /// The negative condition flag.
+ n: bool,
+ /// The zero condition flag.
+ z: bool,
+ /// The carry condition flag.
+ c: bool,
+ /// The overflow condition flag.
+ v: bool,
+}
+
+impl NZCV {
+ pub fn new(n: bool, z: bool, c: bool, v: bool) -> NZCV {
+ NZCV { n, z, c, v }
+ }
+
+ /// Bits for encoding.
+ pub fn bits(&self) -> u32 {
+ (u32::from(self.n) << 3)
+ | (u32::from(self.z) << 2)
+ | (u32::from(self.c) << 1)
+ | u32::from(self.v)
+ }
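+
+    // Example (illustrative, not from the original source): with only Z and C
+    // set, `NZCV::new(false, true, true, false).bits()` is 0b0110.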
+}
+
+/// An unsigned 5-bit immediate.
+#[derive(Clone, Copy, Debug)]
+pub struct UImm5 {
+ /// The value.
+ value: u8,
+}
+
+impl UImm5 {
+ pub fn maybe_from_u8(value: u8) -> Option<UImm5> {
+ if value < 32 {
+ Some(UImm5 { value })
+ } else {
+ None
+ }
+ }
+
+ /// Bits for encoding.
+ pub fn bits(&self) -> u32 {
+ u32::from(self.value)
+ }
+}
+
+/// A signed, scaled 7-bit offset.
+#[derive(Clone, Copy, Debug)]
+pub struct SImm7Scaled {
+ /// The value.
+ pub value: i16,
+    /// The offset is scaled by (i.e. must be a multiple of) the size of this type.
+ pub scale_ty: Type,
+}
+
+impl SImm7Scaled {
+ /// Create a SImm7Scaled from a raw offset and the known scale type, if
+ /// possible.
+ pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<SImm7Scaled> {
+ assert!(scale_ty == I64 || scale_ty == I32);
+ let scale = scale_ty.bytes();
+ assert!(scale.is_power_of_two());
+ let scale = i64::from(scale);
+ let upper_limit = 63 * scale;
+ let lower_limit = -(64 * scale);
+ if value >= lower_limit && value <= upper_limit && (value & (scale - 1)) == 0 {
+ Some(SImm7Scaled {
+ value: i16::try_from(value).unwrap(),
+ scale_ty,
+ })
+ } else {
+ None
+ }
+ }
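+
+    // Worked example (illustrative, not from the original source): for
+    // `scale_ty == I64` the accepted offsets are the multiples of 8 in
+    // -512..=504, so `maybe_from_i64(504, I64)` is accepted and encodes as
+    // 504 / 8 == 63, while `maybe_from_i64(505, I64)` and
+    // `maybe_from_i64(512, I64)` are rejected.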
+
+ /// Create a zero immediate of this format.
+ pub fn zero(scale_ty: Type) -> SImm7Scaled {
+ SImm7Scaled { value: 0, scale_ty }
+ }
+
+ /// Bits for encoding.
+ pub fn bits(&self) -> u32 {
+ let ty_bytes: i16 = self.scale_ty.bytes() as i16;
+ let scaled: i16 = self.value / ty_bytes;
+ assert!(scaled <= 63 && scaled >= -64);
+ let scaled: i8 = scaled as i8;
+ let encoded: u32 = scaled as u32;
+ encoded & 0x7f
+ }
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct FPULeftShiftImm {
+ pub amount: u8,
+ pub lane_size_in_bits: u8,
+}
+
+impl FPULeftShiftImm {
+ pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
+ debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
+ if amount < lane_size_in_bits {
+ Some(Self {
+ amount,
+ lane_size_in_bits,
+ })
+ } else {
+ None
+ }
+ }
+
+ pub fn enc(&self) -> u32 {
+ debug_assert!(self.lane_size_in_bits.is_power_of_two());
+ debug_assert!(self.lane_size_in_bits > self.amount);
+ // The encoding of the immediate follows the table below,
+ // where xs encode the shift amount.
+ //
+ // | lane_size_in_bits | encoding |
+ // +------------------------------+
+ // | 8 | 0001xxx |
+ // | 16 | 001xxxx |
+ // | 32 | 01xxxxx |
+ // | 64 | 1xxxxxx |
+ //
+ // The highest one bit is represented by `lane_size_in_bits`. Since
+ // `lane_size_in_bits` is a power of 2 and `amount` is less
+ // than `lane_size_in_bits`, they can be ORed
+        // together to produce the encoded value.
+ u32::from(self.lane_size_in_bits | self.amount)
+ }
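+
+    // Worked example (illustrative, not from the original source): a 32-bit
+    // lane shifted left by 31 encodes as 0b0100000 | 0b0011111 == 0b0111111,
+    // matching the "01xxxxx" row of the table above.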
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct FPURightShiftImm {
+ pub amount: u8,
+ pub lane_size_in_bits: u8,
+}
+
+impl FPURightShiftImm {
+ pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
+ debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
+ if amount > 0 && amount <= lane_size_in_bits {
+ Some(Self {
+ amount,
+ lane_size_in_bits,
+ })
+ } else {
+ None
+ }
+ }
+
+ pub fn enc(&self) -> u32 {
+ debug_assert_ne!(0, self.amount);
+ // The encoding of the immediate follows the table below,
+ // where xs encodes the negated shift amount.
+ //
+ // | lane_size_in_bits | encoding |
+ // +------------------------------+
+ // | 8 | 0001xxx |
+ // | 16 | 001xxxx |
+ // | 32 | 01xxxxx |
+ // | 64 | 1xxxxxx |
+ //
+        // The shift amount is negated such that a shift amount
+ // of 1 (in 64-bit) is encoded as 0b111111 and a shift
+ // amount of 64 is encoded as 0b000000,
+ // in the bottom 6 bits.
+ u32::from((self.lane_size_in_bits * 2) - self.amount)
+ }
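+
+    // Worked example (illustrative, not from the original source): for a
+    // 64-bit lane, a right shift of 63 encodes as (64 * 2) - 63 == 65 ==
+    // 0b1000001, and the maximum shift of 64 encodes as 64 == 0b1000000.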
+}
+
+/// A signed 9-bit offset.
+#[derive(Clone, Copy, Debug)]
+pub struct SImm9 {
+ /// The value.
+ pub value: i16,
+}
+
+impl SImm9 {
+ /// Create a signed 9-bit offset from a full-range value, if possible.
+ pub fn maybe_from_i64(value: i64) -> Option<SImm9> {
+ if value >= -256 && value <= 255 {
+ Some(SImm9 {
+ value: value as i16,
+ })
+ } else {
+ None
+ }
+ }
+
+ /// Create a zero immediate of this format.
+ pub fn zero() -> SImm9 {
+ SImm9 { value: 0 }
+ }
+
+ /// Bits for encoding.
+ pub fn bits(&self) -> u32 {
+ (self.value as u32) & 0x1ff
+ }
+
+ /// Signed value of immediate.
+ pub fn value(&self) -> i32 {
+ self.value as i32
+ }
+}
+
+/// An unsigned, scaled 12-bit offset.
+#[derive(Clone, Copy, Debug)]
+pub struct UImm12Scaled {
+ /// The value.
+ pub value: u16,
+    /// The offset is scaled by (i.e. must be a multiple of) the size of this type.
+ pub scale_ty: Type,
+}
+
+impl UImm12Scaled {
+ /// Create a UImm12Scaled from a raw offset and the known scale type, if
+ /// possible.
+ pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<UImm12Scaled> {
+ // Ensure the type is at least one byte.
+ let scale_ty = if scale_ty == B1 { B8 } else { scale_ty };
+
+ let scale = scale_ty.bytes();
+ assert!(scale.is_power_of_two());
+ let scale = scale as i64;
+ let limit = 4095 * scale;
+ if value >= 0 && value <= limit && (value & (scale - 1)) == 0 {
+ Some(UImm12Scaled {
+ value: value as u16,
+ scale_ty,
+ })
+ } else {
+ None
+ }
+ }
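+
+    // Worked example (illustrative, not from the original source): for
+    // `scale_ty == I64` the accepted offsets are the multiples of 8 in
+    // 0..=32760 (4095 * 8), so `maybe_from_i64(32760, I64)` encodes as
+    // 32760 / 8 == 4095 while `maybe_from_i64(32768, I64)` is rejected.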
+
+ /// Create a zero immediate of this format.
+ pub fn zero(scale_ty: Type) -> UImm12Scaled {
+ UImm12Scaled { value: 0, scale_ty }
+ }
+
+ /// Encoded bits.
+ pub fn bits(&self) -> u32 {
+ (self.value as u32 / self.scale_ty.bytes()) & 0xfff
+ }
+
+    /// The raw byte offset (i.e. the value before dividing by the scale for encoding).
+ pub fn value(&self) -> u32 {
+ self.value as u32
+ }
+
+ /// The value type which is the scaling base.
+ pub fn scale_ty(&self) -> Type {
+ self.scale_ty
+ }
+}
+
+/// A shifted immediate value in 'imm12' format: supports 12 bits, shifted
+/// left by 0 or 12 places.
+#[derive(Clone, Debug)]
+pub struct Imm12 {
+ /// The immediate bits.
+ pub bits: u16,
+ /// Whether the immediate bits are shifted left by 12 or not.
+ pub shift12: bool,
+}
+
+impl Imm12 {
+    /// Compute an Imm12 from raw bits, if possible.
+ pub fn maybe_from_u64(val: u64) -> Option<Imm12> {
+ if val == 0 {
+ Some(Imm12 {
+ bits: 0,
+ shift12: false,
+ })
+ } else if val < 0xfff {
+ Some(Imm12 {
+ bits: val as u16,
+ shift12: false,
+ })
+ } else if val < 0xfff_000 && (val & 0xfff == 0) {
+ Some(Imm12 {
+ bits: (val >> 12) as u16,
+ shift12: true,
+ })
+ } else {
+ None
+ }
+ }
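+
+    // A few illustrative inputs (not from the original source):
+    //   maybe_from_u64(0x123)     => Some { bits: 0x123, shift12: false }
+    //   maybe_from_u64(0x123_000) => Some { bits: 0x123, shift12: true }
+    //   maybe_from_u64(0x123_456) => None (needs more than one 12-bit chunk)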
+
+ /// Create a zero immediate of this format.
+ pub fn zero() -> Self {
+ Imm12 {
+ bits: 0,
+ shift12: false,
+ }
+ }
+
+ /// Bits for 2-bit "shift" field in e.g. AddI.
+ pub fn shift_bits(&self) -> u32 {
+ if self.shift12 {
+ 0b01
+ } else {
+ 0b00
+ }
+ }
+
+ /// Bits for 12-bit "imm" field in e.g. AddI.
+ pub fn imm_bits(&self) -> u32 {
+ self.bits as u32
+ }
+}
+
+/// An immediate for logical instructions.
+#[derive(Clone, Debug, PartialEq)]
+pub struct ImmLogic {
+ /// The actual value.
+ value: u64,
+ /// `N` flag.
+ pub n: bool,
+    /// `R` field: rotate amount.
+    pub r: u8,
+    /// `S` field: element size and number of set bits.
+    pub s: u8,
+ /// Was this constructed for a 32-bit or 64-bit instruction?
+ pub size: OperandSize,
+}
+
+impl ImmLogic {
+ /// Compute an ImmLogic from raw bits, if possible.
+ pub fn maybe_from_u64(value: u64, ty: Type) -> Option<ImmLogic> {
+ // Note: This function is a port of VIXL's Assembler::IsImmLogical.
+
+ if ty != I64 && ty != I32 {
+ return None;
+ }
+ let operand_size = OperandSize::from_ty(ty);
+
+ let original_value = value;
+
+ let value = if ty == I32 {
+ // To handle 32-bit logical immediates, the very easiest thing is to repeat
+ // the input value twice to make a 64-bit word. The correct encoding of that
+ // as a logical immediate will also be the correct encoding of the 32-bit
+ // value.
+
+ // Avoid making the assumption that the most-significant 32 bits are zero by
+ // shifting the value left and duplicating it.
+ let value = value << 32;
+ value | value >> 32
+ } else {
+ value
+ };
+
+ // Logical immediates are encoded using parameters n, imm_s and imm_r using
+ // the following table:
+ //
+ // N imms immr size S R
+ // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
+ // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
+ // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
+ // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
+ // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
+ // 0 11110s xxxxxr 2 UInt(s) UInt(r)
+ // (s bits must not be all set)
+ //
+ // A pattern is constructed of size bits, where the least significant S+1 bits
+ // are set. The pattern is rotated right by R, and repeated across a 32 or
+ // 64-bit value, depending on destination register width.
+ //
+ // Put another way: the basic format of a logical immediate is a single
+ // contiguous stretch of 1 bits, repeated across the whole word at intervals
+ // given by a power of 2. To identify them quickly, we first locate the
+ // lowest stretch of 1 bits, then the next 1 bit above that; that combination
+ // is different for every logical immediate, so it gives us all the
+ // information we need to identify the only logical immediate that our input
+ // could be, and then we simply check if that's the value we actually have.
+ //
+ // (The rotation parameter does give the possibility of the stretch of 1 bits
+ // going 'round the end' of the word. To deal with that, we observe that in
+ // any situation where that happens the bitwise NOT of the value is also a
+ // valid logical immediate. So we simply invert the input whenever its low bit
+ // is set, and then we know that the rotated case can't arise.)
+ let (value, inverted) = if value & 1 == 1 {
+ (!value, true)
+ } else {
+ (value, false)
+ };
+
+ if value == 0 {
+ return None;
+ }
+
+ // The basic analysis idea: imagine our input word looks like this.
+ //
+ // 0011111000111110001111100011111000111110001111100011111000111110
+ // c b a
+ // |<--d-->|
+ //
+ // We find the lowest set bit (as an actual power-of-2 value, not its index)
+ // and call it a. Then we add a to our original number, which wipes out the
+ // bottommost stretch of set bits and replaces it with a 1 carried into the
+ // next zero bit. Then we look for the new lowest set bit, which is in
+ // position b, and subtract it, so now our number is just like the original
+ // but with the lowest stretch of set bits completely gone. Now we find the
+ // lowest set bit again, which is position c in the diagram above. Then we'll
+ // measure the distance d between bit positions a and c (using CLZ), and that
+ // tells us that the only valid logical immediate that could possibly be equal
+ // to this number is the one in which a stretch of bits running from a to just
+ // below b is replicated every d bits.
+ fn lowest_set_bit(value: u64) -> u64 {
+ let bit = value.trailing_zeros();
+ 1u64.checked_shl(bit).unwrap_or(0)
+ }
+ let a = lowest_set_bit(value);
+ assert_ne!(0, a);
+ let value_plus_a = value.wrapping_add(a);
+ let b = lowest_set_bit(value_plus_a);
+ let value_plus_a_minus_b = value_plus_a - b;
+ let c = lowest_set_bit(value_plus_a_minus_b);
+
+ let (d, clz_a, out_n, mask) = if c != 0 {
+ // The general case, in which there is more than one stretch of set bits.
+ // Compute the repeat distance d, and set up a bitmask covering the basic
+ // unit of repetition (i.e. a word with the bottom d bits set). Also, in all
+ // of these cases the N bit of the output will be zero.
+ let clz_a = a.leading_zeros();
+ let clz_c = c.leading_zeros();
+ let d = clz_a - clz_c;
+ let mask = (1 << d) - 1;
+ (d, clz_a, 0, mask)
+ } else {
+ (64, a.leading_zeros(), 1, u64::max_value())
+ };
+
+ // If the repeat period d is not a power of two, it can't be encoded.
+ if !d.is_power_of_two() {
+ return None;
+ }
+
+ if ((b.wrapping_sub(a)) & !mask) != 0 {
+ // If the bit stretch (b - a) does not fit within the mask derived from the
+ // repeat period, then fail.
+ return None;
+ }
+
+ // The only possible option is b - a repeated every d bits. Now we're going to
+ // actually construct the valid logical immediate derived from that
+ // specification, and see if it equals our original input.
+ //
+ // To repeat a value every d bits, we multiply it by a number of the form
+ // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
+ // be derived using a table lookup on CLZ(d).
+ const MULTIPLIERS: [u64; 6] = [
+ 0x0000000000000001,
+ 0x0000000100000001,
+ 0x0001000100010001,
+ 0x0101010101010101,
+ 0x1111111111111111,
+ 0x5555555555555555,
+ ];
+ let multiplier = MULTIPLIERS[(u64::from(d).leading_zeros() - 57) as usize];
+ let candidate = b.wrapping_sub(a) * multiplier;
+
+ if value != candidate {
+ // The candidate pattern doesn't match our input value, so fail.
+ return None;
+ }
+
+ // We have a match! This is a valid logical immediate, so now we have to
+ // construct the bits and pieces of the instruction encoding that generates
+ // it.
+
+ // Count the set bits in our basic stretch. The special case of clz(0) == -1
+ // makes the answer come out right for stretches that reach the very top of
+ // the word (e.g. numbers like 0xffffc00000000000).
+ let clz_b = if b == 0 {
+ u32::max_value() // -1
+ } else {
+ b.leading_zeros()
+ };
+ let s = clz_a.wrapping_sub(clz_b);
+
+ // Decide how many bits to rotate right by, to put the low bit of that basic
+ // stretch in position a.
+ let (s, r) = if inverted {
+ // If we inverted the input right at the start of this function, here's
+ // where we compensate: the number of set bits becomes the number of clear
+ // bits, and the rotation count is based on position b rather than position
+ // a (since b is the location of the 'lowest' 1 bit after inversion).
+ // Need wrapping for when clz_b is max_value() (for when b == 0).
+ (d - s, clz_b.wrapping_add(1) & (d - 1))
+ } else {
+ (s, (clz_a + 1) & (d - 1))
+ };
+
+ // Now we're done, except for having to encode the S output in such a way that
+ // it gives both the number of set bits and the length of the repeated
+ // segment. The s field is encoded like this:
+ //
+ // imms size S
+ // ssssss 64 UInt(ssssss)
+ // 0sssss 32 UInt(sssss)
+ // 10ssss 16 UInt(ssss)
+ // 110sss 8 UInt(sss)
+ // 1110ss 4 UInt(ss)
+ // 11110s 2 UInt(s)
+ //
+ // So we 'or' (2 * -d) with our computed s to form imms.
+ let s = ((d * 2).wrapping_neg() | (s - 1)) & 0x3f;
+ debug_assert!(u8::try_from(r).is_ok());
+ debug_assert!(u8::try_from(s).is_ok());
+ Some(ImmLogic {
+ value: original_value,
+ n: out_n != 0,
+ r: r as u8,
+ s: s as u8,
+ size: operand_size,
+ })
+ }
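+
+    // Worked example (illustrative; the values match the unit tests at the
+    // bottom of this file): 248 == 0b1111_1000 is a single run of five set
+    // bits, so `maybe_from_u64(248, I64)` yields N=1, r=61 (rotate amount)
+    // and s=4 (five set bits, 64-bit element), while 249 == 0b1111_1001 is
+    // not a valid logical immediate and is rejected.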
+
+ /// Returns bits ready for encoding: (N:1, R:6, S:6)
+ pub fn enc_bits(&self) -> u32 {
+ ((self.n as u32) << 12) | ((self.r as u32) << 6) | (self.s as u32)
+ }
+
+ /// Returns the value that this immediate represents.
+ pub fn value(&self) -> u64 {
+ self.value
+ }
+
+ /// Return an immediate for the bitwise-inverted value.
+ pub fn invert(&self) -> ImmLogic {
+ // For every ImmLogical immediate, the inverse can also be encoded.
+ Self::maybe_from_u64(!self.value, self.size.to_ty()).unwrap()
+ }
+
+ /// This provides a safe(ish) way to avoid the costs of `maybe_from_u64` when we want to
+ /// encode a constant that we know at compiler-build time. It constructs an `ImmLogic` from
+ /// the fields `n`, `r`, `s` and `size`, but in a debug build, checks that `value_to_check`
+ /// corresponds to those four fields. The intention is that, in a non-debug build, this
+ /// reduces to something small enough that it will be a candidate for inlining.
+ pub fn from_n_r_s(value_to_check: u64, n: bool, r: u8, s: u8, size: OperandSize) -> Self {
+ // Construct it from the components we got given.
+ let imml = Self {
+ value: value_to_check,
+ n,
+ r,
+ s,
+ size,
+ };
+
+ // In debug mode, check that `n`/`r`/`s` are correct, given `value` and `size`.
+ debug_assert!(match ImmLogic::maybe_from_u64(
+ value_to_check,
+ if size == OperandSize::Size64 {
+ I64
+ } else {
+ I32
+ }
+ ) {
+ None => false, // fail: `value` is unrepresentable
+ Some(imml_check) => imml_check == imml,
+ });
+
+ imml
+ }
+}
+
+/// An immediate for shift instructions.
+#[derive(Clone, Debug)]
+pub struct ImmShift {
+ /// 6-bit shift amount.
+ pub imm: u8,
+}
+
+impl ImmShift {
+ /// Create an ImmShift from raw bits, if possible.
+ pub fn maybe_from_u64(val: u64) -> Option<ImmShift> {
+ if val < 64 {
+ Some(ImmShift { imm: val as u8 })
+ } else {
+ None
+ }
+ }
+
+ /// Get the immediate value.
+ pub fn value(&self) -> u8 {
+ self.imm
+ }
+}
+
+/// A 16-bit immediate for a MOVZ instruction, with a {0,16,32,48}-bit shift.
+#[derive(Clone, Copy, Debug)]
+pub struct MoveWideConst {
+ /// The value.
+ pub bits: u16,
+ /// Result is `bits` shifted 16*shift bits to the left.
+ pub shift: u8,
+}
+
+impl MoveWideConst {
+ /// Construct a MoveWideConst from an arbitrary 64-bit constant if possible.
+ pub fn maybe_from_u64(value: u64) -> Option<MoveWideConst> {
+ let mask0 = 0x0000_0000_0000_ffffu64;
+ let mask1 = 0x0000_0000_ffff_0000u64;
+ let mask2 = 0x0000_ffff_0000_0000u64;
+ let mask3 = 0xffff_0000_0000_0000u64;
+
+ if value == (value & mask0) {
+ return Some(MoveWideConst {
+ bits: (value & mask0) as u16,
+ shift: 0,
+ });
+ }
+ if value == (value & mask1) {
+ return Some(MoveWideConst {
+ bits: ((value >> 16) & mask0) as u16,
+ shift: 1,
+ });
+ }
+ if value == (value & mask2) {
+ return Some(MoveWideConst {
+ bits: ((value >> 32) & mask0) as u16,
+ shift: 2,
+ });
+ }
+ if value == (value & mask3) {
+ return Some(MoveWideConst {
+ bits: ((value >> 48) & mask0) as u16,
+ shift: 3,
+ });
+ }
+ None
+ }
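+
+    // Illustrative examples (not from the original source):
+    //   maybe_from_u64(0x12)        => bits 0x12, shift 0
+    //   maybe_from_u64(0x0012_0000) => bits 0x12, shift 1 (i.e. LSL #16)
+    //   maybe_from_u64(0x0012_0034) => None (spans two 16-bit chunks)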
+
+ pub fn maybe_with_shift(imm: u16, shift: u8) -> Option<MoveWideConst> {
+ let shift_enc = shift / 16;
+ if shift_enc > 3 {
+ None
+ } else {
+ Some(MoveWideConst {
+ bits: imm,
+ shift: shift_enc,
+ })
+ }
+ }
+
+ /// Returns the value that this constant represents.
+ pub fn value(&self) -> u64 {
+ (self.bits as u64) << (16 * self.shift)
+ }
+}
+
+/// Advanced SIMD modified immediate as used by MOVI/MVNI.
+#[derive(Clone, Copy, Debug)]
+pub struct ASIMDMovModImm {
+ imm: u8,
+ shift: u8,
+ shift_ones: bool,
+}
+
+impl ASIMDMovModImm {
+ pub fn maybe_from_u64(value: u64, size: ScalarSize) -> Option<ASIMDMovModImm> {
+ match size {
+ ScalarSize::Size8 => Some(ASIMDMovModImm {
+ imm: value as u8,
+ shift: 0,
+ shift_ones: false,
+ }),
+ _ => None,
+ }
+ }
+
+ /// Create a zero immediate of this format.
+ pub fn zero() -> Self {
+ ASIMDMovModImm {
+ imm: 0,
+ shift: 0,
+ shift_ones: false,
+ }
+ }
+
+ pub fn value(&self) -> (u8, u32, bool) {
+ (self.imm, self.shift as u32, self.shift_ones)
+ }
+}
+
+impl PrettyPrint for NZCV {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ let fmt = |c: char, v| if v { c.to_ascii_uppercase() } else { c };
+ format!(
+ "#{}{}{}{}",
+ fmt('n', self.n),
+ fmt('z', self.z),
+ fmt('c', self.c),
+ fmt('v', self.v)
+ )
+ }
+}
+
+impl PrettyPrint for UImm5 {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value)
+ }
+}
+
+impl PrettyPrint for Imm12 {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ let shift = if self.shift12 { 12 } else { 0 };
+ let value = u32::from(self.bits) << shift;
+ format!("#{}", value)
+ }
+}
+
+impl PrettyPrint for SImm7Scaled {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value)
+ }
+}
+
+impl PrettyPrint for FPULeftShiftImm {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.amount)
+ }
+}
+
+impl PrettyPrint for FPURightShiftImm {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.amount)
+ }
+}
+
+impl PrettyPrint for SImm9 {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value)
+ }
+}
+
+impl PrettyPrint for UImm12Scaled {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value)
+ }
+}
+
+impl PrettyPrint for ImmLogic {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value())
+ }
+}
+
+impl PrettyPrint for ImmShift {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.imm)
+ }
+}
+
+impl PrettyPrint for MoveWideConst {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ if self.shift == 0 {
+ format!("#{}", self.bits)
+ } else {
+ format!("#{}, LSL #{}", self.bits, self.shift * 16)
+ }
+ }
+}
+
+impl PrettyPrint for ASIMDMovModImm {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ if self.shift == 0 {
+ format!("#{}", self.imm)
+ } else {
+ let shift_type = if self.shift_ones { "MSL" } else { "LSL" };
+ format!("#{}, {} #{}", self.imm, shift_type, self.shift)
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn imm_logical_test() {
+ assert_eq!(None, ImmLogic::maybe_from_u64(0, I64));
+ assert_eq!(None, ImmLogic::maybe_from_u64(u64::max_value(), I64));
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 1,
+ n: true,
+ r: 0,
+ s: 0,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(1, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 2,
+ n: true,
+ r: 63,
+ s: 0,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(2, I64)
+ );
+
+ assert_eq!(None, ImmLogic::maybe_from_u64(5, I64));
+
+ assert_eq!(None, ImmLogic::maybe_from_u64(11, I64));
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 248,
+ n: true,
+ r: 61,
+ s: 4,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(248, I64)
+ );
+
+ assert_eq!(None, ImmLogic::maybe_from_u64(249, I64));
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 1920,
+ n: true,
+ r: 57,
+ s: 3,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(1920, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x7ffe,
+ n: true,
+ r: 63,
+ s: 13,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x7ffe, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x30000,
+ n: true,
+ r: 48,
+ s: 1,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x30000, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x100000,
+ n: true,
+ r: 44,
+ s: 0,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x100000, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: u64::max_value() - 1,
+ n: true,
+ r: 63,
+ s: 62,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(u64::max_value() - 1, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0xaaaaaaaaaaaaaaaa,
+ n: false,
+ r: 1,
+ s: 60,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0xaaaaaaaaaaaaaaaa, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x8181818181818181,
+ n: false,
+ r: 1,
+ s: 49,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x8181818181818181, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0xffc3ffc3ffc3ffc3,
+ n: false,
+ r: 10,
+ s: 43,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0xffc3ffc3ffc3ffc3, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x100000001,
+ n: false,
+ r: 0,
+ s: 0,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x100000001, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x1111111111111111,
+ n: false,
+ r: 0,
+ s: 56,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x1111111111111111, I64)
+ );
+
+ for n in 0..2 {
+ let types = if n == 0 { vec![I64, I32] } else { vec![I64] };
+ for s in 0..64 {
+ for r in 0..64 {
+ let imm = get_logical_imm(n, s, r);
+ for &ty in &types {
+ match ImmLogic::maybe_from_u64(imm, ty) {
+ Some(ImmLogic { value, .. }) => {
+ assert_eq!(imm, value);
+ ImmLogic::maybe_from_u64(!value, ty).unwrap();
+ }
+ None => assert_eq!(0, imm),
+ };
+ }
+ }
+ }
+ }
+ }
+
+ // Repeat a value that has `width` bits, across a 64-bit value.
+ fn repeat(value: u64, width: u64) -> u64 {
+ let mut result = value & ((1 << width) - 1);
+ let mut i = width;
+ while i < 64 {
+ result |= result << i;
+ i *= 2;
+ }
+ result
+ }
+
+ // Get the logical immediate, from the encoding N/R/S bits.
+ fn get_logical_imm(n: u32, s: u32, r: u32) -> u64 {
+ // An integer is constructed from the n, imm_s and imm_r bits according to
+ // the following table:
+ //
+ // N imms immr size S R
+ // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
+ // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
+ // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
+ // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
+ // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
+ // 0 11110s xxxxxr 2 UInt(s) UInt(r)
+ // (s bits must not be all set)
+ //
+ // A pattern is constructed of size bits, where the least significant S+1
+ // bits are set. The pattern is rotated right by R, and repeated across a
+ // 64-bit value.
+
+ if n == 1 {
+ if s == 0x3f {
+ return 0;
+ }
+ let bits = (1u64 << (s + 1)) - 1;
+ bits.rotate_right(r)
+ } else {
+ if (s >> 1) == 0x1f {
+ return 0;
+ }
+ let mut width = 0x20;
+ while width >= 0x2 {
+ if (s & width) == 0 {
+ let mask = width - 1;
+ if (s & mask) == mask {
+ return 0;
+ }
+ let bits = (1u64 << ((s & mask) + 1)) - 1;
+ return repeat(bits.rotate_right(r & mask), width.into());
+ }
+ width >>= 1;
+ }
+ unreachable!();
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/mod.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/mod.rs
new file mode 100644
index 0000000000..278302018e
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/mod.rs
@@ -0,0 +1,4057 @@
+//! This module defines aarch64-specific machine instruction types.
+
+// Some variants are not constructed, but we still want them as options in the future.
+#![allow(dead_code)]
+
+use crate::binemit::CodeOffset;
+use crate::ir::types::{
+ B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X4, F64, F64X2, FFLAGS, I16, I16X8,
+ I32, I32X4, I64, I64X2, I8, I8X16, IFLAGS, R32, R64,
+};
+use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, TrapCode, Type};
+use crate::isa::CallConv;
+use crate::machinst::*;
+use crate::{settings, CodegenError, CodegenResult};
+
+use regalloc::{PrettyPrint, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
+use regalloc::{RegUsageCollector, RegUsageMapper};
+
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use core::convert::TryFrom;
+use smallvec::{smallvec, SmallVec};
+use std::string::{String, ToString};
+
+pub mod regs;
+pub use self::regs::*;
+pub mod imms;
+pub use self::imms::*;
+pub mod args;
+pub use self::args::*;
+pub mod emit;
+pub use self::emit::*;
+pub mod unwind;
+
+#[cfg(test)]
+mod emit_tests;
+
+//=============================================================================
+// Instructions (top level): definition
+
+/// An ALU operation. This can be paired with several instruction formats
+/// below (see `Inst`) in any combination.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum ALUOp {
+ Add32,
+ Add64,
+ Sub32,
+ Sub64,
+ Orr32,
+ Orr64,
+ OrrNot32,
+ OrrNot64,
+ And32,
+ And64,
+ AndNot32,
+ AndNot64,
+ /// XOR (AArch64 calls this "EOR")
+ Eor32,
+ /// XOR (AArch64 calls this "EOR")
+ Eor64,
+    /// XNOR (the AArch64 mnemonic is "EON", i.e. EOR-NOT)
+    EorNot32,
+    /// XNOR (the AArch64 mnemonic is "EON", i.e. EOR-NOT)
+    EorNot64,
+ /// Add, setting flags
+ AddS32,
+ /// Add, setting flags
+ AddS64,
+ /// Sub, setting flags
+ SubS32,
+ /// Sub, setting flags
+ SubS64,
+ /// Signed multiply, high-word result
+ SMulH,
+ /// Unsigned multiply, high-word result
+ UMulH,
+ SDiv64,
+ UDiv64,
+ RotR32,
+ RotR64,
+ Lsr32,
+ Lsr64,
+ Asr32,
+ Asr64,
+ Lsl32,
+ Lsl64,
+}
+
+/// An ALU operation with three arguments.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum ALUOp3 {
+ /// Multiply-add
+ MAdd32,
+ /// Multiply-add
+ MAdd64,
+ /// Multiply-sub
+ MSub32,
+ /// Multiply-sub
+ MSub64,
+}
+
+/// A floating-point unit (FPU) operation with one arg.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FPUOp1 {
+ Abs32,
+ Abs64,
+ Neg32,
+ Neg64,
+ Sqrt32,
+ Sqrt64,
+ Cvt32To64,
+ Cvt64To32,
+}
+
+/// A floating-point unit (FPU) operation with two args.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FPUOp2 {
+ Add32,
+ Add64,
+ Sub32,
+ Sub64,
+ Mul32,
+ Mul64,
+ Div32,
+ Div64,
+ Max32,
+ Max64,
+ Min32,
+ Min64,
+ /// Signed saturating add
+ Sqadd64,
+ /// Unsigned saturating add
+ Uqadd64,
+ /// Signed saturating subtract
+ Sqsub64,
+ /// Unsigned saturating subtract
+ Uqsub64,
+}
+
+/// A floating-point unit (FPU) operation with two args, a register and an immediate.
+#[derive(Copy, Clone, Debug)]
+pub enum FPUOpRI {
+    /// Unsigned right shift. Rd = Rn >> #imm
+    UShr32(FPURightShiftImm),
+    /// Unsigned right shift. Rd = Rn >> #imm
+    UShr64(FPURightShiftImm),
+ /// Shift left and insert. Rd |= Rn << #imm
+ Sli32(FPULeftShiftImm),
+ /// Shift left and insert. Rd |= Rn << #imm
+ Sli64(FPULeftShiftImm),
+}
+
+/// A floating-point unit (FPU) operation with three args.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FPUOp3 {
+ MAdd32,
+ MAdd64,
+}
+
+/// A conversion from an FP to an integer value.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FpuToIntOp {
+ F32ToU32,
+ F32ToI32,
+ F32ToU64,
+ F32ToI64,
+ F64ToU32,
+ F64ToI32,
+ F64ToU64,
+ F64ToI64,
+}
+
+/// A conversion from an integer to an FP value.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum IntToFpuOp {
+ U32ToF32,
+ I32ToF32,
+ U32ToF64,
+ I32ToF64,
+ U64ToF32,
+ I64ToF32,
+ U64ToF64,
+ I64ToF64,
+}
+
+/// Modes for FP rounding ops: round down (floor) or up (ceil), or toward zero (trunc), or to
+/// nearest, and for 32- or 64-bit FP values.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FpuRoundMode {
+ Minus32,
+ Minus64,
+ Plus32,
+ Plus64,
+ Zero32,
+ Zero64,
+ Nearest32,
+ Nearest64,
+}
+
+/// Type of vector element extensions.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecExtendOp {
+ /// Signed extension of 8-bit elements
+ Sxtl8,
+ /// Signed extension of 16-bit elements
+ Sxtl16,
+ /// Signed extension of 32-bit elements
+ Sxtl32,
+ /// Unsigned extension of 8-bit elements
+ Uxtl8,
+ /// Unsigned extension of 16-bit elements
+ Uxtl16,
+ /// Unsigned extension of 32-bit elements
+ Uxtl32,
+}
+
+/// A vector ALU operation.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecALUOp {
+ /// Signed saturating add
+ Sqadd,
+ /// Unsigned saturating add
+ Uqadd,
+ /// Signed saturating subtract
+ Sqsub,
+ /// Unsigned saturating subtract
+ Uqsub,
+ /// Compare bitwise equal
+ Cmeq,
+ /// Compare signed greater than or equal
+ Cmge,
+ /// Compare signed greater than
+ Cmgt,
+    /// Compare unsigned higher or same
+    Cmhs,
+    /// Compare unsigned higher
+    Cmhi,
+ /// Floating-point compare equal
+ Fcmeq,
+ /// Floating-point compare greater than
+ Fcmgt,
+ /// Floating-point compare greater than or equal
+ Fcmge,
+ /// Bitwise and
+ And,
+ /// Bitwise bit clear
+ Bic,
+ /// Bitwise inclusive or
+ Orr,
+ /// Bitwise exclusive or
+ Eor,
+ /// Bitwise select
+ Bsl,
+ /// Unsigned maximum pairwise
+ Umaxp,
+ /// Add
+ Add,
+ /// Subtract
+ Sub,
+ /// Multiply
+ Mul,
+ /// Signed shift left
+ Sshl,
+ /// Unsigned shift left
+ Ushl,
+ /// Unsigned minimum
+ Umin,
+ /// Signed minimum
+ Smin,
+ /// Unsigned maximum
+ Umax,
+ /// Signed maximum
+ Smax,
+ /// Unsigned rounding halving add
+ Urhadd,
+ /// Floating-point add
+ Fadd,
+ /// Floating-point subtract
+ Fsub,
+ /// Floating-point divide
+ Fdiv,
+ /// Floating-point maximum
+ Fmax,
+ /// Floating-point minimum
+ Fmin,
+ /// Floating-point multiply
+ Fmul,
+ /// Add pairwise
+ Addp,
+ /// Unsigned multiply add long
+ Umlal,
+    /// Zip vectors (primary) [meaning, low halves]
+ Zip1,
+ /// Signed multiply long (low halves)
+ Smull,
+ /// Signed multiply long (high halves)
+ Smull2,
+}
+
+/// A Vector miscellaneous operation with two registers.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecMisc2 {
+ /// Bitwise NOT
+ Not,
+ /// Negate
+ Neg,
+ /// Absolute value
+ Abs,
+ /// Floating-point absolute value
+ Fabs,
+ /// Floating-point negate
+ Fneg,
+ /// Floating-point square root
+ Fsqrt,
+ /// Reverse elements in 64-bit doublewords
+ Rev64,
+ /// Shift left long (by element size)
+ Shll,
+ /// Floating-point convert to signed integer, rounding toward zero
+ Fcvtzs,
+ /// Floating-point convert to unsigned integer, rounding toward zero
+ Fcvtzu,
+ /// Signed integer convert to floating-point
+ Scvtf,
+ /// Unsigned integer convert to floating-point
+ Ucvtf,
+ /// Floating point round to integral, rounding towards nearest
+ Frintn,
+ /// Floating point round to integral, rounding towards zero
+ Frintz,
+ /// Floating point round to integral, rounding towards minus infinity
+ Frintm,
+ /// Floating point round to integral, rounding towards plus infinity
+ Frintp,
+}
+
+/// A Vector narrowing operation with two registers.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecMiscNarrowOp {
+ /// Extract Narrow
+ Xtn,
+ /// Signed saturating extract narrow
+ Sqxtn,
+ /// Signed saturating extract unsigned narrow
+ Sqxtun,
+}
+
+/// An operation across the lanes of vectors.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecLanesOp {
+ /// Integer addition across a vector
+ Addv,
+ /// Unsigned minimum across a vector
+ Uminv,
+}
+
+/// A shift-by-immediate operation on each lane of a vector.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecShiftImmOp {
+    /// Shift left (immediate)
+    Shl,
+    /// Unsigned shift right (immediate)
+    Ushr,
+    /// Signed shift right (immediate)
+    Sshr,
+}
+
+/// An operation on the bits of a register. This can be paired with several instruction formats
+/// below (see `Inst`) in any combination.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum BitOp {
+ /// Bit reverse
+ RBit32,
+ /// Bit reverse
+ RBit64,
+ Clz32,
+ Clz64,
+ Cls32,
+ Cls64,
+}
+
+impl BitOp {
+ /// What is the opcode's native width?
+ pub fn operand_size(&self) -> OperandSize {
+ match self {
+ BitOp::RBit32 | BitOp::Clz32 | BitOp::Cls32 => OperandSize::Size32,
+ _ => OperandSize::Size64,
+ }
+ }
+
+ /// Get the assembly mnemonic for this opcode.
+ pub fn op_str(&self) -> &'static str {
+ match self {
+ BitOp::RBit32 | BitOp::RBit64 => "rbit",
+ BitOp::Clz32 | BitOp::Clz64 => "clz",
+ BitOp::Cls32 | BitOp::Cls64 => "cls",
+ }
+ }
+}
+
+impl From<(Opcode, Type)> for BitOp {
+ /// Get the BitOp from the IR opcode.
+ fn from(op_ty: (Opcode, Type)) -> BitOp {
+ match op_ty {
+ (Opcode::Bitrev, I32) => BitOp::RBit32,
+ (Opcode::Bitrev, I64) => BitOp::RBit64,
+ (Opcode::Clz, I32) => BitOp::Clz32,
+ (Opcode::Clz, I64) => BitOp::Clz64,
+ (Opcode::Cls, I32) => BitOp::Cls32,
+ (Opcode::Cls, I64) => BitOp::Cls64,
+ _ => unreachable!("Called with non-bit op!: {:?}", op_ty),
+ }
+ }
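+
+    // For example (illustrative): `BitOp::from((Opcode::Clz, I64))` yields
+    // `BitOp::Clz64`.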
+}
+
+/// Additional information for (direct) Call instructions, left out of line to lower the size of
+/// the Inst enum.
+#[derive(Clone, Debug)]
+pub struct CallInfo {
+ pub dest: ExternalName,
+ pub uses: Vec<Reg>,
+ pub defs: Vec<Writable<Reg>>,
+ pub opcode: Opcode,
+ pub caller_callconv: CallConv,
+ pub callee_callconv: CallConv,
+}
+
+/// Additional information for CallInd instructions, left out of line to lower the size of the Inst
+/// enum.
+#[derive(Clone, Debug)]
+pub struct CallIndInfo {
+ pub rn: Reg,
+ pub uses: Vec<Reg>,
+ pub defs: Vec<Writable<Reg>>,
+ pub opcode: Opcode,
+ pub caller_callconv: CallConv,
+ pub callee_callconv: CallConv,
+}
+
+/// Additional information for JTSequence instructions, left out of line to lower the size of the Inst
+/// enum.
+#[derive(Clone, Debug)]
+pub struct JTSequenceInfo {
+ pub targets: Vec<BranchTarget>,
+ pub default_target: BranchTarget,
+ pub targets_for_term: Vec<MachLabel>, // needed for MachTerminator.
+}
+
+/// Instruction formats.
+#[derive(Clone, Debug)]
+pub enum Inst {
+ /// A no-op of zero size.
+ Nop0,
+
+ /// A no-op that is one instruction large.
+ Nop4,
+
+ /// An ALU operation with two register sources and a register destination.
+ AluRRR {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ },
+ /// An ALU operation with three register sources and a register destination.
+ AluRRRR {
+ alu_op: ALUOp3,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ ra: Reg,
+ },
+ /// An ALU operation with a register source and an immediate-12 source, and a register
+ /// destination.
+ AluRRImm12 {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ imm12: Imm12,
+ },
+ /// An ALU operation with a register source and an immediate-logic source, and a register destination.
+ AluRRImmLogic {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ imml: ImmLogic,
+ },
+ /// An ALU operation with a register source and an immediate-shiftamt source, and a register destination.
+ AluRRImmShift {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ immshift: ImmShift,
+ },
+ /// An ALU operation with two register sources, one of which can be shifted, and a register
+ /// destination.
+ AluRRRShift {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ shiftop: ShiftOpAndAmt,
+ },
+ /// An ALU operation with two register sources, one of which can be {zero,sign}-extended and
+ /// shifted, and a register destination.
+ AluRRRExtend {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ extendop: ExtendOp,
+ },
+
+ /// A bit op instruction with a single register source.
+ BitRR {
+ op: BitOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// An unsigned (zero-extending) 8-bit load.
+ ULoad8 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A signed (sign-extending) 8-bit load.
+ SLoad8 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// An unsigned (zero-extending) 16-bit load.
+ ULoad16 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A signed (sign-extending) 16-bit load.
+ SLoad16 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// An unsigned (zero-extending) 32-bit load.
+ ULoad32 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A signed (sign-extending) 32-bit load.
+ SLoad32 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A 64-bit load.
+ ULoad64 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+
+ /// An 8-bit store.
+ Store8 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A 16-bit store.
+ Store16 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A 32-bit store.
+ Store32 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A 64-bit store.
+ Store64 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+
+ /// A store of a pair of registers.
+ StoreP64 {
+ rt: Reg,
+ rt2: Reg,
+ mem: PairAMode,
+ flags: MemFlags,
+ },
+ /// A load of a pair of registers.
+ LoadP64 {
+ rt: Writable<Reg>,
+ rt2: Writable<Reg>,
+ mem: PairAMode,
+ flags: MemFlags,
+ },
+
+ /// A MOV instruction. These are encoded as ORR's (AluRRR form) but we
+ /// keep them separate at the `Inst` level for better pretty-printing
+ /// and faster `is_move()` logic.
+ Mov64 {
+ rd: Writable<Reg>,
+ rm: Reg,
+ },
+
+ /// A 32-bit MOV. Zeroes the top 32 bits of the destination. This is
+ /// effectively an alias for an unsigned 32-to-64-bit extension.
+ Mov32 {
+ rd: Writable<Reg>,
+ rm: Reg,
+ },
+
+ /// A MOVZ with a 16-bit immediate.
+ MovZ {
+ rd: Writable<Reg>,
+ imm: MoveWideConst,
+ size: OperandSize,
+ },
+
+ /// A MOVN with a 16-bit immediate.
+ MovN {
+ rd: Writable<Reg>,
+ imm: MoveWideConst,
+ size: OperandSize,
+ },
+
+ /// A MOVK with a 16-bit immediate.
+ MovK {
+ rd: Writable<Reg>,
+ imm: MoveWideConst,
+ size: OperandSize,
+ },
+
+ /// A sign- or zero-extend operation.
+ Extend {
+ rd: Writable<Reg>,
+ rn: Reg,
+ signed: bool,
+ from_bits: u8,
+ to_bits: u8,
+ },
+
+ /// A conditional-select operation.
+ CSel {
+ rd: Writable<Reg>,
+ cond: Cond,
+ rn: Reg,
+ rm: Reg,
+ },
+
+ /// A conditional-set operation.
+ CSet {
+ rd: Writable<Reg>,
+ cond: Cond,
+ },
+
+ /// A conditional comparison with an immediate.
+ CCmpImm {
+ size: OperandSize,
+ rn: Reg,
+ imm: UImm5,
+ nzcv: NZCV,
+ cond: Cond,
+ },
+
+ /// A synthetic insn, which is a load-linked store-conditional loop, that has the overall
+ /// effect of atomically modifying a memory location in a particular way. Because we have
+ /// no way to explain to the regalloc about earlyclobber registers, this instruction has
+ /// completely fixed operand registers, and we rely on the RA's coalescing to remove copies
+ /// in the surrounding code to the extent it can. The sequence is both preceded and
+ /// followed by a fence which is at least as comprehensive as that of the `Fence`
+ /// instruction below. This instruction is sequentially consistent. The operand
+ /// conventions are:
+ ///
+ /// x25 (rd) address
+ /// x26 (rd) second operand for `op`
+ /// x27 (wr) old value
+ /// x24 (wr) scratch reg; value afterwards has no meaning
+ /// x28 (wr) scratch reg; value afterwards has no meaning
+ AtomicRMW {
+ ty: Type, // I8, I16, I32 or I64
+ op: inst_common::AtomicRmwOp,
+ },
+
+ /// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked
+ /// store-conditional loop. (Although we could possibly implement it more directly using
+ /// CAS insns that are available in some revisions of AArch64 above 8.0). The sequence is
+ /// both preceded and followed by a fence which is at least as comprehensive as that of the
+ /// `Fence` instruction below. This instruction is sequentially consistent. Note that the
+ /// operand conventions, although very similar to AtomicRMW, are different:
+ ///
+ /// x25 (rd) address
+ /// x26 (rd) expected value
+ /// x28 (rd) replacement value
+ /// x27 (wr) old value
+ /// x24 (wr) scratch reg; value afterwards has no meaning
+ AtomicCAS {
+ ty: Type, // I8, I16, I32 or I64
+ },
+
+ /// Read `ty` bits from address `r_addr`, zero extend the loaded value to 64 bits and put it
+ /// in `r_data`. The load instruction is preceded by a fence at least as comprehensive as
+ /// that of the `Fence` instruction below. This instruction is sequentially consistent.
+ AtomicLoad {
+ ty: Type, // I8, I16, I32 or I64
+ r_data: Writable<Reg>,
+ r_addr: Reg,
+ },
+
+ /// Write the lowest `ty` bits of `r_data` to address `r_addr`, with a memory fence
+ /// instruction following the store. The fence is at least as comprehensive as that of the
+ /// `Fence` instruction below. This instruction is sequentially consistent.
+ AtomicStore {
+ ty: Type, // I8, I16, I32 or I64
+ r_data: Reg,
+ r_addr: Reg,
+ },
+
+ /// A memory fence. This must provide ordering to ensure that, at a minimum, neither loads
+ /// nor stores may move forwards or backwards across the fence. Currently emitted as "dmb
+ /// ish". This instruction is sequentially consistent.
+ Fence,
+
+ /// FPU move. Note that this is distinct from a vector-register
+ /// move; moving just 64 bits seems to be significantly faster.
+ FpuMove64 {
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// Vector register move.
+ FpuMove128 {
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// Move to scalar from a vector element.
+ FpuMoveFromVec {
+ rd: Writable<Reg>,
+ rn: Reg,
+ idx: u8,
+ size: VectorSize,
+ },
+
+ /// 1-op FPU instruction.
+ FpuRR {
+ fpu_op: FPUOp1,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// 2-op FPU instruction.
+ FpuRRR {
+ fpu_op: FPUOp2,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ },
+
+ FpuRRI {
+ fpu_op: FPUOpRI,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// 3-op FPU instruction.
+ FpuRRRR {
+ fpu_op: FPUOp3,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ ra: Reg,
+ },
+
+ /// FPU comparison, single-precision (32 bit).
+ FpuCmp32 {
+ rn: Reg,
+ rm: Reg,
+ },
+
+ /// FPU comparison, double-precision (64 bit).
+ FpuCmp64 {
+ rn: Reg,
+ rm: Reg,
+ },
+
+ /// Floating-point load, single-precision (32 bit).
+ FpuLoad32 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// Floating-point store, single-precision (32 bit).
+ FpuStore32 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// Floating-point load, double-precision (64 bit).
+ FpuLoad64 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// Floating-point store, double-precision (64 bit).
+ FpuStore64 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// Floating-point/vector load, 128 bit.
+ FpuLoad128 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// Floating-point/vector store, 128 bit.
+ FpuStore128 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+
+ LoadFpuConst64 {
+ rd: Writable<Reg>,
+ const_data: u64,
+ },
+
+ LoadFpuConst128 {
+ rd: Writable<Reg>,
+ const_data: u128,
+ },
+
+ /// Conversion: FP -> integer.
+ FpuToInt {
+ op: FpuToIntOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// Conversion: integer -> FP.
+ IntToFpu {
+ op: IntToFpuOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// FP conditional select, 32 bit.
+ FpuCSel32 {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ cond: Cond,
+ },
+ /// FP conditional select, 64 bit.
+ FpuCSel64 {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ cond: Cond,
+ },
+
+ /// Round to integer.
+ FpuRound {
+ op: FpuRoundMode,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// Move from a GPR to a vector register. The scalar value is parked in the lowest lane
+ /// of the destination, and all other lanes are zeroed out. Currently only 32- and 64-bit
+ /// transfers are supported.
+ MovToFpu {
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: ScalarSize,
+ },
+
+ /// Move to a vector element from a GPR.
+ MovToVec {
+ rd: Writable<Reg>,
+ rn: Reg,
+ idx: u8,
+ size: VectorSize,
+ },
+
+ /// Unsigned move from a vector element to a GPR.
+ MovFromVec {
+ rd: Writable<Reg>,
+ rn: Reg,
+ idx: u8,
+ size: VectorSize,
+ },
+
+ /// Signed move from a vector element to a GPR.
+ MovFromVecSigned {
+ rd: Writable<Reg>,
+ rn: Reg,
+ idx: u8,
+ size: VectorSize,
+ scalar_size: OperandSize,
+ },
+
+ /// Duplicate general-purpose register to vector.
+ VecDup {
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ },
+
+ /// Duplicate scalar to vector.
+ VecDupFromFpu {
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ },
+
+ /// Duplicate immediate to vector.
+ VecDupImm {
+ rd: Writable<Reg>,
+ imm: ASIMDMovModImm,
+ invert: bool,
+ size: VectorSize,
+ },
+
+ /// Vector extend.
+ VecExtend {
+ t: VecExtendOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ high_half: bool,
+ },
+
+ /// Move vector element to another vector element.
+ VecMovElement {
+ rd: Writable<Reg>,
+ rn: Reg,
+ dest_idx: u8,
+ src_idx: u8,
+ size: VectorSize,
+ },
+
+ /// Vector narrowing operation.
+ VecMiscNarrow {
+ op: VecMiscNarrowOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ high_half: bool,
+ },
+
+ /// A vector ALU op.
+ VecRRR {
+ alu_op: VecALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ size: VectorSize,
+ },
+
+ /// Vector two register miscellaneous instruction.
+ VecMisc {
+ op: VecMisc2,
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ },
+
+ /// Vector instruction across lanes.
+ VecLanes {
+ op: VecLanesOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ },
+
+ /// Vector shift by immediate: Shift Left (immediate), Unsigned Shift Right (immediate),
+ /// Signed Shift Right (immediate). These are somewhat unusual in that, for right shifts,
+ /// the allowed range of `imm` values is 1 to lane-size-in-bits, inclusive. A zero
+ /// right-shift cannot be encoded. Left shifts are "normal", though, having valid `imm`
+ /// values from 0 to lane-size-in-bits - 1 inclusive.
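+ /// For example, with 32-bit lanes a right shift encodes `imm` values 1 to 32 inclusive,
+ /// while a left shift encodes 0 to 31.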
+ VecShiftImm {
+ op: VecShiftImmOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ imm: u8,
+ },
+
+ /// Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes
+ /// of `rm` followed by the uppermost `16 - imm4` bytes of `rn`.
+ VecExtract {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ imm4: u8,
+ },
+
+ /// Table vector lookup - single register table. The table consists of 8-bit elements and is
+ /// stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether
+ /// to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination
+ /// vector that correspond to out-of-range indices (greater than 15) unmodified or to set them
+ /// to 0.
+ VecTbl {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ is_extension: bool,
+ },
+
+ /// Table vector lookup - two register table. The table consists of 8-bit elements and is
+ /// stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension`
+ /// specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in
+ /// the destination vector that correspond to out-of-range indices (greater than 31) unmodified
+ /// or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers
+ /// modulo 32, that is v31 and v0 (in that order) are consecutive registers.
+ VecTbl2 {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rn2: Reg,
+ rm: Reg,
+ is_extension: bool,
+ },
+
+ /// Load an element and replicate to all lanes of a vector.
+ VecLoadReplicate {
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ },
+
+ /// Vector conditional select, 128 bit. A synthetic instruction, which generates a 4-insn
+ /// control-flow diamond.
+ VecCSel {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ cond: Cond,
+ },
+
+ /// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
+ MovToNZCV {
+ rn: Reg,
+ },
+
+ /// Move from the NZCV flags (actually a `MRS Xn, NZCV` insn).
+ MovFromNZCV {
+ rd: Writable<Reg>,
+ },
+
+ /// A machine call instruction. N.B.: this allows only a +/- 128MB offset (it uses a relocation
+ /// of type `Reloc::Arm64Call`); if the destination distance is not `RelocDistance::Near`, the
+ /// code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit
+ /// target.
+ Call {
+ info: Box<CallInfo>,
+ },
+ /// A machine indirect-call instruction.
+ CallInd {
+ info: Box<CallIndInfo>,
+ },
+
+ // ---- branches (exactly one must appear at end of BB) ----
+ /// A machine return instruction.
+ Ret,
+
+ /// A placeholder instruction, generating no code, meaning that a function epilogue must be
+ /// inserted there.
+ EpiloguePlaceholder,
+
+ /// An unconditional branch.
+ Jump {
+ dest: BranchTarget,
+ },
+
+ /// A conditional branch. Contains two targets; at emission time, both are emitted, but
+ /// the MachBuffer knows to truncate the trailing branch when it is the fallthrough. We optimize the
+ /// choice of taken/not_taken (inverting the branch polarity as needed) based on the
+ /// fallthrough at the time of lowering.
+ CondBr {
+ taken: BranchTarget,
+ not_taken: BranchTarget,
+ kind: CondBrKind,
+ },
+
+ /// A conditional trap: execute a `udf` if the condition is true. This is
+ /// one VCode instruction because it uses embedded control flow; it is
+ /// logically a single-in, single-out region, but needs to appear as one
+ /// unit to the register allocator.
+ ///
+ /// The `CondBrKind` gives the condition under which the trap is taken: in the emitted
+ /// code, we branch over the `udf` instruction using the inverse of this condition.
+ TrapIf {
+ kind: CondBrKind,
+ trap_code: TrapCode,
+ },
+
+ /// An indirect branch through a register, augmented with the set of all
+ /// possible successors.
+ IndirectBr {
+ rn: Reg,
+ targets: Vec<MachLabel>,
+ },
+
+ /// A "break" instruction, used for e.g. traps and debug breakpoints.
+ Brk,
+
+ /// An instruction guaranteed to always be undefined and to trigger an illegal instruction at
+ /// runtime.
+ Udf {
+ trap_code: TrapCode,
+ },
+
+ /// Compute the address (using a PC-relative offset) of a memory location, using the `ADR`
+ /// instruction. Note that we take a simple offset, not a `MemLabel`, here, because `Adr` is
+ /// only used for now in fixed lowering sequences with hardcoded offsets. In the future we may
+ /// need full `MemLabel` support.
+ Adr {
+ rd: Writable<Reg>,
+ /// Offset in range -2^20 .. 2^20.
+ off: i32,
+ },
+
+ /// Raw 32-bit word, used for inline constants and jump-table entries.
+ Word4 {
+ data: u32,
+ },
+
+ /// Raw 64-bit word, used for inline constants.
+ Word8 {
+ data: u64,
+ },
+
+ /// Jump-table sequence, as one compound instruction (see note in lower_inst.rs for rationale).
+ JTSequence {
+ info: Box<JTSequenceInfo>,
+ ridx: Reg,
+ rtmp1: Writable<Reg>,
+ rtmp2: Writable<Reg>,
+ },
+
+ /// Load an inline symbol reference.
+ LoadExtName {
+ rd: Writable<Reg>,
+ name: Box<ExternalName>,
+ offset: i64,
+ },
+
+ /// Load address referenced by `mem` into `rd`.
+ LoadAddr {
+ rd: Writable<Reg>,
+ mem: AMode,
+ },
+
+ /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
+ /// controls how AMode::NominalSPOffset args are lowered.
+ VirtualSPOffsetAdj {
+ offset: i64,
+ },
+
+ /// Meta-insn, no-op in generated code: emit constant/branch veneer island
+ /// at this point (with a guard jump around it) if less than the needed
+ /// space is available before the next branch deadline. See the `MachBuffer`
+ /// implementation in `machinst/buffer.rs` for the overall algorithm. In
+ /// brief, we retain a set of "pending/unresolved label references" from
+ /// branches as we scan forward through instructions to emit machine code;
+ /// if we notice we're about to go out of range on an unresolved reference,
+ /// we stop, emit a bunch of "veneers" (branches in a form that has a longer
+ /// range, e.g. a 26-bit-offset unconditional jump), and point the original
+ /// label references to those. This is an "island" because it comes in the
+ /// middle of the code.
+ ///
+ /// This meta-instruction is a necessary part of the logic that determines
+ /// where to place islands. Ordinarily, we want to place them between basic
+ /// blocks, so we compute the worst-case size of each block, and emit the
+ /// island before starting a block if we would exceed a deadline before the
+ /// end of the block. However, some sequences (such as an inline jumptable)
+ /// are variable-length and not accounted for by this logic; so these
+ /// lowered sequences include an `EmitIsland` to trigger island generation
+ /// where necessary.
+ EmitIsland {
+ /// The needed space before the next deadline.
+ needed_space: CodeOffset,
+ },
+}
+
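+// For example, `count_zero_half_words(0x0000_1234_0000_5678, 4)` returns 2, since the
+// 16-bit chunks at bit positions 16..32 and 48..64 are zero.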
+fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize {
+ let mut count = 0;
+ for _ in 0..num_half_words {
+ if value & 0xffff == 0 {
+ count += 1;
+ }
+ value >>= 16;
+ }
+
+ count
+}
+
+#[test]
+fn inst_size_test() {
+ // This test will help with unintentionally growing the size
+ // of the Inst enum.
+ assert_eq!(32, std::mem::size_of::<Inst>());
+}
+
+impl Inst {
+ /// Create a move instruction.
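+ ///
+ /// For example, `Inst::mov(writable_xreg(0), xreg(1))` produces a `Mov64`, while a move
+ /// between vector registers produces an `FpuMove128` (an informal usage note).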
+ pub fn mov(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
+ assert!(to_reg.to_reg().get_class() == from_reg.get_class());
+ if from_reg.get_class() == RegClass::I64 {
+ Inst::Mov64 {
+ rd: to_reg,
+ rm: from_reg,
+ }
+ } else if from_reg.get_class() == RegClass::V128 {
+ Inst::FpuMove128 {
+ rd: to_reg,
+ rn: from_reg,
+ }
+ } else {
+ Inst::FpuMove64 {
+ rd: to_reg,
+ rn: from_reg,
+ }
+ }
+ }
+
+ /// Create a 32-bit move instruction.
+ pub fn mov32(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
+ Inst::Mov32 {
+ rd: to_reg,
+ rm: from_reg,
+ }
+ }
+
+ /// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN,
+ /// logical immediate, or a short MOVZ/MOVN/MOVK sequence).
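+ ///
+ /// Informal examples: `0x1_0000` fits a single MOVZ, `u64::MAX` a single MOVN,
+ /// `0x5555_5555_5555_5555` is a valid logical immediate and becomes an ORR with the zero
+ /// register, and a general 64-bit value may need up to four instructions (a leading MOVZ
+ /// or MOVN plus MOVKs).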
+ pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
+ if let Some(imm) = MoveWideConst::maybe_from_u64(value) {
+ // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
+ smallvec![Inst::MovZ {
+ rd,
+ imm,
+ size: OperandSize::Size64
+ }]
+ } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) {
+ // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
+ smallvec![Inst::MovN {
+ rd,
+ imm,
+ size: OperandSize::Size64
+ }]
+ } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) {
+ // Weird logical-instruction immediate in ORR with the zero register
+ smallvec![Inst::AluRRImmLogic {
+ alu_op: ALUOp::Orr64,
+ rd,
+ rn: zero_reg(),
+ imml,
+ }]
+ } else {
+ let mut insts = smallvec![];
+
+ // If the top 32 bits are zero, use 32-bit `mov` operations.
+ let (num_half_words, size, negated) = if value >> 32 == 0 {
+ (2, OperandSize::Size32, (!value << 32) >> 32)
+ } else {
+ (4, OperandSize::Size64, !value)
+ };
+ // If the number of 0xffff half words is greater than the number of 0x0000 half words
+ // it is more efficient to use `movn` for the first instruction.
+ let first_is_inverted = count_zero_half_words(negated, num_half_words)
+ > count_zero_half_words(value, num_half_words);
+ // Either 0xffff or 0x0000 half words can be skipped, depending on the first
+ // instruction used.
+ let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
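+ // Worked example (informal): for `value == 0xffff_ffff_1234_ffff` the negated value has
+ // three zero half words versus none in `value`, so `first_is_inverted` is true, the
+ // 0xffff half words are skipped, and the loop below emits a single
+ // `movn rd, #0xedcb, lsl #16`.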
+ let mut first_mov_emitted = false;
+
+ for i in 0..num_half_words {
+ let imm16 = (value >> (16 * i)) & 0xffff;
+ if imm16 != ignored_halfword {
+ if !first_mov_emitted {
+ first_mov_emitted = true;
+ if first_is_inverted {
+ let imm =
+ MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, i * 16)
+ .unwrap();
+ insts.push(Inst::MovN { rd, imm, size });
+ } else {
+ let imm =
+ MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
+ insts.push(Inst::MovZ { rd, imm, size });
+ }
+ } else {
+ let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
+ insts.push(Inst::MovK { rd, imm, size });
+ }
+ }
+ }
+
+ assert!(first_mov_emitted);
+
+ insts
+ }
+ }
+
+ /// Create instructions that load a 32-bit floating-point constant.
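+ ///
+ /// For instance, loading `1.0_f32` (bits `0x3f80_0000`) currently materializes the bits in
+ /// an integer temporary and then moves them into the low lane of `rd` via `MovToFpu`,
+ /// while zero is handled with a single vector-immediate move (an informal sketch of the
+ /// current strategy).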
+ pub fn load_fp_constant32<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ rd: Writable<Reg>,
+ value: u32,
+ mut alloc_tmp: F,
+ ) -> SmallVec<[Inst; 4]> {
+ if value == 0 {
+ smallvec![Inst::VecDupImm {
+ rd,
+ imm: ASIMDMovModImm::zero(),
+ invert: false,
+ size: VectorSize::Size8x8
+ }]
+ } else {
+ // TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent
+ // bits.
+ let tmp = alloc_tmp(RegClass::I64, I32);
+ let mut insts = Inst::load_constant(tmp, value as u64);
+
+ insts.push(Inst::MovToFpu {
+ rd,
+ rn: tmp.to_reg(),
+ size: ScalarSize::Size64,
+ });
+
+ insts
+ }
+ }
+
+ /// Create instructions that load a 64-bit floating-point constant.
+ pub fn load_fp_constant64<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ rd: Writable<Reg>,
+ const_data: u64,
+ mut alloc_tmp: F,
+ ) -> SmallVec<[Inst; 4]> {
+ if let Ok(const_data) = u32::try_from(const_data) {
+ Inst::load_fp_constant32(rd, const_data, alloc_tmp)
+ // TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent
+ // bits. Also, treat it as half of a 128-bit vector and consider replicated
+ // patterns. Scalar MOVI might also be an option.
+ } else if const_data & (u32::MAX as u64) == 0 {
+ let tmp = alloc_tmp(RegClass::I64, I64);
+ let mut insts = Inst::load_constant(tmp, const_data);
+
+ insts.push(Inst::MovToFpu {
+ rd,
+ rn: tmp.to_reg(),
+ size: ScalarSize::Size64,
+ });
+
+ insts
+ } else {
+ smallvec![Inst::LoadFpuConst64 { rd, const_data }]
+ }
+ }
+
+ /// Create instructions that load a 128-bit vector constant.
+ pub fn load_fp_constant128<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ rd: Writable<Reg>,
+ const_data: u128,
+ alloc_tmp: F,
+ ) -> SmallVec<[Inst; 5]> {
+ if let Ok(const_data) = u64::try_from(const_data) {
+ SmallVec::from(&Inst::load_fp_constant64(rd, const_data, alloc_tmp)[..])
+ } else if let Some((pattern, size)) =
+ Inst::get_replicated_vector_pattern(const_data, ScalarSize::Size64)
+ {
+ Inst::load_replicated_vector_pattern(
+ rd,
+ pattern,
+ VectorSize::from_lane_size(size, true),
+ alloc_tmp,
+ )
+ } else {
+ smallvec![Inst::LoadFpuConst128 { rd, const_data }]
+ }
+ }
+
+ /// Determine whether a 128-bit constant represents a vector consisting of elements with
+ /// the same value.
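+ ///
+ /// For example, when called with `ScalarSize::Size64`, a constant in which every byte is
+ /// `0x12` yields `Some((0x12, ScalarSize::Size8))`, while a constant whose two 64-bit
+ /// halves differ yields `None`.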
+ pub fn get_replicated_vector_pattern(
+ value: u128,
+ size: ScalarSize,
+ ) -> Option<(u64, ScalarSize)> {
+ let (mask, shift, next_size) = match size {
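+ // Note that `Size128` is used as a sentinel "next" size: the recursive call below
+ // returns `None` for it, which stops the subdivision at 8-bit elements.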
+ ScalarSize::Size8 => (u8::MAX as u128, 8, ScalarSize::Size128),
+ ScalarSize::Size16 => (u16::MAX as u128, 16, ScalarSize::Size8),
+ ScalarSize::Size32 => (u32::MAX as u128, 32, ScalarSize::Size16),
+ ScalarSize::Size64 => (u64::MAX as u128, 64, ScalarSize::Size32),
+ _ => return None,
+ };
+ let mut r = None;
+ let v = value & mask;
+
+ if (value >> shift) & mask == v {
+ r = Inst::get_replicated_vector_pattern(v, next_size);
+
+ if r.is_none() {
+ r = Some((v as u64, size));
+ }
+ }
+
+ r
+ }
+
+ /// Create instructions that load a 128-bit vector constant consisting of elements with
+ /// the same value.
+ pub fn load_replicated_vector_pattern<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ rd: Writable<Reg>,
+ pattern: u64,
+ size: VectorSize,
+ mut alloc_tmp: F,
+ ) -> SmallVec<[Inst; 5]> {
+ let lane_size = size.lane_size();
+
+ if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) {
+ smallvec![Inst::VecDupImm {
+ rd,
+ imm,
+ invert: false,
+ size
+ }]
+ } else if let Some(imm) = ASIMDMovModImm::maybe_from_u64(!pattern, lane_size) {
+ debug_assert_ne!(lane_size, ScalarSize::Size8);
+ debug_assert_ne!(lane_size, ScalarSize::Size64);
+
+ smallvec![Inst::VecDupImm {
+ rd,
+ imm,
+ invert: true,
+ size
+ }]
+ } else {
+ let tmp = alloc_tmp(RegClass::I64, I64);
+ let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern)[..]);
+
+ insts.push(Inst::VecDup {
+ rd,
+ rn: tmp.to_reg(),
+ size,
+ });
+
+ insts
+ }
+ }
+
+ /// Generic constructor for a load (zero-extending where appropriate).
+ pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
+ match ty {
+ B1 | B8 | I8 => Inst::ULoad8 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ B16 | I16 => Inst::ULoad16 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ B32 | I32 | R32 => Inst::ULoad32 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ B64 | I64 | R64 => Inst::ULoad64 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ F32 => Inst::FpuLoad32 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ F64 => Inst::FpuLoad64 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ _ => {
+ if ty.is_vector() {
+ let bits = ty_bits(ty);
+ let rd = into_reg;
+
+ if bits == 128 {
+ Inst::FpuLoad128 { rd, mem, flags }
+ } else {
+ assert_eq!(bits, 64);
+ Inst::FpuLoad64 { rd, mem, flags }
+ }
+ } else {
+ unimplemented!("gen_load({})", ty);
+ }
+ }
+ }
+ }
+
+ /// Generic constructor for a store.
+ pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
+ match ty {
+ B1 | B8 | I8 => Inst::Store8 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ B16 | I16 => Inst::Store16 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ B32 | I32 | R32 => Inst::Store32 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ B64 | I64 | R64 => Inst::Store64 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ F32 => Inst::FpuStore32 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ F64 => Inst::FpuStore64 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ _ => {
+ if ty.is_vector() {
+ let bits = ty_bits(ty);
+ let rd = from_reg;
+
+ if bits == 128 {
+ Inst::FpuStore128 { rd, mem, flags }
+ } else {
+ assert_eq!(bits, 64);
+ Inst::FpuStore64 { rd, mem, flags }
+ }
+ } else {
+ unimplemented!("gen_store({})", ty);
+ }
+ }
+ }
+ }
+}
+
+//=============================================================================
+// Instructions: get_regs
+
+fn memarg_regs(memarg: &AMode, collector: &mut RegUsageCollector) {
+ match memarg {
+ &AMode::Unscaled(reg, ..) | &AMode::UnsignedOffset(reg, ..) => {
+ collector.add_use(reg);
+ }
+ &AMode::RegReg(r1, r2, ..)
+ | &AMode::RegScaled(r1, r2, ..)
+ | &AMode::RegScaledExtended(r1, r2, ..)
+ | &AMode::RegExtended(r1, r2, ..) => {
+ collector.add_use(r1);
+ collector.add_use(r2);
+ }
+ &AMode::Label(..) => {}
+ &AMode::PreIndexed(reg, ..) | &AMode::PostIndexed(reg, ..) => {
+ collector.add_mod(reg);
+ }
+ &AMode::FPOffset(..) => {
+ collector.add_use(fp_reg());
+ }
+ &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => {
+ collector.add_use(stack_reg());
+ }
+ &AMode::RegOffset(r, ..) => {
+ collector.add_use(r);
+ }
+ }
+}
+
+fn pairmemarg_regs(pairmemarg: &PairAMode, collector: &mut RegUsageCollector) {
+ match pairmemarg {
+ &PairAMode::SignedOffset(reg, ..) => {
+ collector.add_use(reg);
+ }
+ &PairAMode::PreIndexed(reg, ..) | &PairAMode::PostIndexed(reg, ..) => {
+ collector.add_mod(reg);
+ }
+ }
+}
+
+fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
+ match inst {
+ &Inst::AluRRR { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::AluRRRR { rd, rn, rm, ra, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ collector.add_use(ra);
+ }
+ &Inst::AluRRImm12 { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::AluRRImmLogic { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::AluRRImmShift { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::AluRRRShift { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::AluRRRExtend { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::BitRR { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::ULoad8 { rd, ref mem, .. }
+ | &Inst::SLoad8 { rd, ref mem, .. }
+ | &Inst::ULoad16 { rd, ref mem, .. }
+ | &Inst::SLoad16 { rd, ref mem, .. }
+ | &Inst::ULoad32 { rd, ref mem, .. }
+ | &Inst::SLoad32 { rd, ref mem, .. }
+ | &Inst::ULoad64 { rd, ref mem, .. } => {
+ collector.add_def(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::Store8 { rd, ref mem, .. }
+ | &Inst::Store16 { rd, ref mem, .. }
+ | &Inst::Store32 { rd, ref mem, .. }
+ | &Inst::Store64 { rd, ref mem, .. } => {
+ collector.add_use(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::StoreP64 {
+ rt, rt2, ref mem, ..
+ } => {
+ collector.add_use(rt);
+ collector.add_use(rt2);
+ pairmemarg_regs(mem, collector);
+ }
+ &Inst::LoadP64 {
+ rt, rt2, ref mem, ..
+ } => {
+ collector.add_def(rt);
+ collector.add_def(rt2);
+ pairmemarg_regs(mem, collector);
+ }
+ &Inst::Mov64 { rd, rm } => {
+ collector.add_def(rd);
+ collector.add_use(rm);
+ }
+ &Inst::Mov32 { rd, rm } => {
+ collector.add_def(rd);
+ collector.add_use(rm);
+ }
+ &Inst::MovZ { rd, .. } | &Inst::MovN { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::MovK { rd, .. } => {
+ collector.add_mod(rd);
+ }
+ &Inst::CSel { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::CSet { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::CCmpImm { rn, .. } => {
+ collector.add_use(rn);
+ }
+ &Inst::AtomicRMW { .. } => {
+ collector.add_use(xreg(25));
+ collector.add_use(xreg(26));
+ collector.add_def(writable_xreg(24));
+ collector.add_def(writable_xreg(27));
+ collector.add_def(writable_xreg(28));
+ }
+ &Inst::AtomicCAS { .. } => {
+ collector.add_use(xreg(25));
+ collector.add_use(xreg(26));
+ collector.add_use(xreg(28));
+ collector.add_def(writable_xreg(24));
+ collector.add_def(writable_xreg(27));
+ }
+ &Inst::AtomicLoad { r_data, r_addr, .. } => {
+ collector.add_use(r_addr);
+ collector.add_def(r_data);
+ }
+ &Inst::AtomicStore { r_data, r_addr, .. } => {
+ collector.add_use(r_addr);
+ collector.add_use(r_data);
+ }
+ &Inst::Fence {} => {}
+ &Inst::FpuMove64 { rd, rn } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::FpuMove128 { rd, rn } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::FpuMoveFromVec { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::FpuRR { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::FpuRRR { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::FpuRRI { fpu_op, rd, rn, .. } => {
+ match fpu_op {
+ FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => collector.add_def(rd),
+ FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => collector.add_mod(rd),
+ }
+ collector.add_use(rn);
+ }
+ &Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ collector.add_use(ra);
+ }
+ &Inst::VecMisc { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+
+ &Inst::VecLanes { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecShiftImm { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecExtract { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::VecTbl {
+ rd,
+ rn,
+ rm,
+ is_extension,
+ } => {
+ collector.add_use(rn);
+ collector.add_use(rm);
+
+ if is_extension {
+ collector.add_mod(rd);
+ } else {
+ collector.add_def(rd);
+ }
+ }
+ &Inst::VecTbl2 {
+ rd,
+ rn,
+ rn2,
+ rm,
+ is_extension,
+ } => {
+ collector.add_use(rn);
+ collector.add_use(rn2);
+ collector.add_use(rm);
+
+ if is_extension {
+ collector.add_mod(rd);
+ } else {
+ collector.add_def(rd);
+ }
+ }
+ &Inst::VecLoadReplicate { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecCSel { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::FpuLoad32 { rd, ref mem, .. } => {
+ collector.add_def(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::FpuLoad64 { rd, ref mem, .. } => {
+ collector.add_def(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::FpuLoad128 { rd, ref mem, .. } => {
+ collector.add_def(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::FpuStore32 { rd, ref mem, .. } => {
+ collector.add_use(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::FpuStore64 { rd, ref mem, .. } => {
+ collector.add_use(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::FpuStore128 { rd, ref mem, .. } => {
+ collector.add_use(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::FpuToInt { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::IntToFpu { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::FpuCSel32 { rd, rn, rm, .. } | &Inst::FpuCSel64 { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::FpuRound { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::MovToFpu { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::MovToVec { rd, rn, .. } => {
+ collector.add_mod(rd);
+ collector.add_use(rn);
+ }
+ &Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecDup { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecDupFromFpu { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecDupImm { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::VecExtend { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecMovElement { rd, rn, .. } => {
+ collector.add_mod(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecMiscNarrow {
+ rd, rn, high_half, ..
+ } => {
+ collector.add_use(rn);
+
+ if high_half {
+ collector.add_mod(rd);
+ } else {
+ collector.add_def(rd);
+ }
+ }
+ &Inst::VecRRR {
+ alu_op, rd, rn, rm, ..
+ } => {
+ if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Umlal {
+ collector.add_mod(rd);
+ } else {
+ collector.add_def(rd);
+ }
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::MovToNZCV { rn } => {
+ collector.add_use(rn);
+ }
+ &Inst::MovFromNZCV { rd } => {
+ collector.add_def(rd);
+ }
+ &Inst::Extend { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::Jump { .. } | &Inst::Ret | &Inst::EpiloguePlaceholder => {}
+ &Inst::Call { ref info, .. } => {
+ collector.add_uses(&*info.uses);
+ collector.add_defs(&*info.defs);
+ }
+ &Inst::CallInd { ref info, .. } => {
+ collector.add_uses(&*info.uses);
+ collector.add_defs(&*info.defs);
+ collector.add_use(info.rn);
+ }
+ &Inst::CondBr { ref kind, .. } => match kind {
+ CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => {
+ collector.add_use(*rt);
+ }
+ CondBrKind::Cond(_) => {}
+ },
+ &Inst::IndirectBr { rn, .. } => {
+ collector.add_use(rn);
+ }
+ &Inst::Nop0 | &Inst::Nop4 => {}
+ &Inst::Brk => {}
+ &Inst::Udf { .. } => {}
+ &Inst::TrapIf { ref kind, .. } => match kind {
+ CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => {
+ collector.add_use(*rt);
+ }
+ CondBrKind::Cond(_) => {}
+ },
+ &Inst::Adr { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::Word4 { .. } | &Inst::Word8 { .. } => {}
+ &Inst::JTSequence {
+ ridx, rtmp1, rtmp2, ..
+ } => {
+ collector.add_use(ridx);
+ collector.add_def(rtmp1);
+ collector.add_def(rtmp2);
+ }
+ &Inst::LoadExtName { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::LoadAddr { rd, ref mem } => {
+ collector.add_def(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::VirtualSPOffsetAdj { .. } => {}
+ &Inst::EmitIsland { .. } => {}
+ }
+}
+
+//=============================================================================
+// Instructions: map_regs
+
+fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
+ fn map_use<RUM: RegUsageMapper>(m: &RUM, r: &mut Reg) {
+ if r.is_virtual() {
+ let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg();
+ *r = new;
+ }
+ }
+
+ fn map_def<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
+ if r.to_reg().is_virtual() {
+ let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg();
+ *r = Writable::from_reg(new);
+ }
+ }
+
+ fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
+ if r.to_reg().is_virtual() {
+ let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg();
+ *r = Writable::from_reg(new);
+ }
+ }
+
+ fn map_mem<RUM: RegUsageMapper>(m: &RUM, mem: &mut AMode) {
+ // N.B.: we take only the pre-map here, but this is OK because the
+ // only addressing modes that update registers (pre/post-increment on
+ // AArch64) both read and write registers, so they are "mods" rather
+ // than "defs", so must be the same in both the pre- and post-map.
+ match mem {
+ &mut AMode::Unscaled(ref mut reg, ..) => map_use(m, reg),
+ &mut AMode::UnsignedOffset(ref mut reg, ..) => map_use(m, reg),
+ &mut AMode::RegReg(ref mut r1, ref mut r2)
+ | &mut AMode::RegScaled(ref mut r1, ref mut r2, ..)
+ | &mut AMode::RegScaledExtended(ref mut r1, ref mut r2, ..)
+ | &mut AMode::RegExtended(ref mut r1, ref mut r2, ..) => {
+ map_use(m, r1);
+ map_use(m, r2);
+ }
+ &mut AMode::Label(..) => {}
+ &mut AMode::PreIndexed(ref mut r, ..) => map_mod(m, r),
+ &mut AMode::PostIndexed(ref mut r, ..) => map_mod(m, r),
+ &mut AMode::FPOffset(..)
+ | &mut AMode::SPOffset(..)
+ | &mut AMode::NominalSPOffset(..) => {}
+ &mut AMode::RegOffset(ref mut r, ..) => map_use(m, r),
+ };
+ }
+
+ fn map_pairmem<RUM: RegUsageMapper>(m: &RUM, mem: &mut PairAMode) {
+ match mem {
+ &mut PairAMode::SignedOffset(ref mut reg, ..) => map_use(m, reg),
+ &mut PairAMode::PreIndexed(ref mut reg, ..) => map_def(m, reg),
+ &mut PairAMode::PostIndexed(ref mut reg, ..) => map_def(m, reg),
+ }
+ }
+
+ fn map_br<RUM: RegUsageMapper>(m: &RUM, br: &mut CondBrKind) {
+ match br {
+ &mut CondBrKind::Zero(ref mut reg) => map_use(m, reg),
+ &mut CondBrKind::NotZero(ref mut reg) => map_use(m, reg),
+ &mut CondBrKind::Cond(..) => {}
+ };
+ }
+
+ match inst {
+ &mut Inst::AluRRR {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::AluRRRR {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ref mut ra,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ map_use(mapper, ra);
+ }
+ &mut Inst::AluRRImm12 {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::AluRRImmLogic {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::AluRRImmShift {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::AluRRRShift {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::AluRRRExtend {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::BitRR {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::ULoad8 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::SLoad8 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::ULoad16 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::SLoad16 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::ULoad32 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::SLoad32 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+
+ &mut Inst::ULoad64 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::Store8 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::Store16 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::Store32 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::Store64 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+
+ &mut Inst::StoreP64 {
+ ref mut rt,
+ ref mut rt2,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rt);
+ map_use(mapper, rt2);
+ map_pairmem(mapper, mem);
+ }
+ &mut Inst::LoadP64 {
+ ref mut rt,
+ ref mut rt2,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rt);
+ map_def(mapper, rt2);
+ map_pairmem(mapper, mem);
+ }
+ &mut Inst::Mov64 {
+ ref mut rd,
+ ref mut rm,
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rm);
+ }
+ &mut Inst::Mov32 {
+ ref mut rd,
+ ref mut rm,
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rm);
+ }
+ &mut Inst::MovZ { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::MovN { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::MovK { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::CSel {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::CSet { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::CCmpImm { ref mut rn, .. } => {
+ map_use(mapper, rn);
+ }
+ &mut Inst::AtomicRMW { .. } => {
+ // There are no vregs to map in this insn.
+ }
+ &mut Inst::AtomicCAS { .. } => {
+ // There are no vregs to map in this insn.
+ }
+ &mut Inst::AtomicLoad {
+ ref mut r_data,
+ ref mut r_addr,
+ ..
+ } => {
+ map_def(mapper, r_data);
+ map_use(mapper, r_addr);
+ }
+ &mut Inst::AtomicStore {
+ ref mut r_data,
+ ref mut r_addr,
+ ..
+ } => {
+ map_use(mapper, r_data);
+ map_use(mapper, r_addr);
+ }
+ &mut Inst::Fence {} => {}
+ &mut Inst::FpuMove64 {
+ ref mut rd,
+ ref mut rn,
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuMove128 {
+ ref mut rd,
+ ref mut rn,
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuMoveFromVec {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuRR {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuRRR {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuRRI {
+ fpu_op,
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ match fpu_op {
+ FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => map_def(mapper, rd),
+ FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => map_mod(mapper, rd),
+ }
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuRRRR {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ref mut ra,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ map_use(mapper, ra);
+ }
+ &mut Inst::VecMisc {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecLanes {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecShiftImm {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecExtract {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::VecTbl {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ is_extension,
+ } => {
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+
+ if is_extension {
+ map_mod(mapper, rd);
+ } else {
+ map_def(mapper, rd);
+ }
+ }
+ &mut Inst::VecTbl2 {
+ ref mut rd,
+ ref mut rn,
+ ref mut rn2,
+ ref mut rm,
+ is_extension,
+ } => {
+ map_use(mapper, rn);
+ map_use(mapper, rn2);
+ map_use(mapper, rm);
+
+ if is_extension {
+ map_mod(mapper, rd);
+ } else {
+ map_def(mapper, rd);
+ }
+ }
+ &mut Inst::VecLoadReplicate {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecCSel {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuCmp32 {
+ ref mut rn,
+ ref mut rm,
+ } => {
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuCmp64 {
+ ref mut rn,
+ ref mut rm,
+ } => {
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuLoad32 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::FpuLoad64 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::FpuLoad128 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::FpuStore32 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::FpuStore64 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::FpuStore128 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::LoadFpuConst64 { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::LoadFpuConst128 { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::FpuToInt {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::IntToFpu {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuCSel32 {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuCSel64 {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuRound {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::MovToFpu {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::MovToVec {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_mod(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::MovFromVec {
+ ref mut rd,
+ ref mut rn,
+ ..
+ }
+ | &mut Inst::MovFromVecSigned {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecDup {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecDupFromFpu {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecDupImm { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::VecExtend {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecMovElement {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_mod(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecMiscNarrow {
+ ref mut rd,
+ ref mut rn,
+ high_half,
+ ..
+ } => {
+ map_use(mapper, rn);
+
+ if high_half {
+ map_mod(mapper, rd);
+ } else {
+ map_def(mapper, rd);
+ }
+ }
+ &mut Inst::VecRRR {
+ alu_op,
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Umlal {
+ map_mod(mapper, rd);
+ } else {
+ map_def(mapper, rd);
+ }
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::MovToNZCV { ref mut rn } => {
+ map_use(mapper, rn);
+ }
+ &mut Inst::MovFromNZCV { ref mut rd } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::Extend {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::Jump { .. } => {}
+ &mut Inst::Call { ref mut info } => {
+ for r in info.uses.iter_mut() {
+ map_use(mapper, r);
+ }
+ for r in info.defs.iter_mut() {
+ map_def(mapper, r);
+ }
+ }
+ &mut Inst::Ret | &mut Inst::EpiloguePlaceholder => {}
+ &mut Inst::CallInd { ref mut info, .. } => {
+ for r in info.uses.iter_mut() {
+ map_use(mapper, r);
+ }
+ for r in info.defs.iter_mut() {
+ map_def(mapper, r);
+ }
+ map_use(mapper, &mut info.rn);
+ }
+ &mut Inst::CondBr { ref mut kind, .. } => {
+ map_br(mapper, kind);
+ }
+ &mut Inst::IndirectBr { ref mut rn, .. } => {
+ map_use(mapper, rn);
+ }
+ &mut Inst::Nop0 | &mut Inst::Nop4 | &mut Inst::Brk | &mut Inst::Udf { .. } => {}
+ &mut Inst::TrapIf { ref mut kind, .. } => {
+ map_br(mapper, kind);
+ }
+ &mut Inst::Adr { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::Word4 { .. } | &mut Inst::Word8 { .. } => {}
+ &mut Inst::JTSequence {
+ ref mut ridx,
+ ref mut rtmp1,
+ ref mut rtmp2,
+ ..
+ } => {
+ map_use(mapper, ridx);
+ map_def(mapper, rtmp1);
+ map_def(mapper, rtmp2);
+ }
+ &mut Inst::LoadExtName { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::LoadAddr {
+ ref mut rd,
+ ref mut mem,
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::VirtualSPOffsetAdj { .. } => {}
+ &mut Inst::EmitIsland { .. } => {}
+ }
+}
+
+//=============================================================================
+// Instructions: misc functions and external interface
+
+impl MachInst for Inst {
+ type LabelUse = LabelUse;
+
+ fn get_regs(&self, collector: &mut RegUsageCollector) {
+ aarch64_get_regs(self, collector)
+ }
+
+ fn map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
+ aarch64_map_regs(self, mapper);
+ }
+
+ fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
+ match self {
+ &Inst::Mov64 { rd, rm } => Some((rd, rm)),
+ &Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
+ &Inst::FpuMove128 { rd, rn } => Some((rd, rn)),
+ _ => None,
+ }
+ }
+
+ fn is_epilogue_placeholder(&self) -> bool {
+ if let Inst::EpiloguePlaceholder = self {
+ true
+ } else {
+ false
+ }
+ }
+
+ fn is_included_in_clobbers(&self) -> bool {
+ // We exclude call instructions from the clobber-set when they are calls
+ // from caller to callee with the same ABI. Such calls cannot possibly
+ // force any new registers to be saved in the prologue, because anything
+ // that the callee clobbers, the caller is also allowed to clobber. This
+ // both saves work and enables us to more precisely follow the
+ // half-caller-save, half-callee-save SysV ABI for some vector
+ // registers.
+ //
+ // See the note in [crate::isa::aarch64::abi::is_caller_save_reg] for
+ // more information on this ABI-implementation hack.
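+ //
+ // E.g. (informally): a call between functions sharing a calling convention contributes
+ // nothing here, while a cross-ABI call is conservatively kept in the clobber set.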
+ match self {
+ &Inst::Call { ref info } => info.caller_callconv != info.callee_callconv,
+ &Inst::CallInd { ref info } => info.caller_callconv != info.callee_callconv,
+ _ => true,
+ }
+ }
+
+ fn is_term<'a>(&'a self) -> MachTerminator<'a> {
+ match self {
+ &Inst::Ret | &Inst::EpiloguePlaceholder => MachTerminator::Ret,
+ &Inst::Jump { dest } => MachTerminator::Uncond(dest.as_label().unwrap()),
+ &Inst::CondBr {
+ taken, not_taken, ..
+ } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
+ &Inst::IndirectBr { ref targets, .. } => MachTerminator::Indirect(&targets[..]),
+ &Inst::JTSequence { ref info, .. } => {
+ MachTerminator::Indirect(&info.targets_for_term[..])
+ }
+ _ => MachTerminator::None,
+ }
+ }
+
+ fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
+ assert!(ty.bits() <= 128);
+ Inst::mov(to_reg, from_reg)
+ }
+
+ fn gen_constant<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ to_reg: Writable<Reg>,
+ value: u64,
+ ty: Type,
+ alloc_tmp: F,
+ ) -> SmallVec<[Inst; 4]> {
+ if ty == F64 {
+ Inst::load_fp_constant64(to_reg, value, alloc_tmp)
+ } else if ty == F32 {
+ Inst::load_fp_constant32(to_reg, value as u32, alloc_tmp)
+ } else {
+ // Must be an integer type.
+ debug_assert!(
+ ty == B1
+ || ty == I8
+ || ty == B8
+ || ty == I16
+ || ty == B16
+ || ty == I32
+ || ty == B32
+ || ty == I64
+ || ty == B64
+ || ty == R32
+ || ty == R64
+ );
+ Inst::load_constant(to_reg, value)
+ }
+ }
+
+ fn gen_zero_len_nop() -> Inst {
+ Inst::Nop0
+ }
+
+ fn gen_nop(preferred_size: usize) -> Inst {
+ // We can't give a NOP (or any insn) < 4 bytes.
+ assert!(preferred_size >= 4);
+ Inst::Nop4
+ }
+
+ fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
+ None
+ }
+
+ fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
+ match ty {
+ I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 | R32 | R64 => Ok(RegClass::I64),
+ F32 | F64 => Ok(RegClass::V128),
+ IFLAGS | FFLAGS => Ok(RegClass::I64),
+ B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 | F32X4 | F64X2 => {
+ Ok(RegClass::V128)
+ }
+ _ => Err(CodegenError::Unsupported(format!(
+ "Unexpected SSA-value type: {}",
+ ty
+ ))),
+ }
+ }
+
+ fn gen_jump(target: MachLabel) -> Inst {
+ Inst::Jump {
+ dest: BranchTarget::Label(target),
+ }
+ }
+
+ fn reg_universe(flags: &settings::Flags) -> RealRegUniverse {
+ create_reg_universe(flags)
+ }
+
+ fn worst_case_size() -> CodeOffset {
+ // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of
+ // an 8-instruction sequence (saturating int-to-float conversions) with three embedded
+ // 64-bit f64 constants.
+ //
+ // Note that inline jump-tables handle island/pool insertion separately, so we do not need
+ // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not
+ // feasible for other reasons).
+ 44
+ }
+
+ fn ref_type_regclass(_: &settings::Flags) -> RegClass {
+ RegClass::I64
+ }
+}
+
+//=============================================================================
+// Pretty-printing of instructions.
+
+fn mem_finalize_for_show(
+ mem: &AMode,
+ mb_rru: Option<&RealRegUniverse>,
+ state: &EmitState,
+) -> (String, AMode) {
+ let (mem_insts, mem) = mem_finalize(0, mem, state);
+ let mut mem_str = mem_insts
+ .into_iter()
+ .map(|inst| inst.show_rru(mb_rru))
+ .collect::<Vec<_>>()
+ .join(" ; ");
+ if !mem_str.is_empty() {
+ mem_str += " ; ";
+ }
+
+ (mem_str, mem)
+}
+
+impl PrettyPrint for Inst {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ self.pretty_print(mb_rru, &mut EmitState::default())
+ }
+}
+
+impl Inst {
+ fn print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
+ fn op_name_size(alu_op: ALUOp) -> (&'static str, OperandSize) {
+ match alu_op {
+ ALUOp::Add32 => ("add", OperandSize::Size32),
+ ALUOp::Add64 => ("add", OperandSize::Size64),
+ ALUOp::Sub32 => ("sub", OperandSize::Size32),
+ ALUOp::Sub64 => ("sub", OperandSize::Size64),
+ ALUOp::Orr32 => ("orr", OperandSize::Size32),
+ ALUOp::Orr64 => ("orr", OperandSize::Size64),
+ ALUOp::And32 => ("and", OperandSize::Size32),
+ ALUOp::And64 => ("and", OperandSize::Size64),
+ ALUOp::Eor32 => ("eor", OperandSize::Size32),
+ ALUOp::Eor64 => ("eor", OperandSize::Size64),
+ ALUOp::AddS32 => ("adds", OperandSize::Size32),
+ ALUOp::AddS64 => ("adds", OperandSize::Size64),
+ ALUOp::SubS32 => ("subs", OperandSize::Size32),
+ ALUOp::SubS64 => ("subs", OperandSize::Size64),
+ ALUOp::SMulH => ("smulh", OperandSize::Size64),
+ ALUOp::UMulH => ("umulh", OperandSize::Size64),
+ ALUOp::SDiv64 => ("sdiv", OperandSize::Size64),
+ ALUOp::UDiv64 => ("udiv", OperandSize::Size64),
+ ALUOp::AndNot32 => ("bic", OperandSize::Size32),
+ ALUOp::AndNot64 => ("bic", OperandSize::Size64),
+ ALUOp::OrrNot32 => ("orn", OperandSize::Size32),
+ ALUOp::OrrNot64 => ("orn", OperandSize::Size64),
+ ALUOp::EorNot32 => ("eon", OperandSize::Size32),
+ ALUOp::EorNot64 => ("eon", OperandSize::Size64),
+ ALUOp::RotR32 => ("ror", OperandSize::Size32),
+ ALUOp::RotR64 => ("ror", OperandSize::Size64),
+ ALUOp::Lsr32 => ("lsr", OperandSize::Size32),
+ ALUOp::Lsr64 => ("lsr", OperandSize::Size64),
+ ALUOp::Asr32 => ("asr", OperandSize::Size32),
+ ALUOp::Asr64 => ("asr", OperandSize::Size64),
+ ALUOp::Lsl32 => ("lsl", OperandSize::Size32),
+ ALUOp::Lsl64 => ("lsl", OperandSize::Size64),
+ }
+ }
+
+ match self {
+ &Inst::Nop0 => "nop-zero-len".to_string(),
+ &Inst::Nop4 => "nop".to_string(),
+ &Inst::AluRRR { alu_op, rd, rn, rm } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let rm = show_ireg_sized(rm, mb_rru, size);
+ format!("{} {}, {}, {}", op, rd, rn, rm)
+ }
+ &Inst::AluRRRR {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ra,
+ } => {
+ let (op, size) = match alu_op {
+ ALUOp3::MAdd32 => ("madd", OperandSize::Size32),
+ ALUOp3::MAdd64 => ("madd", OperandSize::Size64),
+ ALUOp3::MSub32 => ("msub", OperandSize::Size32),
+ ALUOp3::MSub64 => ("msub", OperandSize::Size64),
+ };
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let rm = show_ireg_sized(rm, mb_rru, size);
+ let ra = show_ireg_sized(ra, mb_rru, size);
+
+ format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra)
+ }
+ &Inst::AluRRImm12 {
+ alu_op,
+ rd,
+ rn,
+ ref imm12,
+ } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+
+ if imm12.bits == 0 && alu_op == ALUOp::Add64 {
+ // special-case MOV (used for moving into SP).
+ format!("mov {}, {}", rd, rn)
+ } else {
+ let imm12 = imm12.show_rru(mb_rru);
+ format!("{} {}, {}, {}", op, rd, rn, imm12)
+ }
+ }
+ &Inst::AluRRImmLogic {
+ alu_op,
+ rd,
+ rn,
+ ref imml,
+ } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let imml = imml.show_rru(mb_rru);
+ format!("{} {}, {}, {}", op, rd, rn, imml)
+ }
+ &Inst::AluRRImmShift {
+ alu_op,
+ rd,
+ rn,
+ ref immshift,
+ } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let immshift = immshift.show_rru(mb_rru);
+ format!("{} {}, {}, {}", op, rd, rn, immshift)
+ }
+ &Inst::AluRRRShift {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ref shiftop,
+ } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let rm = show_ireg_sized(rm, mb_rru, size);
+ let shiftop = shiftop.show_rru(mb_rru);
+ format!("{} {}, {}, {}, {}", op, rd, rn, rm, shiftop)
+ }
+ &Inst::AluRRRExtend {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ref extendop,
+ } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let rm = show_ireg_sized(rm, mb_rru, size);
+ let extendop = extendop.show_rru(mb_rru);
+ format!("{} {}, {}, {}, {}", op, rd, rn, rm, extendop)
+ }
+ &Inst::BitRR { op, rd, rn } => {
+ let size = op.operand_size();
+ let op = op.op_str();
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::ULoad8 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::SLoad8 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::ULoad16 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::SLoad16 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::ULoad32 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::SLoad32 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::ULoad64 {
+ rd,
+ ref mem,
+ ..
+ } => {
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+
+ let is_unscaled = match &mem {
+ &AMode::Unscaled(..) => true,
+ _ => false,
+ };
+ let (op, size) = match (self, is_unscaled) {
+ (&Inst::ULoad8 { .. }, false) => ("ldrb", OperandSize::Size32),
+ (&Inst::ULoad8 { .. }, true) => ("ldurb", OperandSize::Size32),
+ (&Inst::SLoad8 { .. }, false) => ("ldrsb", OperandSize::Size64),
+ (&Inst::SLoad8 { .. }, true) => ("ldursb", OperandSize::Size64),
+ (&Inst::ULoad16 { .. }, false) => ("ldrh", OperandSize::Size32),
+ (&Inst::ULoad16 { .. }, true) => ("ldurh", OperandSize::Size32),
+ (&Inst::SLoad16 { .. }, false) => ("ldrsh", OperandSize::Size64),
+ (&Inst::SLoad16 { .. }, true) => ("ldursh", OperandSize::Size64),
+ (&Inst::ULoad32 { .. }, false) => ("ldr", OperandSize::Size32),
+ (&Inst::ULoad32 { .. }, true) => ("ldur", OperandSize::Size32),
+ (&Inst::SLoad32 { .. }, false) => ("ldrsw", OperandSize::Size64),
+ (&Inst::SLoad32 { .. }, true) => ("ldursw", OperandSize::Size64),
+ (&Inst::ULoad64 { .. }, false) => ("ldr", OperandSize::Size64),
+ (&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64),
+ _ => unreachable!(),
+ };
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}{} {}, {}", mem_str, op, rd, mem)
+ }
+ &Inst::Store8 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::Store16 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::Store32 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::Store64 {
+ rd,
+ ref mem,
+ ..
+ } => {
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+
+ let is_unscaled = match &mem {
+ &AMode::Unscaled(..) => true,
+ _ => false,
+ };
+ let (op, size) = match (self, is_unscaled) {
+ (&Inst::Store8 { .. }, false) => ("strb", OperandSize::Size32),
+ (&Inst::Store8 { .. }, true) => ("sturb", OperandSize::Size32),
+ (&Inst::Store16 { .. }, false) => ("strh", OperandSize::Size32),
+ (&Inst::Store16 { .. }, true) => ("sturh", OperandSize::Size32),
+ (&Inst::Store32 { .. }, false) => ("str", OperandSize::Size32),
+ (&Inst::Store32 { .. }, true) => ("stur", OperandSize::Size32),
+ (&Inst::Store64 { .. }, false) => ("str", OperandSize::Size64),
+ (&Inst::Store64 { .. }, true) => ("stur", OperandSize::Size64),
+ _ => unreachable!(),
+ };
+ let rd = show_ireg_sized(rd, mb_rru, size);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}{} {}, {}", mem_str, op, rd, mem)
+ }
+ &Inst::StoreP64 { rt, rt2, ref mem, .. } => {
+ let rt = rt.show_rru(mb_rru);
+ let rt2 = rt2.show_rru(mb_rru);
+ let mem = mem.show_rru(mb_rru);
+ format!("stp {}, {}, {}", rt, rt2, mem)
+ }
+ &Inst::LoadP64 { rt, rt2, ref mem, .. } => {
+ let rt = rt.to_reg().show_rru(mb_rru);
+ let rt2 = rt2.to_reg().show_rru(mb_rru);
+ let mem = mem.show_rru(mb_rru);
+ format!("ldp {}, {}, {}", rt, rt2, mem)
+ }
+ &Inst::Mov64 { rd, rm } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let rm = rm.show_rru(mb_rru);
+ format!("mov {}, {}", rd, rm)
+ }
+ &Inst::Mov32 { rd, rm } => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
+ let rm = show_ireg_sized(rm, mb_rru, OperandSize::Size32);
+ format!("mov {}, {}", rd, rm)
+ }
+ &Inst::MovZ { rd, ref imm, size } => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let imm = imm.show_rru(mb_rru);
+ format!("movz {}, {}", rd, imm)
+ }
+ &Inst::MovN { rd, ref imm, size } => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let imm = imm.show_rru(mb_rru);
+ format!("movn {}, {}", rd, imm)
+ }
+ &Inst::MovK { rd, ref imm, size } => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let imm = imm.show_rru(mb_rru);
+ format!("movk {}, {}", rd, imm)
+ }
+ &Inst::CSel { rd, rn, rm, cond } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let rn = rn.show_rru(mb_rru);
+ let rm = rm.show_rru(mb_rru);
+ let cond = cond.show_rru(mb_rru);
+ format!("csel {}, {}, {}, {}", rd, rn, rm, cond)
+ }
+ &Inst::CSet { rd, cond } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let cond = cond.show_rru(mb_rru);
+ format!("cset {}, {}", rd, cond)
+ }
+ &Inst::CCmpImm {
+ size,
+ rn,
+ imm,
+ nzcv,
+ cond,
+ } => {
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let imm = imm.show_rru(mb_rru);
+ let nzcv = nzcv.show_rru(mb_rru);
+ let cond = cond.show_rru(mb_rru);
+ format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond)
+ }
+ &Inst::AtomicRMW { ty, op, .. } => {
+ format!(
+ "atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}",
+ ty.bits(), op)
+ }
+ &Inst::AtomicCAS { ty, .. } => {
+ format!(
+ "atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}",
+ ty.bits())
+ }
+ &Inst::AtomicLoad { ty, r_data, r_addr, .. } => {
+ format!(
+ "atomically {{ {} = zero_extend_{}_bits_at[{}] }}",
+ r_data.show_rru(mb_rru), ty.bits(), r_addr.show_rru(mb_rru))
+ }
+ &Inst::AtomicStore { ty, r_data, r_addr, .. } => {
+ format!(
+ "atomically {{ {}_bits_at[{}] = {} }}", ty.bits(), r_addr.show_rru(mb_rru), r_data.show_rru(mb_rru))
+ }
+ &Inst::Fence {} => {
+ format!("dmb ish")
+ }
+ &Inst::FpuMove64 { rd, rn } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let rn = rn.show_rru(mb_rru);
+ format!("mov {}.8b, {}.8b", rd, rn)
+ }
+ &Inst::FpuMove128 { rd, rn } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let rn = rn.show_rru(mb_rru);
+ format!("mov {}.16b, {}.16b", rd, rn)
+ }
+ &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
+ let rn = show_vreg_element(rn, mb_rru, idx, size);
+ format!("mov {}, {}", rd, rn)
+ }
+ &Inst::FpuRR { fpu_op, rd, rn } => {
+ let (op, sizesrc, sizedest) = match fpu_op {
+ FPUOp1::Abs32 => ("fabs", ScalarSize::Size32, ScalarSize::Size32),
+ FPUOp1::Abs64 => ("fabs", ScalarSize::Size64, ScalarSize::Size64),
+ FPUOp1::Neg32 => ("fneg", ScalarSize::Size32, ScalarSize::Size32),
+ FPUOp1::Neg64 => ("fneg", ScalarSize::Size64, ScalarSize::Size64),
+ FPUOp1::Sqrt32 => ("fsqrt", ScalarSize::Size32, ScalarSize::Size32),
+ FPUOp1::Sqrt64 => ("fsqrt", ScalarSize::Size64, ScalarSize::Size64),
+ FPUOp1::Cvt32To64 => ("fcvt", ScalarSize::Size32, ScalarSize::Size64),
+ FPUOp1::Cvt64To32 => ("fcvt", ScalarSize::Size64, ScalarSize::Size32),
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest);
+ let rn = show_vreg_scalar(rn, mb_rru, sizesrc);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::FpuRRR { fpu_op, rd, rn, rm } => {
+ let (op, size) = match fpu_op {
+ FPUOp2::Add32 => ("fadd", ScalarSize::Size32),
+ FPUOp2::Add64 => ("fadd", ScalarSize::Size64),
+ FPUOp2::Sub32 => ("fsub", ScalarSize::Size32),
+ FPUOp2::Sub64 => ("fsub", ScalarSize::Size64),
+ FPUOp2::Mul32 => ("fmul", ScalarSize::Size32),
+ FPUOp2::Mul64 => ("fmul", ScalarSize::Size64),
+ FPUOp2::Div32 => ("fdiv", ScalarSize::Size32),
+ FPUOp2::Div64 => ("fdiv", ScalarSize::Size64),
+ FPUOp2::Max32 => ("fmax", ScalarSize::Size32),
+ FPUOp2::Max64 => ("fmax", ScalarSize::Size64),
+ FPUOp2::Min32 => ("fmin", ScalarSize::Size32),
+ FPUOp2::Min64 => ("fmin", ScalarSize::Size64),
+ FPUOp2::Sqadd64 => ("sqadd", ScalarSize::Size64),
+ FPUOp2::Uqadd64 => ("uqadd", ScalarSize::Size64),
+ FPUOp2::Sqsub64 => ("sqsub", ScalarSize::Size64),
+ FPUOp2::Uqsub64 => ("uqsub", ScalarSize::Size64),
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
+ let rn = show_vreg_scalar(rn, mb_rru, size);
+ let rm = show_vreg_scalar(rm, mb_rru, size);
+ format!("{} {}, {}, {}", op, rd, rn, rm)
+ }
+ &Inst::FpuRRI { fpu_op, rd, rn } => {
+ let (op, imm, vector) = match fpu_op {
+ FPUOpRI::UShr32(imm) => ("ushr", imm.show_rru(mb_rru), true),
+ FPUOpRI::UShr64(imm) => ("ushr", imm.show_rru(mb_rru), false),
+ FPUOpRI::Sli32(imm) => ("sli", imm.show_rru(mb_rru), true),
+ FPUOpRI::Sli64(imm) => ("sli", imm.show_rru(mb_rru), false),
+ };
+
+ let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector {
+ |reg, mb_rru| show_vreg_vector(reg, mb_rru, VectorSize::Size32x2)
+ } else {
+ |reg, mb_rru| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64)
+ };
+ let rd = show_vreg_fn(rd.to_reg(), mb_rru);
+ let rn = show_vreg_fn(rn, mb_rru);
+ format!("{} {}, {}, {}", op, rd, rn, imm)
+ }
+ &Inst::FpuRRRR {
+ fpu_op,
+ rd,
+ rn,
+ rm,
+ ra,
+ } => {
+ let (op, size) = match fpu_op {
+ FPUOp3::MAdd32 => ("fmadd", ScalarSize::Size32),
+ FPUOp3::MAdd64 => ("fmadd", ScalarSize::Size64),
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
+ let rn = show_vreg_scalar(rn, mb_rru, size);
+ let rm = show_vreg_scalar(rm, mb_rru, size);
+ let ra = show_vreg_scalar(ra, mb_rru, size);
+ format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra)
+ }
+ &Inst::FpuCmp32 { rn, rm } => {
+ let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32);
+ let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32);
+ format!("fcmp {}, {}", rn, rm)
+ }
+ &Inst::FpuCmp64 { rn, rm } => {
+ let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64);
+ let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64);
+ format!("fcmp {}, {}", rn, rm)
+ }
+ &Inst::FpuLoad32 { rd, ref mem, .. } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32);
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}ldr {}, {}", mem_str, rd, mem)
+ }
+ &Inst::FpuLoad64 { rd, ref mem, .. } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}ldr {}, {}", mem_str, rd, mem)
+ }
+ &Inst::FpuLoad128 { rd, ref mem, .. } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let rd = "q".to_string() + &rd[1..];
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}ldr {}, {}", mem_str, rd, mem)
+ }
+ &Inst::FpuStore32 { rd, ref mem, .. } => {
+ let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size32);
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}str {}, {}", mem_str, rd, mem)
+ }
+ &Inst::FpuStore64 { rd, ref mem, .. } => {
+ let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size64);
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}str {}, {}", mem_str, rd, mem)
+ }
+ &Inst::FpuStore128 { rd, ref mem, .. } => {
+ let rd = rd.show_rru(mb_rru);
+ let rd = "q".to_string() + &rd[1..];
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}str {}, {}", mem_str, rd, mem)
+ }
+ &Inst::LoadFpuConst64 { rd, const_data } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
+ format!("ldr {}, pc+8 ; b 12 ; data.f64 {}", rd, f64::from_bits(const_data))
+ }
+ &Inst::LoadFpuConst128 { rd, const_data } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size128);
+ format!("ldr {}, pc+8 ; b 20 ; data.f128 0x{:032x}", rd, const_data)
+ }
+ &Inst::FpuToInt { op, rd, rn } => {
+ let (op, sizesrc, sizedest) = match op {
+ FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32),
+ FpuToIntOp::F32ToU32 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size32),
+ FpuToIntOp::F32ToI64 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size64),
+ FpuToIntOp::F32ToU64 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size64),
+ FpuToIntOp::F64ToI32 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size32),
+ FpuToIntOp::F64ToU32 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size32),
+ FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64),
+ FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64),
+ };
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, sizedest);
+ let rn = show_vreg_scalar(rn, mb_rru, sizesrc);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::IntToFpu { op, rd, rn } => {
+ let (op, sizesrc, sizedest) = match op {
+ IntToFpuOp::I32ToF32 => ("scvtf", OperandSize::Size32, ScalarSize::Size32),
+ IntToFpuOp::U32ToF32 => ("ucvtf", OperandSize::Size32, ScalarSize::Size32),
+ IntToFpuOp::I64ToF32 => ("scvtf", OperandSize::Size64, ScalarSize::Size32),
+ IntToFpuOp::U64ToF32 => ("ucvtf", OperandSize::Size64, ScalarSize::Size32),
+ IntToFpuOp::I32ToF64 => ("scvtf", OperandSize::Size32, ScalarSize::Size64),
+ IntToFpuOp::U32ToF64 => ("ucvtf", OperandSize::Size32, ScalarSize::Size64),
+ IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64),
+ IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64),
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest);
+ let rn = show_ireg_sized(rn, mb_rru, sizesrc);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::FpuCSel32 { rd, rn, rm, cond } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32);
+ let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32);
+ let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32);
+ let cond = cond.show_rru(mb_rru);
+ format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond)
+ }
+ &Inst::FpuCSel64 { rd, rn, rm, cond } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
+ let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64);
+ let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64);
+ let cond = cond.show_rru(mb_rru);
+ format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond)
+ }
+ &Inst::FpuRound { op, rd, rn } => {
+ let (inst, size) = match op {
+ FpuRoundMode::Minus32 => ("frintm", ScalarSize::Size32),
+ FpuRoundMode::Minus64 => ("frintm", ScalarSize::Size64),
+ FpuRoundMode::Plus32 => ("frintp", ScalarSize::Size32),
+ FpuRoundMode::Plus64 => ("frintp", ScalarSize::Size64),
+ FpuRoundMode::Zero32 => ("frintz", ScalarSize::Size32),
+ FpuRoundMode::Zero64 => ("frintz", ScalarSize::Size64),
+ FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32),
+ FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64),
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
+ let rn = show_vreg_scalar(rn, mb_rru, size);
+ format!("{} {}, {}", inst, rd, rn)
+ }
+ &Inst::MovToFpu { rd, rn, size } => {
+ let operand_size = size.operand_size();
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, operand_size);
+ format!("fmov {}, {}", rd, rn)
+ }
+ &Inst::MovToVec { rd, rn, idx, size } => {
+ let rd = show_vreg_element(rd.to_reg(), mb_rru, idx, size);
+ let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
+ format!("mov {}, {}", rd, rn)
+ }
+ &Inst::MovFromVec { rd, rn, idx, size } => {
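+ // "mov" (to general register) is the standard alias of "umov" for 32- and 64-bit lanes; narrower lanes have no such alias and use "umov" directly.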
+ let op = match size {
+ VectorSize::Size8x16 => "umov",
+ VectorSize::Size16x8 => "umov",
+ VectorSize::Size32x4 => "mov",
+ VectorSize::Size64x2 => "mov",
+ _ => unimplemented!(),
+ };
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size.operand_size());
+ let rn = show_vreg_element(rn, mb_rru, idx, size);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::MovFromVecSigned {
+ rd,
+ rn,
+ idx,
+ size,
+ scalar_size,
+ } => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, scalar_size);
+ let rn = show_vreg_element(rn, mb_rru, idx, size);
+ format!("smov {}, {}", rd, rn)
+ }
+ &Inst::VecDup { rd, rn, size } => {
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
+ format!("dup {}, {}", rd, rn)
+ }
+ &Inst::VecDupFromFpu { rd, rn, size } => {
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+ let rn = show_vreg_element(rn, mb_rru, 0, size);
+ format!("dup {}, {}", rd, rn)
+ }
+ &Inst::VecDupImm { rd, imm, invert, size } => {
+ let imm = imm.show_rru(mb_rru);
+ let op = if invert {
+ "mvni"
+ } else {
+ "movi"
+ };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+
+ format!("{} {}, {}", op, rd, imm)
+ }
+ &Inst::VecExtend { t, rd, rn, high_half } => {
+ let (op, dest, src) = match (t, high_half) {
+ (VecExtendOp::Sxtl8, false) => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8),
+ (VecExtendOp::Sxtl8, true) => ("sxtl2", VectorSize::Size16x8, VectorSize::Size8x16),
+ (VecExtendOp::Sxtl16, false) => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4),
+ (VecExtendOp::Sxtl16, true) => ("sxtl2", VectorSize::Size32x4, VectorSize::Size16x8),
+ (VecExtendOp::Sxtl32, false) => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2),
+ (VecExtendOp::Sxtl32, true) => ("sxtl2", VectorSize::Size64x2, VectorSize::Size32x4),
+ (VecExtendOp::Uxtl8, false) => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8),
+ (VecExtendOp::Uxtl8, true) => ("uxtl2", VectorSize::Size16x8, VectorSize::Size8x16),
+ (VecExtendOp::Uxtl16, false) => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4),
+ (VecExtendOp::Uxtl16, true) => ("uxtl2", VectorSize::Size32x4, VectorSize::Size16x8),
+ (VecExtendOp::Uxtl32, false) => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2),
+ (VecExtendOp::Uxtl32, true) => ("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4),
+ };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
+ let rn = show_vreg_vector(rn, mb_rru, src);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::VecMovElement {
+ rd,
+ rn,
+ dest_idx,
+ src_idx,
+ size,
+ } => {
+ let rd = show_vreg_element(rd.to_reg(), mb_rru, dest_idx, size);
+ let rn = show_vreg_element(rn, mb_rru, src_idx, size);
+ format!("mov {}, {}", rd, rn)
+ }
+ &Inst::VecMiscNarrow { op, rd, rn, size, high_half } => {
+ let dest_size = if high_half {
+ assert!(size.is_128bits());
+ size
+ } else {
+ size.halve()
+ };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size);
+ let rn = show_vreg_vector(rn, mb_rru, size.widen());
+ let op = match (op, high_half) {
+ (VecMiscNarrowOp::Xtn, false) => "xtn",
+ (VecMiscNarrowOp::Xtn, true) => "xtn2",
+ (VecMiscNarrowOp::Sqxtn, false) => "sqxtn",
+ (VecMiscNarrowOp::Sqxtn, true) => "sqxtn2",
+ (VecMiscNarrowOp::Sqxtun, false) => "sqxtun",
+ (VecMiscNarrowOp::Sqxtun, true) => "sqxtun2",
+ };
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::VecRRR {
+ rd,
+ rn,
+ rm,
+ alu_op,
+ size,
+ } => {
+ let (op, size) = match alu_op {
+ VecALUOp::Sqadd => ("sqadd", size),
+ VecALUOp::Uqadd => ("uqadd", size),
+ VecALUOp::Sqsub => ("sqsub", size),
+ VecALUOp::Uqsub => ("uqsub", size),
+ VecALUOp::Cmeq => ("cmeq", size),
+ VecALUOp::Cmge => ("cmge", size),
+ VecALUOp::Cmgt => ("cmgt", size),
+ VecALUOp::Cmhs => ("cmhs", size),
+ VecALUOp::Cmhi => ("cmhi", size),
+ VecALUOp::Fcmeq => ("fcmeq", size),
+ VecALUOp::Fcmgt => ("fcmgt", size),
+ VecALUOp::Fcmge => ("fcmge", size),
+ VecALUOp::And => ("and", VectorSize::Size8x16),
+ VecALUOp::Bic => ("bic", VectorSize::Size8x16),
+ VecALUOp::Orr => ("orr", VectorSize::Size8x16),
+ VecALUOp::Eor => ("eor", VectorSize::Size8x16),
+ VecALUOp::Bsl => ("bsl", VectorSize::Size8x16),
+ VecALUOp::Umaxp => ("umaxp", size),
+ VecALUOp::Add => ("add", size),
+ VecALUOp::Sub => ("sub", size),
+ VecALUOp::Mul => ("mul", size),
+ VecALUOp::Sshl => ("sshl", size),
+ VecALUOp::Ushl => ("ushl", size),
+ VecALUOp::Umin => ("umin", size),
+ VecALUOp::Smin => ("smin", size),
+ VecALUOp::Umax => ("umax", size),
+ VecALUOp::Smax => ("smax", size),
+ VecALUOp::Urhadd => ("urhadd", size),
+ VecALUOp::Fadd => ("fadd", size),
+ VecALUOp::Fsub => ("fsub", size),
+ VecALUOp::Fdiv => ("fdiv", size),
+ VecALUOp::Fmax => ("fmax", size),
+ VecALUOp::Fmin => ("fmin", size),
+ VecALUOp::Fmul => ("fmul", size),
+ VecALUOp::Addp => ("addp", size),
+ VecALUOp::Umlal => ("umlal", size),
+ VecALUOp::Zip1 => ("zip1", size),
+ VecALUOp::Smull => ("smull", size),
+ VecALUOp::Smull2 => ("smull2", size),
+ };
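+ // Widening ops (umlal, smull, smull2) print a double-width destination; smull additionally shows its sources with the halved (lower-half) arrangement.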
+ let rd_size = match alu_op {
+ VecALUOp::Umlal | VecALUOp::Smull | VecALUOp::Smull2 => size.widen(),
+ _ => size
+ };
+ let rn_size = match alu_op {
+ VecALUOp::Smull => size.halve(),
+ _ => size
+ };
+ let rm_size = rn_size;
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
+ let rn = show_vreg_vector(rn, mb_rru, rn_size);
+ let rm = show_vreg_vector(rm, mb_rru, rm_size);
+ format!("{} {}, {}, {}", op, rd, rn, rm)
+ }
+ &Inst::VecMisc { op, rd, rn, size } => {
+ let is_shll = op == VecMisc2::Shll;
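+ // shll shifts each element left by the element width, so the printed immediate matches the source lane size.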
+ let suffix = match (is_shll, size) {
+ (true, VectorSize::Size8x8) => ", #8",
+ (true, VectorSize::Size16x4) => ", #16",
+ (true, VectorSize::Size32x2) => ", #32",
+ _ => "",
+ };
+
+ let (op, size) = match op {
+ VecMisc2::Not => (
+ "mvn",
+ if size.is_128bits() {
+ VectorSize::Size8x16
+ } else {
+ VectorSize::Size8x8
+ },
+ ),
+ VecMisc2::Neg => ("neg", size),
+ VecMisc2::Abs => ("abs", size),
+ VecMisc2::Fabs => ("fabs", size),
+ VecMisc2::Fneg => ("fneg", size),
+ VecMisc2::Fsqrt => ("fsqrt", size),
+ VecMisc2::Rev64 => ("rev64", size),
+ VecMisc2::Shll => ("shll", size),
+ VecMisc2::Fcvtzs => ("fcvtzs", size),
+ VecMisc2::Fcvtzu => ("fcvtzu", size),
+ VecMisc2::Scvtf => ("scvtf", size),
+ VecMisc2::Ucvtf => ("ucvtf", size),
+ VecMisc2::Frintn => ("frintn", size),
+ VecMisc2::Frintz => ("frintz", size),
+ VecMisc2::Frintm => ("frintm", size),
+ VecMisc2::Frintp => ("frintp", size),
+ };
+
+ let rd_size = if is_shll { size.widen() } else { size };
+
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
+ let rn = show_vreg_vector(rn, mb_rru, size);
+ format!("{} {}, {}{}", op, rd, rn, suffix)
+ }
+ &Inst::VecLanes { op, rd, rn, size } => {
+ let op = match op {
+ VecLanesOp::Uminv => "uminv",
+ VecLanesOp::Addv => "addv",
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
+ let rn = show_vreg_vector(rn, mb_rru, size);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::VecShiftImm { op, rd, rn, size, imm } => {
+ let op = match op {
+ VecShiftImmOp::Shl => "shl",
+ VecShiftImmOp::Ushr => "ushr",
+ VecShiftImmOp::Sshr => "sshr",
+ };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+ let rn = show_vreg_vector(rn, mb_rru, size);
+ format!("{} {}, {}, #{}", op, rd, rn, imm)
+ }
+ &Inst::VecExtract { rd, rn, rm, imm4 } => {
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+ let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+ let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+ format!("ext {}, {}, {}, #{}", rd, rn, rm, imm4)
+ }
+ &Inst::VecTbl {
+ rd,
+ rn,
+ rm,
+ is_extension,
+ } => {
+ let op = if is_extension { "tbx" } else { "tbl" };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+ let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+ let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+ format!("{} {}, {{ {} }}, {}", op, rd, rn, rm)
+ }
+ &Inst::VecTbl2 {
+ rd,
+ rn,
+ rn2,
+ rm,
+ is_extension,
+ } => {
+ let op = if is_extension { "tbx" } else { "tbl" };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+ let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+ let rn2 = show_vreg_vector(rn2, mb_rru, VectorSize::Size8x16);
+ let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+ format!("{} {}, {{ {}, {} }}, {}", op, rd, rn, rn2, rm)
+ }
+ &Inst::VecLoadReplicate { rd, rn, size, .. } => {
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+ let rn = rn.show_rru(mb_rru);
+
+ format!("ld1r {{ {} }}, [{}]", rd, rn)
+ }
+ &Inst::VecCSel { rd, rn, rm, cond } => {
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+ let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+ let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+ let cond = cond.show_rru(mb_rru);
+ format!("vcsel {}, {}, {}, {} (if-then-else diamond)", rd, rn, rm, cond)
+ }
+ &Inst::MovToNZCV { rn } => {
+ let rn = rn.show_rru(mb_rru);
+ format!("msr nzcv, {}", rn)
+ }
+ &Inst::MovFromNZCV { rd } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ format!("mrs {}, nzcv", rd)
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ } if from_bits >= 8 => {
+ // Is the destination a 32-bit register? Corresponds to whether
+ // extend-to width is <= 32 bits, *unless* we have an unsigned
+ // 32-to-64-bit extension, which is implemented with a "mov" to a
+ // 32-bit (W-reg) dest, because this zeroes the top 32 bits.
+ let dest_size = if !signed && from_bits == 32 && to_bits == 64 {
+ OperandSize::Size32
+ } else {
+ OperandSize::from_bits(to_bits)
+ };
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
+ let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_bits(from_bits));
+ let op = match (signed, from_bits, to_bits) {
+ (false, 8, 32) => "uxtb",
+ (true, 8, 32) => "sxtb",
+ (false, 16, 32) => "uxth",
+ (true, 16, 32) => "sxth",
+ (false, 8, 64) => "uxtb",
+ (true, 8, 64) => "sxtb",
+ (false, 16, 64) => "uxth",
+ (true, 16, 64) => "sxth",
+ (false, 32, 64) => "mov", // special case (see above).
+ (true, 32, 64) => "sxtw",
+ _ => panic!("Unsupported Extend case: {:?}", self),
+ };
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ } if from_bits == 1 && signed => {
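+ // Printed as: mask to the low bit, then subtract the result from zero so that a 1 becomes all-ones, i.e. the sign-extended value.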
+ let dest_size = OperandSize::from_bits(to_bits);
+ let zr = if dest_size.is32() { "wzr" } else { "xzr" };
+ let rd32 = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
+ let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
+ format!("and {}, {}, #1 ; sub {}, {}, {}", rd32, rn, rd, zr, rd)
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ ..
+ } if from_bits == 1 && !signed => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
+ let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
+ format!("and {}, {}, #1", rd, rn)
+ }
+ &Inst::Extend { .. } => {
+ panic!("Unsupported Extend case");
+ }
+ &Inst::Call { .. } => format!("bl 0"),
+ &Inst::CallInd { ref info, .. } => {
+ let rn = info.rn.show_rru(mb_rru);
+ format!("blr {}", rn)
+ }
+ &Inst::Ret => "ret".to_string(),
+ &Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(),
+ &Inst::Jump { ref dest } => {
+ let dest = dest.show_rru(mb_rru);
+ format!("b {}", dest)
+ }
+ &Inst::CondBr {
+ ref taken,
+ ref not_taken,
+ ref kind,
+ } => {
+ let taken = taken.show_rru(mb_rru);
+ let not_taken = not_taken.show_rru(mb_rru);
+ match kind {
+ &CondBrKind::Zero(reg) => {
+ let reg = reg.show_rru(mb_rru);
+ format!("cbz {}, {} ; b {}", reg, taken, not_taken)
+ }
+ &CondBrKind::NotZero(reg) => {
+ let reg = reg.show_rru(mb_rru);
+ format!("cbnz {}, {} ; b {}", reg, taken, not_taken)
+ }
+ &CondBrKind::Cond(c) => {
+ let c = c.show_rru(mb_rru);
+ format!("b.{} {} ; b {}", c, taken, not_taken)
+ }
+ }
+ }
+ &Inst::IndirectBr { rn, .. } => {
+ let rn = rn.show_rru(mb_rru);
+ format!("br {}", rn)
+ }
+ &Inst::Brk => "brk #0".to_string(),
+ &Inst::Udf { .. } => "udf".to_string(),
+ &Inst::TrapIf { ref kind, .. } => match kind {
+ &CondBrKind::Zero(reg) => {
+ let reg = reg.show_rru(mb_rru);
+ format!("cbnz {}, 8 ; udf", reg)
+ }
+ &CondBrKind::NotZero(reg) => {
+ let reg = reg.show_rru(mb_rru);
+ format!("cbz {}, 8 ; udf", reg)
+ }
+ &CondBrKind::Cond(c) => {
+ let c = c.invert().show_rru(mb_rru);
+ format!("b.{} 8 ; udf", c)
+ }
+ },
+ &Inst::Adr { rd, off } => {
+ let rd = rd.show_rru(mb_rru);
+ format!("adr {}, pc+{}", rd, off)
+ }
+ &Inst::Word4 { data } => format!("data.i32 {}", data),
+ &Inst::Word8 { data } => format!("data.i64 {}", data),
+ &Inst::JTSequence {
+ ref info,
+ ridx,
+ rtmp1,
+ rtmp2,
+ ..
+ } => {
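+ // Pseudo-sequence: branch to the default target if the index is out of range (b.hs), adr to the inline table, load the signed 32-bit entry selected by ridx, add it to the table address, and branch indirectly through rtmp1.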
+ let ridx = ridx.show_rru(mb_rru);
+ let rtmp1 = rtmp1.show_rru(mb_rru);
+ let rtmp2 = rtmp2.show_rru(mb_rru);
+ let default_target = info.default_target.show_rru(mb_rru);
+ format!(
+ concat!(
+ "b.hs {} ; ",
+ "adr {}, pc+16 ; ",
+ "ldrsw {}, [{}, {}, LSL 2] ; ",
+ "add {}, {}, {} ; ",
+ "br {} ; ",
+ "jt_entries {:?}"
+ ),
+ default_target,
+ rtmp1,
+ rtmp2,
+ rtmp1,
+ ridx,
+ rtmp1,
+ rtmp1,
+ rtmp2,
+ rtmp1,
+ info.targets
+ )
+ }
+ &Inst::LoadExtName {
+ rd,
+ ref name,
+ offset,
+ } => {
+ let rd = rd.show_rru(mb_rru);
+ format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset)
+ }
+ &Inst::LoadAddr { rd, ref mem } => {
+ // TODO: we really should find a better way to avoid duplication of
+ // this logic between `emit()` and `show_rru()` -- a separate 1-to-N
+ // expansion stage (i.e., legalization, but without the slow edit-in-place
+ // of the existing legalization framework).
+ let (mem_insts, mem) = mem_finalize(0, mem, state);
+ let mut ret = String::new();
+ for inst in mem_insts.into_iter() {
+ ret.push_str(&inst.show_rru(mb_rru));
+ }
+ let (reg, offset) = match mem {
+ AMode::Unscaled(r, simm9) => (r, simm9.value()),
+ AMode::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32),
+ _ => panic!("Unsupported case for LoadAddr: {:?}", mem),
+ };
+ let abs_offset = if offset < 0 {
+ -offset as u64
+ } else {
+ offset as u64
+ };
+ let alu_op = if offset < 0 {
+ ALUOp::Sub64
+ } else {
+ ALUOp::Add64
+ };
+
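+ // Mirror of the emit() logic: a zero offset becomes a plain move, a small offset becomes a single add/sub with a 12-bit immediate, and anything larger materializes the constant into the spill temporary and adds it.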
+ if offset == 0 {
+ let mov = Inst::mov(rd, reg);
+ ret.push_str(&mov.show_rru(mb_rru));
+ } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
+ let add = Inst::AluRRImm12 {
+ alu_op,
+ rd,
+ rn: reg,
+ imm12,
+ };
+ ret.push_str(&add.show_rru(mb_rru));
+ } else {
+ let tmp = writable_spilltmp_reg();
+ for inst in Inst::load_constant(tmp, abs_offset).into_iter() {
+ ret.push_str(&inst.show_rru(mb_rru));
+ }
+ let add = Inst::AluRRR {
+ alu_op,
+ rd,
+ rn: reg,
+ rm: tmp.to_reg(),
+ };
+ ret.push_str(&add.show_rru(mb_rru));
+ }
+ ret
+ }
+ &Inst::VirtualSPOffsetAdj { offset } => {
+ state.virtual_sp_offset += offset;
+ format!("virtual_sp_offset_adjust {}", offset)
+ }
+ &Inst::EmitIsland { needed_space } => format!("emit_island {}", needed_space),
+ }
+ }
+}
+
+//=============================================================================
+// Label fixups and jump veneers.
+
+/// Different forms of label references for different instruction formats.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum LabelUse {
+ /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19
+ /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond.
+ Branch19,
+ /// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26
+ /// signed bits, in bits 25:0. Used by b, bl.
+ Branch26,
+ /// 19-bit offset for LDR (load literal). PC-rel, offset is imm << 2. Immediate is 19 signed bits,
+ /// in bits 23:5.
+ Ldr19,
+ /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
+ /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
+ Adr21,
+ /// 32-bit PC relative constant offset (from address of constant itself),
+ /// signed. Used in jump tables.
+ PCRel32,
+}
+
+impl MachInstLabelUse for LabelUse {
+ /// Alignment for veneer code. Every AArch64 instruction must be 4-byte-aligned.
+ const ALIGN: CodeOffset = 4;
+
+ /// Maximum PC-relative range (positive), inclusive.
+ fn max_pos_range(self) -> CodeOffset {
+ match self {
+ // 19-bit immediate, left-shifted by 2, giving 21 bits of total range. Signed, so the
+ // positive limit is 2^20 - 1 from zero. Likewise for the two other shifted cases below.
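+ // Concretely: Branch19, Ldr19 and Adr21 reach roughly +/-1 MiB, while Branch26 reaches roughly +/-128 MiB.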
+ LabelUse::Branch19 => (1 << 20) - 1,
+ LabelUse::Branch26 => (1 << 27) - 1,
+ LabelUse::Ldr19 => (1 << 20) - 1,
+ // Adr does not shift its immediate, so the 21-bit immediate gives 21 bits of total
+ // range.
+ LabelUse::Adr21 => (1 << 20) - 1,
+ LabelUse::PCRel32 => 0x7fffffff,
+ }
+ }
+
+ /// Maximum PC-relative range (negative).
+ fn max_neg_range(self) -> CodeOffset {
+ // All forms are twos-complement signed offsets, so negative limit is one more than
+ // positive limit.
+ self.max_pos_range() + 1
+ }
+
+ /// Size of window into code needed to do the patch.
+ fn patch_size(self) -> CodeOffset {
+ // Patch is on one instruction only for all of these label reference types.
+ 4
+ }
+
+ /// Perform the patch.
+ fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
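+ // Worked example (illustrative): a Branch19 use whose label lands 0x40 bytes
+ // ahead has pc_rel = 0x40; it is shifted right by 2 to 0x10 and placed in bits
+ // 23:5, so 0x10 << 5 = 0x200 is OR-ed into the instruction word after the old
+ // immediate field has been masked out.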
+ let pc_rel = (label_offset as i64) - (use_offset as i64);
+ debug_assert!(pc_rel <= self.max_pos_range() as i64);
+ debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
+ let pc_rel = pc_rel as u32;
+ let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
+ let mask = match self {
+ LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive
+ LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive
+ LabelUse::Ldr19 => 0x00ffffe0, // bits 23..5 inclusive
+ LabelUse::Adr21 => 0x60ffffe0, // bits 30..29, 23..5 inclusive
+ LabelUse::PCRel32 => 0xffffffff,
+ };
+ let pc_rel_shifted = match self {
+ LabelUse::Adr21 | LabelUse::PCRel32 => pc_rel,
+ _ => {
+ debug_assert!(pc_rel & 3 == 0);
+ pc_rel >> 2
+ }
+ };
+ let pc_rel_inserted = match self {
+ LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5,
+ LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff,
+ LabelUse::Adr21 => (pc_rel_shifted & 0x3) << 29 | ((pc_rel_shifted >> 2) & 0x7ffff) << 5, // immlo in bits 30:29, immhi in bits 23:5
+ LabelUse::PCRel32 => pc_rel_shifted,
+ };
+ let is_add = match self {
+ LabelUse::PCRel32 => true,
+ _ => false,
+ };
+ let insn_word = if is_add {
+ insn_word.wrapping_add(pc_rel_inserted)
+ } else {
+ (insn_word & !mask) | pc_rel_inserted
+ };
+ buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
+ }
+
+ /// Is a veneer supported for this label reference type?
+ fn supports_veneer(self) -> bool {
+ match self {
+ LabelUse::Branch19 => true, // veneer is a Branch26
+ _ => false,
+ }
+ }
+
+ /// How large is the veneer, if supported?
+ fn veneer_size(self) -> CodeOffset {
+ 4
+ }
+
+ /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
+ /// an offset and label-use for the veneer's use of the original label.
+ fn generate_veneer(
+ self,
+ buffer: &mut [u8],
+ veneer_offset: CodeOffset,
+ ) -> (CodeOffset, LabelUse) {
+ match self {
+ LabelUse::Branch19 => {
+ // veneer is a Branch26 (unconditional branch). Just encode directly here -- don't
+ // bother with constructing an Inst.
+ let insn_word = 0b000101 << 26;
+ buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
+ (veneer_offset, LabelUse::Branch26)
+ }
+ _ => panic!("Unsupported label-reference type for veneer generation!"),
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/regs.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/regs.rs
new file mode 100644
index 0000000000..0b4babe04a
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/regs.rs
@@ -0,0 +1,351 @@
+//! AArch64 ISA definitions: registers.
+
+use crate::isa::aarch64::inst::OperandSize;
+use crate::isa::aarch64::inst::ScalarSize;
+use crate::isa::aarch64::inst::VectorSize;
+use crate::settings;
+
+use regalloc::{
+ PrettyPrint, RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES,
+};
+
+use std::string::{String, ToString};
+
+//=============================================================================
+// Registers, the Universe thereof, and printing
+
+/// The pinned register on this architecture.
+/// It must be the same as SpiderMonkey's HeapReg, as defined in:
+/// https://searchfox.org/mozilla-central/source/js/src/jit/arm64/Assembler-arm64.h#103
+pub const PINNED_REG: u8 = 21;
+
+#[rustfmt::skip]
+const XREG_INDICES: [u8; 31] = [
+ // X0 - X7
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ // X8 - X15
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ // X16, X17
+ 58, 59,
+ // X18
+ 60,
+ // X19, X20
+ 48, 49,
+ // X21, put aside because it's the pinned register.
+ 57,
+ // X22 - X28
+ 50, 51, 52, 53, 54, 55, 56,
+ // X29 (FP)
+ 61,
+ // X30 (LR)
+ 62,
+];
+
+const ZERO_REG_INDEX: u8 = 63;
+
+const SP_REG_INDEX: u8 = 64;
+
+/// Get a reference to an X-register (integer register).
+pub fn xreg(num: u8) -> Reg {
+ assert!(num < 31);
+ Reg::new_real(
+ RegClass::I64,
+ /* enc = */ num,
+ /* index = */ XREG_INDICES[num as usize],
+ )
+}
+
+/// Get a writable reference to an X-register.
+pub fn writable_xreg(num: u8) -> Writable<Reg> {
+ Writable::from_reg(xreg(num))
+}
+
+/// Get a reference to a V-register (vector/FP register).
+pub fn vreg(num: u8) -> Reg {
+ assert!(num < 32);
+ Reg::new_real(RegClass::V128, /* enc = */ num, /* index = */ num)
+}
+
+/// Get a writable reference to a V-register.
+pub fn writable_vreg(num: u8) -> Writable<Reg> {
+ Writable::from_reg(vreg(num))
+}
+
+/// Get a reference to the zero-register.
+pub fn zero_reg() -> Reg {
+ // This should be the same as what xreg(31) returns, except that
+ // we use the special index into the register index space.
+ Reg::new_real(
+ RegClass::I64,
+ /* enc = */ 31,
+ /* index = */ ZERO_REG_INDEX,
+ )
+}
+
+/// Get a writable reference to the zero-register (this discards a result).
+pub fn writable_zero_reg() -> Writable<Reg> {
+ Writable::from_reg(zero_reg())
+}
+
+/// Get a reference to the stack-pointer register.
+pub fn stack_reg() -> Reg {
+ // XSP (stack) and XZR (zero) are logically different registers which have
+ // the same hardware encoding, and whose meaning, in real aarch64
+ // instructions, is context-dependent. For convenience of
+ // universe-construction and for correct printing, we make them be two
+ // different real registers.
+ Reg::new_real(
+ RegClass::I64,
+ /* enc = */ 31,
+ /* index = */ SP_REG_INDEX,
+ )
+}
+
+/// Get a writable reference to the stack-pointer register.
+pub fn writable_stack_reg() -> Writable<Reg> {
+ Writable::from_reg(stack_reg())
+}
+
+/// Get a reference to the link register (x30).
+pub fn link_reg() -> Reg {
+ xreg(30)
+}
+
+/// Get a writable reference to the link register.
+pub fn writable_link_reg() -> Writable<Reg> {
+ Writable::from_reg(link_reg())
+}
+
+/// Get a reference to the frame pointer (x29).
+pub fn fp_reg() -> Reg {
+ xreg(29)
+}
+
+/// Get a writable reference to the frame pointer.
+pub fn writable_fp_reg() -> Writable<Reg> {
+ Writable::from_reg(fp_reg())
+}
+
+/// Get a reference to the first temporary, sometimes "spill temporary", register. This register is
+/// used to compute the address of a spill slot when a direct offset addressing mode from FP is not
+/// sufficient (+/- 2^11 words). We exclude this register from regalloc and reserve it for this
+/// purpose for simplicity; otherwise we need a multi-stage analysis where we first determine how
+/// many spill slots we have, then perhaps remove the reg from the pool and recompute regalloc.
+///
+/// We use x16 for this (aka IP0 in the AArch64 ABI) because it's a scratch register but is
+/// slightly special (used for linker veneers). We're free to use it as long as we don't expect it
+/// to live through call instructions.
+pub fn spilltmp_reg() -> Reg {
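+ // With 8-byte words, the +/- 2^11-word direct range mentioned above is roughly +/-16 KiB; spill slots beyond that get their address formed in this register first.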
+ xreg(16)
+}
+
+/// Get a writable reference to the spilltmp reg.
+pub fn writable_spilltmp_reg() -> Writable<Reg> {
+ Writable::from_reg(spilltmp_reg())
+}
+
+/// Get a reference to the second temp register. We need this in some edge cases
+/// where we need both the spilltmp and another temporary.
+///
+/// We use x17 (aka IP1), the other "interprocedural"/linker-veneer scratch reg that is
+/// free to use otherwise.
+pub fn tmp2_reg() -> Reg {
+ xreg(17)
+}
+
+/// Get a writable reference to the tmp2 reg.
+pub fn writable_tmp2_reg() -> Writable<Reg> {
+ Writable::from_reg(tmp2_reg())
+}
+
+/// Create the register universe for AArch64.
+pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
+ let mut regs = vec![];
+ let mut allocable_by_class = [None; NUM_REG_CLASSES];
+
+ // Numbering Scheme: we put V-regs first, then X-regs. The allocatable X-regs exclude several
+ // registers: x16/x17 (reserved as temporaries), x18 (globally reserved for platform-specific
+ // purposes), x21 (the pinned register; see below), x29 (frame pointer), x30 (link register),
+ // and x31 (stack pointer or zero register, depending on context).
+
+ let v_reg_base = 0u8; // in contiguous real-register index space
+ let v_reg_count = 32;
+ for i in 0u8..v_reg_count {
+ let reg = Reg::new_real(
+ RegClass::V128,
+ /* enc = */ i,
+ /* index = */ v_reg_base + i,
+ )
+ .to_real_reg();
+ let name = format!("v{}", i);
+ regs.push((reg, name));
+ }
+ let v_reg_last = v_reg_base + v_reg_count - 1;
+
+ // Add the X registers. N.B.: the order here must match the order implied
+ // by XREG_INDICES, ZERO_REG_INDEX, and SP_REG_INDEX above.
+
+ let x_reg_base = 32u8; // in contiguous real-register index space
+ let mut x_reg_count = 0;
+
+ let uses_pinned_reg = flags.enable_pinned_reg();
+
+ for i in 0u8..32u8 {
+ // See above for excluded registers.
+ if i == 16 || i == 17 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
+ continue;
+ }
+ let reg = Reg::new_real(
+ RegClass::I64,
+ /* enc = */ i,
+ /* index = */ x_reg_base + x_reg_count,
+ )
+ .to_real_reg();
+ let name = format!("x{}", i);
+ regs.push((reg, name));
+ x_reg_count += 1;
+ }
+ let x_reg_last = x_reg_base + x_reg_count - 1;
+
+ allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
+ first: x_reg_base as usize,
+ last: x_reg_last as usize,
+ suggested_scratch: Some(XREG_INDICES[19] as usize),
+ });
+ allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
+ first: v_reg_base as usize,
+ last: v_reg_last as usize,
+ suggested_scratch: Some(/* V31: */ 31),
+ });
+
+ // Other regs, not available to the allocator.
+ let allocable = if uses_pinned_reg {
+ // The pinned register is not allocatable in this case, so record the length before adding
+ // it.
+ let len = regs.len();
+ regs.push((xreg(PINNED_REG).to_real_reg(), "x21/pinned_reg".to_string()));
+ len
+ } else {
+ regs.push((xreg(PINNED_REG).to_real_reg(), "x21".to_string()));
+ regs.len()
+ };
+
+ regs.push((xreg(16).to_real_reg(), "x16".to_string()));
+ regs.push((xreg(17).to_real_reg(), "x17".to_string()));
+ regs.push((xreg(18).to_real_reg(), "x18".to_string()));
+ regs.push((fp_reg().to_real_reg(), "fp".to_string()));
+ regs.push((link_reg().to_real_reg(), "lr".to_string()));
+ regs.push((zero_reg().to_real_reg(), "xzr".to_string()));
+ regs.push((stack_reg().to_real_reg(), "sp".to_string()));
+
+ // FIXME JRS 2020Feb06: unfortunately this pushes the number of real regs
+ // to 65, which is potentially inconvenient from a compiler performance
+ // standpoint. We could possibly drop back to 64 by "losing" a vector
+ // register in future.
+
+ // Assert sanity: the indices in the register structs must match their
+ // actual indices in the array.
+ for (i, reg) in regs.iter().enumerate() {
+ assert_eq!(i, reg.0.get_index());
+ }
+
+ RealRegUniverse {
+ regs,
+ allocable,
+ allocable_by_class,
+ }
+}
+
+/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show
+/// its name at the 32-bit size.
+pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: OperandSize) -> String {
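+ // For example, a real "x3" used at 32 bits is shown as "w3"; a virtual I64-class register instead gets a "w" suffix.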
+ let mut s = reg.show_rru(mb_rru);
+ if reg.get_class() != RegClass::I64 || !size.is32() {
+ // We can't do any better.
+ return s;
+ }
+
+ if reg.is_real() {
+ // Change (eg) "x42" into "w42" as appropriate
+ if reg.get_class() == RegClass::I64 && size.is32() && s.starts_with("x") {
+ s = "w".to_string() + &s[1..];
+ }
+ } else {
+ // Add a "w" suffix to RegClass::I64 vregs used in a 32-bit role
+ if reg.get_class() == RegClass::I64 && size.is32() {
+ s.push('w');
+ }
+ }
+ s
+}
+
+/// Show a vector register used in a scalar context.
+pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: ScalarSize) -> String {
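+ // For example, a real "v7" is shown as "b7", "h7", "s7", "d7" or "q7" depending on the scalar size; virtual V128-class registers get a "d" suffix.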
+ let mut s = reg.show_rru(mb_rru);
+ if reg.get_class() != RegClass::V128 {
+ // We can't do any better.
+ return s;
+ }
+
+ if reg.is_real() {
+ // Change (eg) "v0" into "d0".
+ if s.starts_with("v") {
+ let replacement = match size {
+ ScalarSize::Size8 => "b",
+ ScalarSize::Size16 => "h",
+ ScalarSize::Size32 => "s",
+ ScalarSize::Size64 => "d",
+ ScalarSize::Size128 => "q",
+ };
+ s.replace_range(0..1, replacement);
+ }
+ } else {
+ // Add a "d" suffix to RegClass::V128 vregs.
+ if reg.get_class() == RegClass::V128 {
+ s.push('d');
+ }
+ }
+ s
+}
+
+/// Show a vector register.
+pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: VectorSize) -> String {
+ assert_eq!(RegClass::V128, reg.get_class());
+ let mut s = reg.show_rru(mb_rru);
+
+ let suffix = match size {
+ VectorSize::Size8x8 => ".8b",
+ VectorSize::Size8x16 => ".16b",
+ VectorSize::Size16x4 => ".4h",
+ VectorSize::Size16x8 => ".8h",
+ VectorSize::Size32x2 => ".2s",
+ VectorSize::Size32x4 => ".4s",
+ VectorSize::Size64x2 => ".2d",
+ };
+
+ s.push_str(suffix);
+ s
+}
+
+/// Show an indexed vector element.
+pub fn show_vreg_element(
+ reg: Reg,
+ mb_rru: Option<&RealRegUniverse>,
+ idx: u8,
+ size: VectorSize,
+) -> String {
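+ // For example, lane 3 of a real "v2" with 32-bit lanes is shown as "v2.s[3]".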
+ assert_eq!(RegClass::V128, reg.get_class());
+ let mut s = reg.show_rru(mb_rru);
+
+ let suffix = match size {
+ VectorSize::Size8x8 => "b",
+ VectorSize::Size8x16 => "b",
+ VectorSize::Size16x4 => "h",
+ VectorSize::Size16x8 => "h",
+ VectorSize::Size32x2 => "s",
+ VectorSize::Size32x4 => "s",
+ VectorSize::Size64x2 => "d",
+ };
+
+ s.push_str(&format!(".{}[{}]", suffix, idx));
+ s
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind.rs
new file mode 100644
index 0000000000..698e094795
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind.rs
@@ -0,0 +1,201 @@
+use super::*;
+use crate::isa::aarch64::inst::{args::PairAMode, imms::Imm12, regs, ALUOp, Inst};
+use crate::isa::unwind::input::{UnwindCode, UnwindInfo};
+use crate::machinst::UnwindInfoContext;
+use crate::result::CodegenResult;
+use alloc::vec::Vec;
+use regalloc::Reg;
+
+#[cfg(feature = "unwind")]
+pub(crate) mod systemv;
+
+pub struct AArch64UnwindInfo;
+
+impl UnwindInfoGenerator<Inst> for AArch64UnwindInfo {
+ fn create_unwind_info(
+ context: UnwindInfoContext<Inst>,
+ ) -> CodegenResult<Option<UnwindInfo<Reg>>> {
+ let word_size = 8u8;
+ let pair_size = word_size * 2;
+ let mut codes = Vec::new();
+
+ for i in context.prologue.clone() {
+ let i = i as usize;
+ let inst = &context.insts[i];
+ let offset = context.insts_layout[i];
+
+ match inst {
+ Inst::StoreP64 {
+ rt,
+ rt2,
+ mem: PairAMode::PreIndexed(rn, imm7),
+ ..
+ } if *rt == regs::fp_reg()
+ && *rt2 == regs::link_reg()
+ && *rn == regs::writable_stack_reg()
+ && imm7.value == -(pair_size as i16) =>
+ {
+ // stp fp (x29), lr (x30), [sp, #-16]!
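+ // This single instruction both allocates 16 bytes and saves fp at [sp] and lr at [sp, #8], hence one StackAlloc code followed by two SaveRegister codes (cf. the expectations in the test module below).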
+ codes.push((
+ offset,
+ UnwindCode::StackAlloc {
+ size: pair_size as u32,
+ },
+ ));
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *rt,
+ stack_offset: 0,
+ },
+ ));
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *rt2,
+ stack_offset: word_size as u32,
+ },
+ ));
+ }
+ Inst::StoreP64 {
+ rt,
+ rt2,
+ mem: PairAMode::PreIndexed(rn, imm7),
+ ..
+ } if rn.to_reg() == regs::stack_reg() && imm7.value % (pair_size as i16) == 0 => {
+ // stp r1, r2, [sp, #(i * 16)]
+ let stack_offset = imm7.value as u32;
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *rt,
+ stack_offset,
+ },
+ ));
+ if *rt2 != regs::zero_reg() {
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *rt2,
+ stack_offset: stack_offset + word_size as u32,
+ },
+ ));
+ }
+ }
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Add64,
+ rd,
+ rn,
+ imm12:
+ Imm12 {
+ bits: 0,
+ shift12: false,
+ },
+ } if *rd == regs::writable_fp_reg() && *rn == regs::stack_reg() => {
+ // mov fp (x29), sp.
+ codes.push((offset, UnwindCode::SetFramePointer { reg: rd.to_reg() }));
+ }
+ Inst::VirtualSPOffsetAdj { offset: adj } if offset > 0 => {
+ codes.push((offset, UnwindCode::StackAlloc { size: *adj as u32 }));
+ }
+ _ => {}
+ }
+ }
+
+ // TODO epilogues
+
+ let prologue_size = if context.prologue.is_empty() {
+ 0
+ } else {
+ context.insts_layout[context.prologue.end as usize - 1]
+ };
+
+ Ok(Some(UnwindInfo {
+ prologue_size,
+ prologue_unwind_codes: codes,
+ epilogues_unwind_codes: vec![],
+ function_size: context.len,
+ word_size,
+ initial_sp_offset: 0,
+ }))
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::cursor::{Cursor, FuncCursor};
+ use crate::ir::{ExternalName, Function, InstBuilder, Signature, StackSlotData, StackSlotKind};
+ use crate::isa::{lookup, CallConv};
+ use crate::settings::{builder, Flags};
+ use crate::Context;
+ use std::str::FromStr;
+ use target_lexicon::triple;
+
+ #[test]
+ fn test_simple_func() {
+ let isa = lookup(triple!("aarch64"))
+ .expect("expect aarch64 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(
+ CallConv::SystemV,
+ Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
+ ));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let result = context.mach_compile_result.unwrap();
+ let unwind_info = result.unwind_info.unwrap();
+
+ assert_eq!(
+ unwind_info,
+ UnwindInfo {
+ prologue_size: 12,
+ prologue_unwind_codes: vec![
+ (4, UnwindCode::StackAlloc { size: 16 }),
+ (
+ 4,
+ UnwindCode::SaveRegister {
+ reg: regs::fp_reg(),
+ stack_offset: 0
+ }
+ ),
+ (
+ 4,
+ UnwindCode::SaveRegister {
+ reg: regs::link_reg(),
+ stack_offset: 8
+ }
+ ),
+ (
+ 8,
+ UnwindCode::SetFramePointer {
+ reg: regs::fp_reg()
+ }
+ )
+ ],
+ epilogues_unwind_codes: vec![],
+ function_size: 24,
+ word_size: 8,
+ initial_sp_offset: 0,
+ }
+ );
+ }
+
+ fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
+ let mut func =
+ Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
+
+ let block0 = func.dfg.make_block();
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().return_(&[]);
+
+ if let Some(stack_slot) = stack_slot {
+ func.stack_slots.push(stack_slot);
+ }
+
+ func
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind/systemv.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind/systemv.rs
new file mode 100644
index 0000000000..b988314b1b
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind/systemv.rs
@@ -0,0 +1,158 @@
+//! Unwind information for System V ABI (AArch64).
+
+use crate::isa::aarch64::inst::regs;
+use crate::isa::unwind::input;
+use crate::isa::unwind::systemv::{RegisterMappingError, UnwindInfo};
+use crate::result::CodegenResult;
+use gimli::{write::CommonInformationEntry, Encoding, Format, Register};
+use regalloc::{Reg, RegClass};
+
+/// Creates a new aarch64 common information entry (CIE).
+pub fn create_cie() -> CommonInformationEntry {
+ use gimli::write::CallFrameInstruction;
+
+ let mut entry = CommonInformationEntry::new(
+ Encoding {
+ address_size: 8,
+ format: Format::Dwarf32,
+ version: 1,
+ },
+ 4, // Code alignment factor
+ -8, // Data alignment factor
+ Register(regs::link_reg().get_hw_encoding().into()),
+ );
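+ // With these factors, code advances are encoded in units of 4 bytes and register-save offsets in units of -8 bytes; the return-address column is LR (x30).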
+
+ // Every frame will start with the call frame address (CFA) at SP
+ let sp = Register(regs::stack_reg().get_hw_encoding().into());
+ entry.add_instruction(CallFrameInstruction::Cfa(sp, 0));
+
+ entry
+}
+
+/// Map Cranelift registers to their corresponding Gimli registers.
+pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
+ match reg.get_class() {
+ RegClass::I64 => Ok(Register(reg.get_hw_encoding().into())),
+ _ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")),
+ }
+}
+
+pub(crate) fn create_unwind_info(
+ unwind: input::UnwindInfo<Reg>,
+) -> CodegenResult<Option<UnwindInfo>> {
+ struct RegisterMapper;
+ impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
+ fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
+ Ok(map_reg(reg)?.0)
+ }
+ fn sp(&self) -> u16 {
+ regs::stack_reg().get_hw_encoding().into()
+ }
+ }
+ let map = RegisterMapper;
+ Ok(Some(UnwindInfo::build(unwind, &map)?))
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::cursor::{Cursor, FuncCursor};
+ use crate::ir::{
+ types, AbiParam, ExternalName, Function, InstBuilder, Signature, StackSlotData,
+ StackSlotKind,
+ };
+ use crate::isa::{lookup, CallConv};
+ use crate::settings::{builder, Flags};
+ use crate::Context;
+ use gimli::write::Address;
+ use std::str::FromStr;
+ use target_lexicon::triple;
+
+ #[test]
+ fn test_simple_func() {
+ let isa = lookup(triple!("aarch64"))
+ .expect("expect aarch64 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(
+ CallConv::SystemV,
+ Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
+ ));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let fde = match context
+ .create_unwind_info(isa.as_ref())
+ .expect("can create unwind info")
+ {
+ Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
+ info.to_fde(Address::Constant(1234))
+ }
+ _ => panic!("expected unwind information"),
+ };
+
+ assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
+ }
+
+ fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
+ let mut func =
+ Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
+
+ let block0 = func.dfg.make_block();
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().return_(&[]);
+
+ if let Some(stack_slot) = stack_slot {
+ func.stack_slots.push(stack_slot);
+ }
+
+ func
+ }
+
+ #[test]
+ fn test_multi_return_func() {
+ let isa = lookup(triple!("aarch64"))
+ .expect("expect aarch64 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let fde = match context
+ .create_unwind_info(isa.as_ref())
+ .expect("can create unwind info")
+ {
+ Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
+ info.to_fde(Address::Constant(4321))
+ }
+ _ => panic!("expected unwind information"),
+ };
+
+ assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 40, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
+ }
+
+ fn create_multi_return_function(call_conv: CallConv) -> Function {
+ let mut sig = Signature::new(call_conv);
+ sig.params.push(AbiParam::new(types::I32));
+ let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
+
+ let block0 = func.dfg.make_block();
+ let v0 = func.dfg.append_block_param(block0, types::I32);
+ let block1 = func.dfg.make_block();
+ let block2 = func.dfg.make_block();
+
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().brnz(v0, block2, &[]);
+ pos.ins().jump(block1, &[]);
+
+ pos.insert_block(block1);
+ pos.ins().return_(&[]);
+
+ pos.insert_block(block2);
+ pos.ins().return_(&[]);
+
+ func
+ }
+}