Diffstat (limited to 'third_party/rust/cranelift-codegen/src/isa/aarch64/inst')
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/args.rs            |  728
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit.rs            | 2359
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit_tests.rs      | 5143
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/imms.rs            | 1025
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/mod.rs             | 4057
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/regs.rs            |  351
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind.rs          |  201
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind/systemv.rs  |  158
8 files changed, 14022 insertions, 0 deletions
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/args.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/args.rs
new file mode 100644
index 0000000000..7bd181c86b
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/args.rs
@@ -0,0 +1,728 @@
+//! AArch64 ISA definitions: instruction arguments.
+
+// Some variants are never constructed, but we still want them as options in the future.
+#![allow(dead_code)]
+
+use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8};
+use crate::ir::Type;
+use crate::isa::aarch64::inst::*;
+use crate::machinst::{ty_bits, MachLabel};
+
+use regalloc::{PrettyPrint, RealRegUniverse, Reg, Writable};
+
+use core::convert::Into;
+use std::string::String;
+
+//=============================================================================
+// Instruction sub-components: shift and extend descriptors
+
+/// A shift operator for a register or immediate.
+#[derive(Clone, Copy, Debug)]
+#[repr(u8)]
+pub enum ShiftOp {
+ LSL = 0b00,
+ LSR = 0b01,
+ ASR = 0b10,
+ ROR = 0b11,
+}
+
+impl ShiftOp {
+ /// Get the encoding of this shift op.
+ pub fn bits(self) -> u8 {
+ self as u8
+ }
+}
+
+/// A shift operator amount.
+#[derive(Clone, Copy, Debug)]
+pub struct ShiftOpShiftImm(u8);
+
+impl ShiftOpShiftImm {
+ /// Maximum shift for shifted-register operands.
+ pub const MAX_SHIFT: u64 = 63;
+
+ /// Create a new shiftop shift amount, if possible.
+ pub fn maybe_from_shift(shift: u64) -> Option<ShiftOpShiftImm> {
+ if shift <= Self::MAX_SHIFT {
+ Some(ShiftOpShiftImm(shift as u8))
+ } else {
+ None
+ }
+ }
+
+ /// Return the shift amount.
+ pub fn value(self) -> u8 {
+ self.0
+ }
+
+ /// Mask down to a given number of bits.
+ pub fn mask(self, bits: u8) -> ShiftOpShiftImm {
+ ShiftOpShiftImm(self.0 & (bits - 1))
+ }
+}
+
+/// A shift operator with an amount, guaranteed to be within range.
+#[derive(Clone, Debug)]
+pub struct ShiftOpAndAmt {
+ op: ShiftOp,
+ shift: ShiftOpShiftImm,
+}
+
+impl ShiftOpAndAmt {
+ pub fn new(op: ShiftOp, shift: ShiftOpShiftImm) -> ShiftOpAndAmt {
+ ShiftOpAndAmt { op, shift }
+ }
+
+ /// Get the shift op.
+ pub fn op(&self) -> ShiftOp {
+ self.op
+ }
+
+ /// Get the shift amount.
+ pub fn amt(&self) -> ShiftOpShiftImm {
+ self.shift
+ }
+}
+
+/// An extend operator for a register.
+#[derive(Clone, Copy, Debug)]
+#[repr(u8)]
+pub enum ExtendOp {
+ UXTB = 0b000,
+ UXTH = 0b001,
+ UXTW = 0b010,
+ UXTX = 0b011,
+ SXTB = 0b100,
+ SXTH = 0b101,
+ SXTW = 0b110,
+ SXTX = 0b111,
+}
+
+impl ExtendOp {
+ /// Encoding of this op.
+ pub fn bits(self) -> u8 {
+ self as u8
+ }
+}
+
+//=============================================================================
+// Instruction sub-components (memory addresses): definitions
+
+/// A reference to some memory address.
+#[derive(Clone, Debug)]
+pub enum MemLabel {
+ /// An address in the code, a constant pool or jumptable, with relative
+ /// offset from this instruction. This form must be used at emission time;
+ /// see `memlabel_finalize()` for how other forms are lowered to this one.
+ PCRel(i32),
+}
+
+/// An addressing mode specified for a load/store operation.
+#[derive(Clone, Debug)]
+pub enum AMode {
+ //
+ // Real ARM64 addressing modes:
+ //
+ /// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation.
+ PostIndexed(Writable<Reg>, SImm9),
+ /// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation.
+ PreIndexed(Writable<Reg>, SImm9),
+
+ // N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to
+ // what the ISA calls the "register offset" addressing mode. We split out
+ // several options here for more ergonomic codegen.
+ /// Register plus register offset.
+ RegReg(Reg, Reg),
+
+ /// Register plus register offset, scaled by type's size.
+ RegScaled(Reg, Reg, Type),
+
+ /// Register plus register offset, scaled by type's size, with index sign- or zero-extended
+ /// first.
+ RegScaledExtended(Reg, Reg, Type, ExtendOp),
+
+ /// Register plus register offset, with index sign- or zero-extended first.
+ RegExtended(Reg, Reg, ExtendOp),
+
+ /// Unscaled signed 9-bit immediate offset from reg.
+ Unscaled(Reg, SImm9),
+
+ /// Scaled (by size of a type) unsigned 12-bit immediate offset from reg.
+ UnsignedOffset(Reg, UImm12Scaled),
+
+ //
+ // virtual addressing modes that are lowered at emission time:
+ //
+ /// Reference to a "label": e.g., a symbol.
+ Label(MemLabel),
+
+ /// Arbitrary offset from a register. Converted to generation of large
+ /// offsets with multiple instructions as necessary during code emission.
+ RegOffset(Reg, i64, Type),
+
+ /// Offset from the stack pointer.
+ SPOffset(i64, Type),
+
+ /// Offset from the frame pointer.
+ FPOffset(i64, Type),
+
+ /// Offset from the "nominal stack pointer", which is where the real SP is
+ /// just after stack and spill slots are allocated in the function prologue.
+ /// At emission time, this is converted to `SPOffset` with a fixup added to
+ /// the offset constant. The fixup is a running value that is tracked as
+ /// emission iterates through instructions in linear order, and can be
+ /// adjusted up and down with [Inst::VirtualSPOffsetAdj].
+ ///
+ /// The standard ABI is in charge of handling this (by emitting the
+ /// adjustment meta-instructions). It maintains the invariant that "nominal
+ /// SP" is where the actual SP is after the function prologue and before
+ /// clobber pushes. See the diagram in the documentation for
+ /// [the ABI module](crate::isa::aarch64::abi) for more details.
+ NominalSPOffset(i64, Type),
+}
+
+impl AMode {
+ /// Memory reference using an address in a register.
+ pub fn reg(reg: Reg) -> AMode {
+ // Use UnsignedOffset rather than Unscaled to use ldr rather than ldur.
+ // This also does not use PostIndexed / PreIndexed as they update the register.
+ AMode::UnsignedOffset(reg, UImm12Scaled::zero(I64))
+ }
+
+ /// Memory reference using the sum of two registers as an address.
+ pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> AMode {
+ AMode::RegReg(reg1, reg2)
+ }
+
+ /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address.
+ pub fn reg_plus_reg_scaled(reg1: Reg, reg2: Reg, ty: Type) -> AMode {
+ AMode::RegScaled(reg1, reg2, ty)
+ }
+
+ /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address, with `reg2` sign- or
+ /// zero-extended as per `op`.
+ pub fn reg_plus_reg_scaled_extended(reg1: Reg, reg2: Reg, ty: Type, op: ExtendOp) -> AMode {
+ AMode::RegScaledExtended(reg1, reg2, ty, op)
+ }
+
+ /// Memory reference to a label: a global function or value, or data in the constant pool.
+ pub fn label(label: MemLabel) -> AMode {
+ AMode::Label(label)
+ }
+}
+
+/// A memory argument to a load/store-pair.
+#[derive(Clone, Debug)]
+pub enum PairAMode {
+ SignedOffset(Reg, SImm7Scaled),
+ PreIndexed(Writable<Reg>, SImm7Scaled),
+ PostIndexed(Writable<Reg>, SImm7Scaled),
+}
+
+//=============================================================================
+// Instruction sub-components (conditions, branches and branch targets):
+// definitions
+
+/// Condition for conditional branches.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[repr(u8)]
+pub enum Cond {
+ Eq = 0,
+ Ne = 1,
+ Hs = 2,
+ Lo = 3,
+ Mi = 4,
+ Pl = 5,
+ Vs = 6,
+ Vc = 7,
+ Hi = 8,
+ Ls = 9,
+ Ge = 10,
+ Lt = 11,
+ Gt = 12,
+ Le = 13,
+ Al = 14,
+ Nv = 15,
+}
+
+impl Cond {
+ /// Return the inverted condition.
+ pub fn invert(self) -> Cond {
+ match self {
+ Cond::Eq => Cond::Ne,
+ Cond::Ne => Cond::Eq,
+
+ Cond::Hs => Cond::Lo,
+ Cond::Lo => Cond::Hs,
+
+ Cond::Mi => Cond::Pl,
+ Cond::Pl => Cond::Mi,
+
+ Cond::Vs => Cond::Vc,
+ Cond::Vc => Cond::Vs,
+
+ Cond::Hi => Cond::Ls,
+ Cond::Ls => Cond::Hi,
+
+ Cond::Ge => Cond::Lt,
+ Cond::Lt => Cond::Ge,
+
+ Cond::Gt => Cond::Le,
+ Cond::Le => Cond::Gt,
+
+ Cond::Al => Cond::Nv,
+ Cond::Nv => Cond::Al,
+ }
+ }
+
+ /// Return the machine encoding of this condition.
+ pub fn bits(self) -> u32 {
+ self as u32
+ }
+}
+
+/// The kind of conditional branch: the common-case-optimized "reg-is-zero" /
+/// "reg-is-nonzero" variants, or the generic one that tests the machine
+/// condition codes.
+#[derive(Clone, Copy, Debug)]
+pub enum CondBrKind {
+ /// Condition: given register is zero.
+ Zero(Reg),
+ /// Condition: given register is nonzero.
+ NotZero(Reg),
+ /// Condition: the given condition-code test is true.
+ Cond(Cond),
+}
+
+impl CondBrKind {
+ /// Return the inverted branch condition.
+ pub fn invert(self) -> CondBrKind {
+ match self {
+ CondBrKind::Zero(reg) => CondBrKind::NotZero(reg),
+ CondBrKind::NotZero(reg) => CondBrKind::Zero(reg),
+ CondBrKind::Cond(c) => CondBrKind::Cond(c.invert()),
+ }
+ }
+}
+
+/// A branch target. Either unresolved (basic-block index) or resolved (offset
+/// from end of current instruction).
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum BranchTarget {
+ /// An unresolved reference to a Label, as passed into
+ /// `lower_branch_group()`.
+ Label(MachLabel),
+ /// A fixed PC offset.
+ ResolvedOffset(i32),
+}
+
+impl BranchTarget {
+ /// Return the target's label, if it is a label-based target.
+ pub fn as_label(self) -> Option<MachLabel> {
+ match self {
+ BranchTarget::Label(l) => Some(l),
+ _ => None,
+ }
+ }
+
+ /// Return the target's offset, if specified, or zero if label-based.
+ pub fn as_offset19_or_zero(self) -> u32 {
+ let off = match self {
+ BranchTarget::ResolvedOffset(off) => off >> 2,
+ _ => 0,
+ };
+ assert!(off <= 0x3ffff);
+ assert!(off >= -0x40000);
+ (off as u32) & 0x7ffff
+ }
+
+ /// Return the target's offset, if specified, or zero if label-based.
+ pub fn as_offset26_or_zero(self) -> u32 {
+ let off = match self {
+ BranchTarget::ResolvedOffset(off) => off >> 2,
+ _ => 0,
+ };
+ assert!(off <= 0x1ffffff);
+ assert!(off >= -0x2000000);
+ (off as u32) & 0x3ffffff
+ }
+}
+
+impl PrettyPrint for ShiftOpAndAmt {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("{:?} {}", self.op(), self.amt().value())
+ }
+}
+
+impl PrettyPrint for ExtendOp {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("{:?}", self)
+ }
+}
+
+impl PrettyPrint for MemLabel {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ &MemLabel::PCRel(off) => format!("pc+{}", off),
+ }
+ }
+}
+
+fn shift_for_type(ty: Type) -> usize {
+ match ty.bytes() {
+ 1 => 0,
+ 2 => 1,
+ 4 => 2,
+ 8 => 3,
+ 16 => 4,
+ _ => panic!("unknown type: {}", ty),
+ }
+}
+
+impl PrettyPrint for AMode {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ &AMode::Unscaled(reg, simm9) => {
+ if simm9.value != 0 {
+ format!("[{}, {}]", reg.show_rru(mb_rru), simm9.show_rru(mb_rru))
+ } else {
+ format!("[{}]", reg.show_rru(mb_rru))
+ }
+ }
+ &AMode::UnsignedOffset(reg, uimm12) => {
+ if uimm12.value != 0 {
+ format!("[{}, {}]", reg.show_rru(mb_rru), uimm12.show_rru(mb_rru))
+ } else {
+ format!("[{}]", reg.show_rru(mb_rru))
+ }
+ }
+ &AMode::RegReg(r1, r2) => {
+ format!("[{}, {}]", r1.show_rru(mb_rru), r2.show_rru(mb_rru),)
+ }
+ &AMode::RegScaled(r1, r2, ty) => {
+ let shift = shift_for_type(ty);
+ format!(
+ "[{}, {}, LSL #{}]",
+ r1.show_rru(mb_rru),
+ r2.show_rru(mb_rru),
+ shift,
+ )
+ }
+ &AMode::RegScaledExtended(r1, r2, ty, op) => {
+ let shift = shift_for_type(ty);
+ let size = match op {
+ ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32,
+ _ => OperandSize::Size64,
+ };
+ let op = op.show_rru(mb_rru);
+ format!(
+ "[{}, {}, {} #{}]",
+ r1.show_rru(mb_rru),
+ show_ireg_sized(r2, mb_rru, size),
+ op,
+ shift
+ )
+ }
+ &AMode::RegExtended(r1, r2, op) => {
+ let size = match op {
+ ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32,
+ _ => OperandSize::Size64,
+ };
+ let op = op.show_rru(mb_rru);
+ format!(
+ "[{}, {}, {}]",
+ r1.show_rru(mb_rru),
+ show_ireg_sized(r2, mb_rru, size),
+ op,
+ )
+ }
+ &AMode::Label(ref label) => label.show_rru(mb_rru),
+ &AMode::PreIndexed(r, simm9) => format!(
+ "[{}, {}]!",
+ r.to_reg().show_rru(mb_rru),
+ simm9.show_rru(mb_rru)
+ ),
+ &AMode::PostIndexed(r, simm9) => format!(
+ "[{}], {}",
+ r.to_reg().show_rru(mb_rru),
+ simm9.show_rru(mb_rru)
+ ),
+ // Eliminated by `mem_finalize()`.
+ &AMode::SPOffset(..)
+ | &AMode::FPOffset(..)
+ | &AMode::NominalSPOffset(..)
+ | &AMode::RegOffset(..) => {
+ panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!")
+ }
+ }
+ }
+}
+
+impl PrettyPrint for PairAMode {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ &PairAMode::SignedOffset(reg, simm7) => {
+ if simm7.value != 0 {
+ format!("[{}, {}]", reg.show_rru(mb_rru), simm7.show_rru(mb_rru))
+ } else {
+ format!("[{}]", reg.show_rru(mb_rru))
+ }
+ }
+ &PairAMode::PreIndexed(reg, simm7) => format!(
+ "[{}, {}]!",
+ reg.to_reg().show_rru(mb_rru),
+ simm7.show_rru(mb_rru)
+ ),
+ &PairAMode::PostIndexed(reg, simm7) => format!(
+ "[{}], {}",
+ reg.to_reg().show_rru(mb_rru),
+ simm7.show_rru(mb_rru)
+ ),
+ }
+ }
+}
+
+impl PrettyPrint for Cond {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ let mut s = format!("{:?}", self);
+ s.make_ascii_lowercase();
+ s
+ }
+}
+
+impl PrettyPrint for BranchTarget {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ &BranchTarget::Label(label) => format!("label{:?}", label.get()),
+ &BranchTarget::ResolvedOffset(off) => format!("{}", off),
+ }
+ }
+}
+
+/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and
+/// 64-bit variants of many instructions (and integer registers).
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum OperandSize {
+ Size32,
+ Size64,
+}
+
+impl OperandSize {
+ /// 32-bit case?
+ pub fn is32(self) -> bool {
+ self == OperandSize::Size32
+ }
+ /// 64-bit case?
+ pub fn is64(self) -> bool {
+ self == OperandSize::Size64
+ }
+ /// Convert from an `is32` boolean flag to an `OperandSize`.
+ pub fn from_is32(is32: bool) -> OperandSize {
+ if is32 {
+ OperandSize::Size32
+ } else {
+ OperandSize::Size64
+ }
+ }
+ /// Convert from a needed width to the smallest size that fits.
+ pub fn from_bits<I: Into<usize>>(bits: I) -> OperandSize {
+ let bits: usize = bits.into();
+ assert!(bits <= 64);
+ if bits <= 32 {
+ OperandSize::Size32
+ } else {
+ OperandSize::Size64
+ }
+ }
+
+ /// Convert from an integer type into the smallest size that fits.
+ pub fn from_ty(ty: Type) -> OperandSize {
+ Self::from_bits(ty_bits(ty))
+ }
+
+ /// Convert to I32, I64, or I128.
+ pub fn to_ty(self) -> Type {
+ match self {
+ OperandSize::Size32 => I32,
+ OperandSize::Size64 => I64,
+ }
+ }
+
+ pub fn sf_bit(&self) -> u32 {
+ match self {
+ OperandSize::Size32 => 0,
+ OperandSize::Size64 => 1,
+ }
+ }
+}
+
+/// Type used to communicate the size of a scalar SIMD & FP operand.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum ScalarSize {
+ Size8,
+ Size16,
+ Size32,
+ Size64,
+ Size128,
+}
+
+impl ScalarSize {
+ /// Convert from a needed width to the smallest size that fits.
+ pub fn from_bits<I: Into<usize>>(bits: I) -> ScalarSize {
+ match bits.into().next_power_of_two() {
+ 8 => ScalarSize::Size8,
+ 16 => ScalarSize::Size16,
+ 32 => ScalarSize::Size32,
+ 64 => ScalarSize::Size64,
+ 128 => ScalarSize::Size128,
+ w => panic!("Unexpected type width: {}", w),
+ }
+ }
+
+ /// Convert to an integer operand size.
+ pub fn operand_size(&self) -> OperandSize {
+ match self {
+ ScalarSize::Size32 => OperandSize::Size32,
+ ScalarSize::Size64 => OperandSize::Size64,
+ _ => panic!("Unexpected operand_size request for: {:?}", self),
+ }
+ }
+
+ /// Convert from a type into the smallest size that fits.
+ pub fn from_ty(ty: Type) -> ScalarSize {
+ Self::from_bits(ty_bits(ty))
+ }
+
+ /// Return the encoding bits that are used by some scalar FP instructions
+ /// for a particular operand size.
+ pub fn ftype(&self) -> u32 {
+ match self {
+ ScalarSize::Size16 => 0b11,
+ ScalarSize::Size32 => 0b00,
+ ScalarSize::Size64 => 0b01,
+ _ => panic!("Unexpected scalar FP operand size: {:?}", self),
+ }
+ }
+}
+
+/// Type used to communicate the size of a vector operand.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum VectorSize {
+ Size8x8,
+ Size8x16,
+ Size16x4,
+ Size16x8,
+ Size32x2,
+ Size32x4,
+ Size64x2,
+}
+
+impl VectorSize {
+ /// Get the vector operand size with the given scalar size as lane size.
+ pub fn from_lane_size(size: ScalarSize, is_128bit: bool) -> VectorSize {
+ match (size, is_128bit) {
+ (ScalarSize::Size8, false) => VectorSize::Size8x8,
+ (ScalarSize::Size8, true) => VectorSize::Size8x16,
+ (ScalarSize::Size16, false) => VectorSize::Size16x4,
+ (ScalarSize::Size16, true) => VectorSize::Size16x8,
+ (ScalarSize::Size32, false) => VectorSize::Size32x2,
+ (ScalarSize::Size32, true) => VectorSize::Size32x4,
+ (ScalarSize::Size64, true) => VectorSize::Size64x2,
+ _ => panic!("Unexpected scalar FP operand size: {:?}", size),
+ }
+ }
+
+ /// Convert from a type into a vector operand size.
+ pub fn from_ty(ty: Type) -> VectorSize {
+ match ty {
+ B8X16 => VectorSize::Size8x16,
+ B16X8 => VectorSize::Size16x8,
+ B32X4 => VectorSize::Size32x4,
+ B64X2 => VectorSize::Size64x2,
+ F32X2 => VectorSize::Size32x2,
+ F32X4 => VectorSize::Size32x4,
+ F64X2 => VectorSize::Size64x2,
+ I8X8 => VectorSize::Size8x8,
+ I8X16 => VectorSize::Size8x16,
+ I16X4 => VectorSize::Size16x4,
+ I16X8 => VectorSize::Size16x8,
+ I32X2 => VectorSize::Size32x2,
+ I32X4 => VectorSize::Size32x4,
+ I64X2 => VectorSize::Size64x2,
+ _ => unimplemented!("Unsupported type: {}", ty),
+ }
+ }
+
+ /// Get the integer operand size that corresponds to a lane of a vector with a certain size.
+ pub fn operand_size(&self) -> OperandSize {
+ match self {
+ VectorSize::Size64x2 => OperandSize::Size64,
+ _ => OperandSize::Size32,
+ }
+ }
+
+ /// Get the scalar operand size that corresponds to a lane of a vector with a certain size.
+ pub fn lane_size(&self) -> ScalarSize {
+ match self {
+ VectorSize::Size8x8 => ScalarSize::Size8,
+ VectorSize::Size8x16 => ScalarSize::Size8,
+ VectorSize::Size16x4 => ScalarSize::Size16,
+ VectorSize::Size16x8 => ScalarSize::Size16,
+ VectorSize::Size32x2 => ScalarSize::Size32,
+ VectorSize::Size32x4 => ScalarSize::Size32,
+ VectorSize::Size64x2 => ScalarSize::Size64,
+ }
+ }
+
+ pub fn is_128bits(&self) -> bool {
+ match self {
+ VectorSize::Size8x8 => false,
+ VectorSize::Size8x16 => true,
+ VectorSize::Size16x4 => false,
+ VectorSize::Size16x8 => true,
+ VectorSize::Size32x2 => false,
+ VectorSize::Size32x4 => true,
+ VectorSize::Size64x2 => true,
+ }
+ }
+
+ /// Produces a `VectorSize` with lanes twice as wide. Note that if the resulting
+ /// size would exceed 128 bits, then the number of lanes is also halved, so as to
+ /// ensure that the result size is at most 128 bits.
+ pub fn widen(&self) -> VectorSize {
+ match self {
+ VectorSize::Size8x8 => VectorSize::Size16x8,
+ VectorSize::Size8x16 => VectorSize::Size16x8,
+ VectorSize::Size16x4 => VectorSize::Size32x4,
+ VectorSize::Size16x8 => VectorSize::Size32x4,
+ VectorSize::Size32x2 => VectorSize::Size64x2,
+ VectorSize::Size32x4 => VectorSize::Size64x2,
+ VectorSize::Size64x2 => unreachable!(),
+ }
+ }
+
+ /// Produces a `VectorSize` that has the same lane width, but half as many lanes.
+ pub fn halve(&self) -> VectorSize {
+ match self {
+ VectorSize::Size8x16 => VectorSize::Size8x8,
+ VectorSize::Size16x8 => VectorSize::Size16x4,
+ VectorSize::Size32x4 => VectorSize::Size32x2,
+ _ => *self,
+ }
+ }
+
+ /// Return the encoding bits that are used by some SIMD instructions
+ /// for a particular operand size.
+ pub fn enc_size(&self) -> (u32, u32) {
+ let q = self.is_128bits() as u32;
+ let size = match self.lane_size() {
+ ScalarSize::Size8 => 0b00,
+ ScalarSize::Size16 => 0b01,
+ ScalarSize::Size32 => 0b10,
+ ScalarSize::Size64 => 0b11,
+ _ => unreachable!(),
+ };
+
+ (q, size)
+ }
+}
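As an editorial aside (not part of the vendored diff): the conversion helpers in args.rs compose in a way that is easy to sanity-check. The sketch below is a unit test one might place at the bottom of args.rs; it assumes it is compiled inside this module so that the types defined above are in scope.

// Editorial sketch, not part of the vendored diff: a test module one might add
// at the bottom of args.rs. It uses only items defined above in this file.
#[cfg(test)]
mod args_sketch_tests {
    use super::*;

    #[test]
    fn size_conversions_and_offset_packing() {
        // OperandSize picks the smaller of the two integer widths that fits.
        assert_eq!(OperandSize::from_bits(20usize), OperandSize::Size32);
        assert!(OperandSize::from_bits(33usize).is64());

        // ScalarSize rounds the requested width up to a power of two.
        assert_eq!(ScalarSize::from_bits(12usize), ScalarSize::Size16);

        // Widening an 8x8 (64-bit) vector doubles the lane width; since the
        // result may not exceed 128 bits, the lane count stays at 8 (16x8).
        let v = VectorSize::from_lane_size(ScalarSize::Size8, false);
        assert_eq!(v.widen(), VectorSize::Size16x8);
        assert!(v.widen().is_128bits());

        // A resolved branch target is a byte offset; the 19-bit field stores
        // it in units of 4 bytes, two's-complement masked to 19 bits.
        assert_eq!(BranchTarget::ResolvedOffset(16).as_offset19_or_zero(), 4);
        assert_eq!(
            BranchTarget::ResolvedOffset(-8).as_offset19_or_zero(),
            0x7fffe
        );
    }
}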
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit.rs
new file mode 100644
index 0000000000..5d0270dade
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit.rs
@@ -0,0 +1,2359 @@
+//! AArch64 ISA: binary code emission.
+
+use crate::binemit::{CodeOffset, Reloc, StackMap};
+use crate::ir::constant::ConstantData;
+use crate::ir::types::*;
+use crate::ir::{MemFlags, TrapCode};
+use crate::isa::aarch64::inst::*;
+use crate::machinst::ty_bits;
+
+use regalloc::{Reg, RegClass, Writable};
+
+use core::convert::TryFrom;
+use log::debug;
+
+/// Memory label/reference finalization: convert a MemLabel to a PC-relative
+/// offset, possibly emitting relocation(s) as necessary.
+pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
+ match label {
+ &MemLabel::PCRel(rel) => rel,
+ }
+}
+
+/// Memory addressing mode finalization: convert "special" modes (e.g.,
+/// generic arbitrary stack offset) into real addressing modes, possibly by
+/// emitting some helper instructions that come immediately before the use
+/// of this amode.
+pub fn mem_finalize(
+ insn_off: CodeOffset,
+ mem: &AMode,
+ state: &EmitState,
+) -> (SmallVec<[Inst; 4]>, AMode) {
+ match mem {
+ &AMode::RegOffset(_, off, ty)
+ | &AMode::SPOffset(off, ty)
+ | &AMode::FPOffset(off, ty)
+ | &AMode::NominalSPOffset(off, ty) => {
+ let basereg = match mem {
+ &AMode::RegOffset(reg, _, _) => reg,
+ &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => stack_reg(),
+ &AMode::FPOffset(..) => fp_reg(),
+ _ => unreachable!(),
+ };
+ let adj = match mem {
+ &AMode::NominalSPOffset(..) => {
+ debug!(
+ "mem_finalize: nominal SP offset {} + adj {} -> {}",
+ off,
+ state.virtual_sp_offset,
+ off + state.virtual_sp_offset
+ );
+ state.virtual_sp_offset
+ }
+ _ => 0,
+ };
+ let off = off + adj;
+
+ if let Some(simm9) = SImm9::maybe_from_i64(off) {
+ let mem = AMode::Unscaled(basereg, simm9);
+ (smallvec![], mem)
+ } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) {
+ let mem = AMode::UnsignedOffset(basereg, uimm12s);
+ (smallvec![], mem)
+ } else {
+ let tmp = writable_spilltmp_reg();
+ let mut const_insts = Inst::load_constant(tmp, off as u64);
+ // N.B.: we must use AluRRRExtend because AluRRR uses the "shifted register" form
+ // (AluRRRShift) instead, which interprets register 31 as the zero reg, not SP. SP
+ // is a valid base (for SPOffset) which we must handle here.
+ // Also, SP needs to be the first arg, not second.
+ let add_inst = Inst::AluRRRExtend {
+ alu_op: ALUOp::Add64,
+ rd: tmp,
+ rn: basereg,
+ rm: tmp.to_reg(),
+ extendop: ExtendOp::UXTX,
+ };
+ const_insts.push(add_inst);
+ (const_insts, AMode::reg(tmp.to_reg()))
+ }
+ }
+
+ &AMode::Label(ref label) => {
+ let off = memlabel_finalize(insn_off, label);
+ (smallvec![], AMode::Label(MemLabel::PCRel(off)))
+ }
+
+ _ => (smallvec![], mem.clone()),
+ }
+}
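As an editorial aside (not part of the vendored diff): the sketch below restates mem_finalize's offset classification as a standalone function. The two range predicates are simplified stand-ins (assumptions) for SImm9::maybe_from_i64 and UImm12Scaled::maybe_from_i64 from imms.rs, using the architectural ranges of the unscaled (signed 9-bit) and unsigned-offset (scaled 12-bit) load/store forms.

// Editorial sketch, not part of the vendored diff: the decision procedure used
// by mem_finalize() above, restated standalone. The range checks below are
// simplified stand-ins (assumptions) for SImm9::maybe_from_i64 and
// UImm12Scaled::maybe_from_i64 in imms.rs.
fn classify_offset(off: i64, access_bytes: i64) -> &'static str {
    // AArch64 "unscaled" form: signed 9-bit byte offset.
    let fits_simm9 = off >= -256 && off <= 255;
    // AArch64 "unsigned offset" form: a multiple of the access size, 0..=4095 units.
    let fits_uimm12_scaled =
        off >= 0 && off % access_bytes == 0 && off / access_bytes <= 4095;
    if fits_simm9 {
        // -> AMode::Unscaled(base, simm9): ldur/stur, no extra instructions.
        "unscaled"
    } else if fits_uimm12_scaled {
        // -> AMode::UnsignedOffset(base, uimm12): ldr/str with a scaled immediate.
        "unsigned-offset"
    } else {
        // -> load the offset into the spill temp, `add tmp, base, tmp, UXTX`,
        //    and address through tmp (the else branch above).
        "constant+add"
    }
}

fn main() {
    // A NominalSPOffset is first rebased by the running virtual SP adjustment.
    let (off, virtual_sp_offset) = (40i64, 16i64);
    assert_eq!(classify_offset(off + virtual_sp_offset, 8), "unscaled");
    assert_eq!(classify_offset(4096, 8), "unsigned-offset");
    assert_eq!(classify_offset(4097, 8), "constant+add");
}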
+
+/// Helper: get a ConstantData from a u64.
+pub fn u64_constant(bits: u64) -> ConstantData {
+ let data = bits.to_le_bytes();
+ ConstantData::from(&data[..])
+}
+
+//=============================================================================
+// Instructions and subcomponents: emission
+
+fn machreg_to_gpr(m: Reg) -> u32 {
+ assert_eq!(m.get_class(), RegClass::I64);
+ u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+}
+
+fn machreg_to_vec(m: Reg) -> u32 {
+ assert_eq!(m.get_class(), RegClass::V128);
+ u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+}
+
+fn machreg_to_gpr_or_vec(m: Reg) -> u32 {
+ u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+}
+
+fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
+ (bits_31_21 << 21)
+ | (bits_15_10 << 10)
+ | machreg_to_gpr(rd.to_reg())
+ | (machreg_to_gpr(rn) << 5)
+ | (machreg_to_gpr(rm) << 16)
+}
+
+fn enc_arith_rr_imm12(
+ bits_31_24: u32,
+ immshift: u32,
+ imm12: u32,
+ rn: Reg,
+ rd: Writable<Reg>,
+) -> u32 {
+ (bits_31_24 << 24)
+ | (immshift << 22)
+ | (imm12 << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
+ (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
+ (top11 << 21)
+ | (machreg_to_gpr(rm) << 16)
+ | (bit15 << 15)
+ | (machreg_to_gpr(ra) << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 {
+ assert!(off_26_0 < (1 << 26));
+ (op_31_26 << 26) | off_26_0
+}
+
+fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 {
+ assert!(off_18_0 < (1 << 19));
+ (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg)
+}
+
+fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
+ assert!(off_18_0 < (1 << 19));
+ assert!(cond < (1 << 4));
+ (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
+}
+
+fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
+ match kind {
+ CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg),
+ CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg),
+ CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
+ }
+}
+
+const MOVE_WIDE_FIXED: u32 = 0x12800000;
+
+#[repr(u32)]
+enum MoveWideOpcode {
+ MOVN = 0b00,
+ MOVZ = 0b10,
+ MOVK = 0b11,
+}
+
+fn enc_move_wide(
+ op: MoveWideOpcode,
+ rd: Writable<Reg>,
+ imm: MoveWideConst,
+ size: OperandSize,
+) -> u32 {
+ assert!(imm.shift <= 0b11);
+ MOVE_WIDE_FIXED
+ | size.sf_bit() << 31
+ | (op as u32) << 29
+ | u32::from(imm.shift) << 21
+ | u32::from(imm.bits) << 5
+ | machreg_to_gpr(rd.to_reg())
+}
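As an editorial aside (not part of the vendored diff): the sketch below repeats the bit packing that enc_move_wide performs, using plain integers, and checks it against the architectural encoding of `movz x5, #0x1234, lsl #16`.

// Editorial sketch, not part of the vendored diff: the same packing as
// enc_move_wide() above, restated standalone. Field layout: sf at bit 31,
// opc at bits 30:29 (MOVZ = 0b10), the fixed move-wide pattern 0x12800000,
// hw (the 16-bit-chunk index) at bits 22:21, imm16 at bits 20:5, Rd at 4:0.
fn movz_word(rd: u32, imm16: u32, shift_chunks: u32, is64: bool) -> u32 {
    const MOVE_WIDE_FIXED: u32 = 0x12800000;
    const MOVZ: u32 = 0b10;
    assert!(rd < 32 && imm16 <= 0xffff && shift_chunks <= 0b11);
    MOVE_WIDE_FIXED
        | (is64 as u32) << 31
        | MOVZ << 29
        | shift_chunks << 21
        | imm16 << 5
        | rd
}

fn main() {
    // movz x5, #0x1234, lsl #16
    assert_eq!(movz_word(5, 0x1234, 1, true), 0xD2A24685);
}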
+
+fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
+ (op_31_22 << 22)
+ | (simm7.bits() << 15)
+ | (machreg_to_gpr(rt2) << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rt)
+}
+
+fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 {
+ (op_31_22 << 22)
+ | (simm9.bits() << 12)
+ | (op_11_10 << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr_or_vec(rd)
+}
+
+fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 {
+ (op_31_22 << 22)
+ | (0b1 << 24)
+ | (uimm12.bits() << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr_or_vec(rd)
+}
+
+fn enc_ldst_reg(
+ op_31_22: u32,
+ rn: Reg,
+ rm: Reg,
+ s_bit: bool,
+ extendop: Option<ExtendOp>,
+ rd: Reg,
+) -> u32 {
+ let s_bit = if s_bit { 1 } else { 0 };
+ let extend_bits = match extendop {
+ Some(ExtendOp::UXTW) => 0b010,
+ Some(ExtendOp::SXTW) => 0b110,
+ Some(ExtendOp::SXTX) => 0b111,
+ None => 0b011, // LSL
+ _ => panic!("bad extend mode for ld/st AMode"),
+ };
+ (op_31_22 << 22)
+ | (1 << 21)
+ | (machreg_to_gpr(rm) << 16)
+ | (extend_bits << 13)
+ | (s_bit << 12)
+ | (0b10 << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr_or_vec(rd)
+}
+
+fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {
+ (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
+}
+
+fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
+ debug_assert_eq!(q & 0b1, q);
+ debug_assert_eq!(size & 0b11, size);
+ 0b0_0_0011010_10_00000_110_0_00_00000_00000
+ | q << 30
+ | size << 10
+ | machreg_to_gpr(rn) << 5
+ | machreg_to_vec(rt.to_reg())
+}
+
+fn enc_extend(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ (top22 << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
+ (top11 << 21)
+ | (machreg_to_vec(rm) << 16)
+ | (bit15_10 << 10)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
+ (0b01011010110 << 21)
+ | size << 31
+ | opcode2 << 16
+ | opcode1 << 10
+ | machreg_to_gpr(rn) << 5
+ | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_br(rn: Reg) -> u32 {
+ 0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)
+}
+
+fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
+ let off = u32::try_from(off).unwrap();
+ let immlo = off & 3;
+ let immhi = (off >> 2) & ((1 << 19) - 1);
+ (0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
+}
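As an editorial aside (not part of the vendored diff): the sketch below restates the immlo/immhi split that enc_adr performs and checks it against the architectural encoding of `adr x1, #8`.

// Editorial sketch, not part of the vendored diff: ADR encodes a 21-bit signed
// byte offset with its low two bits (immlo) at bits 30:29 and the remaining
// 19 bits (immhi) at bits 23:5, as enc_adr() does above.
fn adr_word(rd: u32, off: u32) -> u32 {
    let immlo = off & 3;
    let immhi = (off >> 2) & ((1 << 19) - 1);
    (0b00010000 << 24) | (immlo << 29) | (immhi << 5) | rd
}

fn main() {
    // adr x1, #8
    assert_eq!(adr_word(1, 8), 0x10000041);
}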
+
+fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond) -> u32 {
+ 0b100_11010100_00000_0000_00_00000_00000
+ | (machreg_to_gpr(rm) << 16)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rd.to_reg())
+ | (cond.bits() << 12)
+}
+
+fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
+ 0b000_11110_00_1_00000_0000_11_00000_00000
+ | (size.ftype() << 22)
+ | (machreg_to_vec(rm) << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg())
+ | (cond.bits() << 12)
+}
+
+fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
+ 0b100_11010100_11111_0000_01_11111_00000
+ | machreg_to_gpr(rd.to_reg())
+ | (cond.invert().bits() << 12)
+}
+
+fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
+ 0b0_1_1_11010010_00000_0000_10_00000_0_0000
+ | size.sf_bit() << 31
+ | imm.bits() << 16
+ | cond.bits() << 12
+ | machreg_to_gpr(rn) << 5
+ | nzcv.bits()
+}
+
+fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
+ 0b00001110_101_00000_00011_1_00000_00000
+ | ((is_16b as u32) << 30)
+ | machreg_to_vec(rd.to_reg())
+ | (machreg_to_vec(rn) << 16)
+ | (machreg_to_vec(rn) << 5)
+}
+
+fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
+ (top22 << 10)
+ | (machreg_to_vec(rm) << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {
+ (top17 << 15)
+ | (machreg_to_vec(rm) << 16)
+ | (machreg_to_vec(ra) << 10)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
+ 0b000_11110_00_1_00000_00_1000_00000_00000
+ | (size.ftype() << 22)
+ | (machreg_to_vec(rm) << 16)
+ | (machreg_to_vec(rn) << 5)
+}
+
+fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ debug_assert_eq!(qu & 0b11, qu);
+ debug_assert_eq!(size & 0b11, size);
+ debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
+ let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
+ bits | qu << 29
+ | size << 22
+ | bits_12_16 << 12
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ debug_assert_eq!(q & 0b1, q);
+ debug_assert_eq!(u & 0b1, u);
+ debug_assert_eq!(size & 0b11, size);
+ debug_assert_eq!(opcode & 0b11111, opcode);
+ 0b0_0_0_01110_00_11000_0_0000_10_00000_00000
+ | q << 30
+ | u << 29
+ | size << 22
+ | opcode << 12
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
+ debug_assert_eq!(len & 0b11, len);
+ 0b0_1_001110_000_00000_0_00_0_00_00000_00000
+ | (machreg_to_vec(rm) << 16)
+ | len << 13
+ | (is_extension as u32) << 12
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_dmb_ish() -> u32 {
+ 0xD5033BBF
+}
+
+fn enc_ldxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
+ let sz = match ty {
+ I64 => 0b11,
+ I32 => 0b10,
+ I16 => 0b01,
+ I8 => 0b00,
+ _ => unreachable!(),
+ };
+ 0b00001000_01011111_01111100_00000000
+ | (sz << 30)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rt.to_reg())
+}
+
+fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
+ let sz = match ty {
+ I64 => 0b11,
+ I32 => 0b10,
+ I16 => 0b01,
+ I8 => 0b00,
+ _ => unreachable!(),
+ };
+ 0b00001000_00000000_01111100_00000000
+ | (sz << 30)
+ | (machreg_to_gpr(rs.to_reg()) << 16)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rt)
+}
+
+fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
+ let abc = (imm >> 5) as u32;
+ let defgh = (imm & 0b11111) as u32;
+
+ debug_assert_eq!(cmode & 0b1111, cmode);
+ debug_assert_eq!(q_op & 0b11, q_op);
+
+ 0b0_0_0_0111100000_000_0000_01_00000_00000
+ | (q_op << 29)
+ | (abc << 16)
+ | (cmode << 12)
+ | (defgh << 5)
+ | machreg_to_vec(rd.to_reg())
+}
+
+/// State carried between emissions of a sequence of instructions.
+#[derive(Default, Clone, Debug)]
+pub struct EmitState {
+ /// Addend to convert nominal-SP offsets to real-SP offsets at the current
+ /// program point.
+ pub(crate) virtual_sp_offset: i64,
+ /// Offset of FP from nominal-SP.
+ pub(crate) nominal_sp_to_fp: i64,
+ /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
+ stack_map: Option<StackMap>,
+ /// Current source-code location corresponding to instruction to be emitted.
+ cur_srcloc: SourceLoc,
+}
+
+impl MachInstEmitState<Inst> for EmitState {
+ fn new(abi: &dyn ABICallee<I = Inst>) -> Self {
+ EmitState {
+ virtual_sp_offset: 0,
+ nominal_sp_to_fp: abi.frame_size() as i64,
+ stack_map: None,
+ cur_srcloc: SourceLoc::default(),
+ }
+ }
+
+ fn pre_safepoint(&mut self, stack_map: StackMap) {
+ self.stack_map = Some(stack_map);
+ }
+
+ fn pre_sourceloc(&mut self, srcloc: SourceLoc) {
+ self.cur_srcloc = srcloc;
+ }
+}
+
+impl EmitState {
+ fn take_stack_map(&mut self) -> Option<StackMap> {
+ self.stack_map.take()
+ }
+
+ fn clear_post_insn(&mut self) {
+ self.stack_map = None;
+ }
+
+ fn cur_srcloc(&self) -> SourceLoc {
+ self.cur_srcloc
+ }
+}
+
+/// Constant state used during function compilation.
+pub struct EmitInfo(settings::Flags);
+
+impl EmitInfo {
+ pub(crate) fn new(flags: settings::Flags) -> Self {
+ Self(flags)
+ }
+}
+
+impl MachInstEmitInfo for EmitInfo {
+ fn flags(&self) -> &settings::Flags {
+ &self.0
+ }
+}
+
+impl MachInstEmit for Inst {
+ type State = EmitState;
+ type Info = EmitInfo;
+ type UnwindInfo = super::unwind::AArch64UnwindInfo;
+
+ fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
+ // N.B.: we *must* not exceed the "worst-case size" used to compute
+ // where to insert islands, except when islands are explicitly triggered
+ // (with an `EmitIsland`). We check this in debug builds. This is `mut`
+ // to allow disabling the check for `JTSequence`, which is always
+ // emitted following an `EmitIsland`.
+ let mut start_off = sink.cur_offset();
+
+ match self {
+ &Inst::AluRRR { alu_op, rd, rn, rm } => {
+ let top11 = match alu_op {
+ ALUOp::Add32 => 0b00001011_000,
+ ALUOp::Add64 => 0b10001011_000,
+ ALUOp::Sub32 => 0b01001011_000,
+ ALUOp::Sub64 => 0b11001011_000,
+ ALUOp::Orr32 => 0b00101010_000,
+ ALUOp::Orr64 => 0b10101010_000,
+ ALUOp::And32 => 0b00001010_000,
+ ALUOp::And64 => 0b10001010_000,
+ ALUOp::Eor32 => 0b01001010_000,
+ ALUOp::Eor64 => 0b11001010_000,
+ ALUOp::OrrNot32 => 0b00101010_001,
+ ALUOp::OrrNot64 => 0b10101010_001,
+ ALUOp::AndNot32 => 0b00001010_001,
+ ALUOp::AndNot64 => 0b10001010_001,
+ ALUOp::EorNot32 => 0b01001010_001,
+ ALUOp::EorNot64 => 0b11001010_001,
+ ALUOp::AddS32 => 0b00101011_000,
+ ALUOp::AddS64 => 0b10101011_000,
+ ALUOp::SubS32 => 0b01101011_000,
+ ALUOp::SubS64 => 0b11101011_000,
+ ALUOp::SDiv64 => 0b10011010_110,
+ ALUOp::UDiv64 => 0b10011010_110,
+ ALUOp::RotR32 | ALUOp::Lsr32 | ALUOp::Asr32 | ALUOp::Lsl32 => 0b00011010_110,
+ ALUOp::RotR64 | ALUOp::Lsr64 | ALUOp::Asr64 | ALUOp::Lsl64 => 0b10011010_110,
+ ALUOp::SMulH => 0b10011011_010,
+ ALUOp::UMulH => 0b10011011_110,
+ };
+ let bit15_10 = match alu_op {
+ ALUOp::SDiv64 => 0b000011,
+ ALUOp::UDiv64 => 0b000010,
+ ALUOp::RotR32 | ALUOp::RotR64 => 0b001011,
+ ALUOp::Lsr32 | ALUOp::Lsr64 => 0b001001,
+ ALUOp::Asr32 | ALUOp::Asr64 => 0b001010,
+ ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000,
+ ALUOp::SMulH | ALUOp::UMulH => 0b011111,
+ _ => 0b000000,
+ };
+ debug_assert_ne!(writable_stack_reg(), rd);
+ // The stack pointer is the zero register in this context, so this might be an
+ // indication that something is wrong.
+ debug_assert_ne!(stack_reg(), rn);
+ debug_assert_ne!(stack_reg(), rm);
+ sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
+ }
+ &Inst::AluRRRR {
+ alu_op,
+ rd,
+ rm,
+ rn,
+ ra,
+ } => {
+ let (top11, bit15) = match alu_op {
+ ALUOp3::MAdd32 => (0b0_00_11011_000, 0),
+ ALUOp3::MSub32 => (0b0_00_11011_000, 1),
+ ALUOp3::MAdd64 => (0b1_00_11011_000, 0),
+ ALUOp3::MSub64 => (0b1_00_11011_000, 1),
+ };
+ sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
+ }
+ &Inst::AluRRImm12 {
+ alu_op,
+ rd,
+ rn,
+ ref imm12,
+ } => {
+ let top8 = match alu_op {
+ ALUOp::Add32 => 0b000_10001,
+ ALUOp::Add64 => 0b100_10001,
+ ALUOp::Sub32 => 0b010_10001,
+ ALUOp::Sub64 => 0b110_10001,
+ ALUOp::AddS32 => 0b001_10001,
+ ALUOp::AddS64 => 0b101_10001,
+ ALUOp::SubS32 => 0b011_10001,
+ ALUOp::SubS64 => 0b111_10001,
+ _ => unimplemented!("{:?}", alu_op),
+ };
+ sink.put4(enc_arith_rr_imm12(
+ top8,
+ imm12.shift_bits(),
+ imm12.imm_bits(),
+ rn,
+ rd,
+ ));
+ }
+ &Inst::AluRRImmLogic {
+ alu_op,
+ rd,
+ rn,
+ ref imml,
+ } => {
+ let (top9, inv) = match alu_op {
+ ALUOp::Orr32 => (0b001_100100, false),
+ ALUOp::Orr64 => (0b101_100100, false),
+ ALUOp::And32 => (0b000_100100, false),
+ ALUOp::And64 => (0b100_100100, false),
+ ALUOp::Eor32 => (0b010_100100, false),
+ ALUOp::Eor64 => (0b110_100100, false),
+ ALUOp::OrrNot32 => (0b001_100100, true),
+ ALUOp::OrrNot64 => (0b101_100100, true),
+ ALUOp::AndNot32 => (0b000_100100, true),
+ ALUOp::AndNot64 => (0b100_100100, true),
+ ALUOp::EorNot32 => (0b010_100100, true),
+ ALUOp::EorNot64 => (0b110_100100, true),
+ _ => unimplemented!("{:?}", alu_op),
+ };
+ let imml = if inv { imml.invert() } else { imml.clone() };
+ sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd));
+ }
+
+ &Inst::AluRRImmShift {
+ alu_op,
+ rd,
+ rn,
+ ref immshift,
+ } => {
+ let amt = immshift.value();
+ let (top10, immr, imms) = match alu_op {
+ ALUOp::RotR32 => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),
+ ALUOp::RotR64 => (0b1001001111, machreg_to_gpr(rn), u32::from(amt)),
+ ALUOp::Lsr32 => (0b0101001100, u32::from(amt), 0b011111),
+ ALUOp::Lsr64 => (0b1101001101, u32::from(amt), 0b111111),
+ ALUOp::Asr32 => (0b0001001100, u32::from(amt), 0b011111),
+ ALUOp::Asr64 => (0b1001001101, u32::from(amt), 0b111111),
+ ALUOp::Lsl32 => (
+ 0b0101001100,
+ u32::from((32 - amt) % 32),
+ u32::from(31 - amt),
+ ),
+ ALUOp::Lsl64 => (
+ 0b1101001101,
+ u32::from((64 - amt) % 64),
+ u32::from(63 - amt),
+ ),
+ _ => unimplemented!("{:?}", alu_op),
+ };
+ sink.put4(
+ (top10 << 22)
+ | (immr << 16)
+ | (imms << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rd.to_reg()),
+ );
+ }
+
+ &Inst::AluRRRShift {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ref shiftop,
+ } => {
+ let top11: u32 = match alu_op {
+ ALUOp::Add32 => 0b000_01011000,
+ ALUOp::Add64 => 0b100_01011000,
+ ALUOp::AddS32 => 0b001_01011000,
+ ALUOp::AddS64 => 0b101_01011000,
+ ALUOp::Sub32 => 0b010_01011000,
+ ALUOp::Sub64 => 0b110_01011000,
+ ALUOp::SubS32 => 0b011_01011000,
+ ALUOp::SubS64 => 0b111_01011000,
+ ALUOp::Orr32 => 0b001_01010000,
+ ALUOp::Orr64 => 0b101_01010000,
+ ALUOp::And32 => 0b000_01010000,
+ ALUOp::And64 => 0b100_01010000,
+ ALUOp::Eor32 => 0b010_01010000,
+ ALUOp::Eor64 => 0b110_01010000,
+ ALUOp::OrrNot32 => 0b001_01010001,
+ ALUOp::OrrNot64 => 0b101_01010001,
+ ALUOp::EorNot32 => 0b010_01010001,
+ ALUOp::EorNot64 => 0b110_01010001,
+ ALUOp::AndNot32 => 0b000_01010001,
+ ALUOp::AndNot64 => 0b100_01010001,
+ _ => unimplemented!("{:?}", alu_op),
+ };
+ let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);
+ let bits_15_10 = u32::from(shiftop.amt().value());
+ sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
+ }
+
+ &Inst::AluRRRExtend {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ extendop,
+ } => {
+ let top11: u32 = match alu_op {
+ ALUOp::Add32 => 0b00001011001,
+ ALUOp::Add64 => 0b10001011001,
+ ALUOp::Sub32 => 0b01001011001,
+ ALUOp::Sub64 => 0b11001011001,
+ ALUOp::AddS32 => 0b00101011001,
+ ALUOp::AddS64 => 0b10101011001,
+ ALUOp::SubS32 => 0b01101011001,
+ ALUOp::SubS64 => 0b11101011001,
+ _ => unimplemented!("{:?}", alu_op),
+ };
+ let bits_15_10 = u32::from(extendop.bits()) << 3;
+ sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
+ }
+
+ &Inst::BitRR { op, rd, rn, .. } => {
+ let size = if op.operand_size().is32() { 0b0 } else { 0b1 };
+ let (op1, op2) = match op {
+ BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000),
+ BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100),
+ BitOp::Cls32 | BitOp::Cls64 => (0b00000, 0b000101),
+ };
+ sink.put4(enc_bit_rr(size, op1, op2, rn, rd))
+ }
+
+ &Inst::ULoad8 { rd, ref mem, flags }
+ | &Inst::SLoad8 { rd, ref mem, flags }
+ | &Inst::ULoad16 { rd, ref mem, flags }
+ | &Inst::SLoad16 { rd, ref mem, flags }
+ | &Inst::ULoad32 { rd, ref mem, flags }
+ | &Inst::SLoad32 { rd, ref mem, flags }
+ | &Inst::ULoad64 {
+ rd, ref mem, flags, ..
+ }
+ | &Inst::FpuLoad32 { rd, ref mem, flags }
+ | &Inst::FpuLoad64 { rd, ref mem, flags }
+ | &Inst::FpuLoad128 { rd, ref mem, flags } => {
+ let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
+
+ for inst in mem_insts.into_iter() {
+ inst.emit(sink, emit_info, state);
+ }
+
+ // ldst encoding helpers take Reg, not Writable<Reg>.
+ let rd = rd.to_reg();
+
+ // This is the base opcode (top 10 bits) for the "unscaled
+ // immediate" form (Unscaled). Other addressing modes will OR in
+ // other values for bits 24/25 (bits 1/2 of this constant).
+ let (op, bits) = match self {
+ &Inst::ULoad8 { .. } => (0b0011100001, 8),
+ &Inst::SLoad8 { .. } => (0b0011100010, 8),
+ &Inst::ULoad16 { .. } => (0b0111100001, 16),
+ &Inst::SLoad16 { .. } => (0b0111100010, 16),
+ &Inst::ULoad32 { .. } => (0b1011100001, 32),
+ &Inst::SLoad32 { .. } => (0b1011100010, 32),
+ &Inst::ULoad64 { .. } => (0b1111100001, 64),
+ &Inst::FpuLoad32 { .. } => (0b1011110001, 32),
+ &Inst::FpuLoad64 { .. } => (0b1111110001, 64),
+ &Inst::FpuLoad128 { .. } => (0b0011110011, 128),
+ _ => unreachable!(),
+ };
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() && !flags.notrap() {
+ // Register the offset at which the actual load instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+
+ match &mem {
+ &AMode::Unscaled(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
+ }
+ &AMode::UnsignedOffset(reg, uimm12scaled) => {
+ if uimm12scaled.value() != 0 {
+ assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
+ }
+ sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
+ }
+ &AMode::RegReg(r1, r2) => {
+ sink.put4(enc_ldst_reg(
+ op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
+ ));
+ }
+ &AMode::RegScaled(r1, r2, ty) | &AMode::RegScaledExtended(r1, r2, ty, _) => {
+ assert_eq!(bits, ty_bits(ty));
+ let extendop = match &mem {
+ &AMode::RegScaled(..) => None,
+ &AMode::RegScaledExtended(_, _, _, op) => Some(op),
+ _ => unreachable!(),
+ };
+ sink.put4(enc_ldst_reg(
+ op, r1, r2, /* scaled = */ true, extendop, rd,
+ ));
+ }
+ &AMode::RegExtended(r1, r2, extendop) => {
+ sink.put4(enc_ldst_reg(
+ op,
+ r1,
+ r2,
+ /* scaled = */ false,
+ Some(extendop),
+ rd,
+ ));
+ }
+ &AMode::Label(ref label) => {
+ let offset = match label {
+ // cast i32 to u32 (two's-complement)
+ &MemLabel::PCRel(off) => off as u32,
+ } / 4;
+ assert!(offset < (1 << 19));
+ match self {
+ &Inst::ULoad32 { .. } => {
+ sink.put4(enc_ldst_imm19(0b00011000, offset, rd));
+ }
+ &Inst::SLoad32 { .. } => {
+ sink.put4(enc_ldst_imm19(0b10011000, offset, rd));
+ }
+ &Inst::FpuLoad32 { .. } => {
+ sink.put4(enc_ldst_imm19(0b00011100, offset, rd));
+ }
+ &Inst::ULoad64 { .. } => {
+ sink.put4(enc_ldst_imm19(0b01011000, offset, rd));
+ }
+ &Inst::FpuLoad64 { .. } => {
+ sink.put4(enc_ldst_imm19(0b01011100, offset, rd));
+ }
+ &Inst::FpuLoad128 { .. } => {
+ sink.put4(enc_ldst_imm19(0b10011100, offset, rd));
+ }
+ _ => panic!("Unspported size for LDR from constant pool!"),
+ }
+ }
+ &AMode::PreIndexed(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd));
+ }
+ &AMode::PostIndexed(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
+ }
+ // Eliminated by `mem_finalize()` above.
+ &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => {
+ panic!("Should not see stack-offset here!")
+ }
+ &AMode::RegOffset(..) => panic!("SHould not see generic reg-offset here!"),
+ }
+ }
+
+ &Inst::Store8 { rd, ref mem, flags }
+ | &Inst::Store16 { rd, ref mem, flags }
+ | &Inst::Store32 { rd, ref mem, flags }
+ | &Inst::Store64 { rd, ref mem, flags }
+ | &Inst::FpuStore32 { rd, ref mem, flags }
+ | &Inst::FpuStore64 { rd, ref mem, flags }
+ | &Inst::FpuStore128 { rd, ref mem, flags } => {
+ let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
+
+ for inst in mem_insts.into_iter() {
+ inst.emit(sink, emit_info, state);
+ }
+
+ let (op, bits) = match self {
+ &Inst::Store8 { .. } => (0b0011100000, 8),
+ &Inst::Store16 { .. } => (0b0111100000, 16),
+ &Inst::Store32 { .. } => (0b1011100000, 32),
+ &Inst::Store64 { .. } => (0b1111100000, 64),
+ &Inst::FpuStore32 { .. } => (0b1011110000, 32),
+ &Inst::FpuStore64 { .. } => (0b1111110000, 64),
+ &Inst::FpuStore128 { .. } => (0b0011110010, 128),
+ _ => unreachable!(),
+ };
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() && !flags.notrap() {
+ // Register the offset at which the actual store instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+
+ match &mem {
+ &AMode::Unscaled(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
+ }
+ &AMode::UnsignedOffset(reg, uimm12scaled) => {
+ if uimm12scaled.value() != 0 {
+ assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
+ }
+ sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
+ }
+ &AMode::RegReg(r1, r2) => {
+ sink.put4(enc_ldst_reg(
+ op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
+ ));
+ }
+ &AMode::RegScaled(r1, r2, _ty) | &AMode::RegScaledExtended(r1, r2, _ty, _) => {
+ let extendop = match &mem {
+ &AMode::RegScaled(..) => None,
+ &AMode::RegScaledExtended(_, _, _, op) => Some(op),
+ _ => unreachable!(),
+ };
+ sink.put4(enc_ldst_reg(
+ op, r1, r2, /* scaled = */ true, extendop, rd,
+ ));
+ }
+ &AMode::RegExtended(r1, r2, extendop) => {
+ sink.put4(enc_ldst_reg(
+ op,
+ r1,
+ r2,
+ /* scaled = */ false,
+ Some(extendop),
+ rd,
+ ));
+ }
+ &AMode::Label(..) => {
+ panic!("Store to a MemLabel not implemented!");
+ }
+ &AMode::PreIndexed(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd));
+ }
+ &AMode::PostIndexed(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
+ }
+ // Eliminated by `mem_finalize()` above.
+ &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => {
+ panic!("Should not see stack-offset here!")
+ }
+ &AMode::RegOffset(..) => panic!("SHould not see generic reg-offset here!"),
+ }
+ }
+
+ &Inst::StoreP64 {
+ rt,
+ rt2,
+ ref mem,
+ flags,
+ } => {
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() && !flags.notrap() {
+ // Register the offset at which the actual store instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ match mem {
+ &PairAMode::SignedOffset(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
+ }
+ &PairAMode::PreIndexed(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100110, simm7, reg.to_reg(), rt, rt2));
+ }
+ &PairAMode::PostIndexed(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100010, simm7, reg.to_reg(), rt, rt2));
+ }
+ }
+ }
+ &Inst::LoadP64 {
+ rt,
+ rt2,
+ ref mem,
+ flags,
+ } => {
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() && !flags.notrap() {
+ // Register the offset at which the actual load instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+
+ let rt = rt.to_reg();
+ let rt2 = rt2.to_reg();
+ match mem {
+ &PairAMode::SignedOffset(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
+ }
+ &PairAMode::PreIndexed(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100111, simm7, reg.to_reg(), rt, rt2));
+ }
+ &PairAMode::PostIndexed(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100011, simm7, reg.to_reg(), rt, rt2));
+ }
+ }
+ }
+ &Inst::Mov64 { rd, rm } => {
+ assert!(rd.to_reg().get_class() == rm.get_class());
+ assert!(rm.get_class() == RegClass::I64);
+
+ // MOV to SP is interpreted as MOV to XZR instead. And our codegen
+ // should never MOV to XZR.
+ assert!(rd.to_reg() != stack_reg());
+
+ if rm == stack_reg() {
+ // We can't use ORR here, so use an `add rd, sp, #0` instead.
+ let imm12 = Imm12::maybe_from_u64(0).unwrap();
+ sink.put4(enc_arith_rr_imm12(
+ 0b100_10001,
+ imm12.shift_bits(),
+ imm12.imm_bits(),
+ rm,
+ rd,
+ ));
+ } else {
+ // Encoded as ORR rd, rm, zero.
+ sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
+ }
+ }
+ &Inst::Mov32 { rd, rm } => {
+ // MOV to SP is interpreted as MOV to XZR instead. And our codegen
+ // should never MOV to XZR.
+ assert!(machreg_to_gpr(rd.to_reg()) != 31);
+ // Encoded as ORR rd, rm, zero.
+ sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
+ }
+ &Inst::MovZ { rd, imm, size } => {
+ sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm, size))
+ }
+ &Inst::MovN { rd, imm, size } => {
+ sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm, size))
+ }
+ &Inst::MovK { rd, imm, size } => {
+ sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm, size))
+ }
+ &Inst::CSel { rd, rn, rm, cond } => {
+ sink.put4(enc_csel(rd, rn, rm, cond));
+ }
+ &Inst::CSet { rd, cond } => {
+ sink.put4(enc_cset(rd, cond));
+ }
+ &Inst::CCmpImm {
+ size,
+ rn,
+ imm,
+ nzcv,
+ cond,
+ } => {
+ sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
+ }
+ &Inst::AtomicRMW { ty, op } => {
+ /* Emit this:
+ dmb ish
+ again:
+ ldxr{,b,h} x/w27, [x25]
+ op x28, x27, x26 // op is add,sub,and,orr,eor
+ stxr{,b,h} w24, x/w28, [x25]
+ cbnz x24, again
+ dmb ish
+
+ Operand conventions:
+ IN: x25 (addr), x26 (2nd arg for op)
+ OUT: x27 (old value), x24 (trashed), x28 (trashed)
+
+ It is unfortunate that, per the ARM documentation, x28 cannot be used for
+ both the store-data and success-flag operands of stxr. This causes the
+ instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
+ instead for the success-flag.
+
+ In the case where the operation is 'xchg', the second insn is instead
+ mov x28, x26
+ so that we simply write in the destination, the "2nd arg for op".
+ */
+ let xzr = zero_reg();
+ let x24 = xreg(24);
+ let x25 = xreg(25);
+ let x26 = xreg(26);
+ let x27 = xreg(27);
+ let x28 = xreg(28);
+ let x24wr = writable_xreg(24);
+ let x27wr = writable_xreg(27);
+ let x28wr = writable_xreg(28);
+ let again_label = sink.get_label();
+
+ sink.put4(enc_dmb_ish()); // dmb ish
+
+ // again:
+ sink.bind_label(again_label);
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25]
+
+ if op == inst_common::AtomicRmwOp::Xchg {
+ // mov x28, x26
+ sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x28wr, xzr, x26))
+ } else {
+ // add/sub/and/orr/eor x28, x27, x26
+ let bits_31_21 = match op {
+ inst_common::AtomicRmwOp::Add => 0b100_01011_00_0,
+ inst_common::AtomicRmwOp::Sub => 0b110_01011_00_0,
+ inst_common::AtomicRmwOp::And => 0b100_01010_00_0,
+ inst_common::AtomicRmwOp::Or => 0b101_01010_00_0,
+ inst_common::AtomicRmwOp::Xor => 0b110_01010_00_0,
+ inst_common::AtomicRmwOp::Xchg => unreachable!(),
+ };
+ sink.put4(enc_arith_rrr(bits_31_21, 0b000000, x28wr, x27, x26));
+ }
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25]
+
+ // cbnz w24, again
+ // Note, we're actually testing x24, and relying on the default zero-high-half
+ // rule in the assignment that `stxr` does.
+ let br_offset = sink.cur_offset();
+ sink.put4(enc_conditional_br(
+ BranchTarget::Label(again_label),
+ CondBrKind::NotZero(x24),
+ ));
+ sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
+
+ sink.put4(enc_dmb_ish()); // dmb ish
+ }
+ &Inst::AtomicCAS { ty } => {
+ /* Emit this:
+ dmb ish
+ again:
+ ldxr{,b,h} x/w27, [x25]
+ and x24, x26, MASK (= 2^size_bits - 1)
+ cmp x27, x24
+ b.ne out
+ stxr{,b,h} w24, x/w28, [x25]
+ cbnz x24, again
+ out:
+ dmb ish
+
+ Operand conventions:
+ IN: x25 (addr), x26 (expected value), x28 (replacement value)
+ OUT: x27 (old value), x24 (trashed)
+ */
+ let xzr = zero_reg();
+ let x24 = xreg(24);
+ let x25 = xreg(25);
+ let x26 = xreg(26);
+ let x27 = xreg(27);
+ let x28 = xreg(28);
+ let xzrwr = writable_zero_reg();
+ let x24wr = writable_xreg(24);
+ let x27wr = writable_xreg(27);
+ let again_label = sink.get_label();
+ let out_label = sink.get_label();
+
+ sink.put4(enc_dmb_ish()); // dmb ish
+
+ // again:
+ sink.bind_label(again_label);
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25]
+
+ if ty == I64 {
+ // mov x24, x26
+ sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x24wr, xzr, x26))
+ } else {
+ // and x24, x26, 0xFF/0xFFFF/0xFFFFFFFF
+ let (mask, s) = match ty {
+ I8 => (0xFF, 7),
+ I16 => (0xFFFF, 15),
+ I32 => (0xFFFFFFFF, 31),
+ _ => unreachable!(),
+ };
+ sink.put4(enc_arith_rr_imml(
+ 0b100_100100,
+ ImmLogic::from_n_r_s(mask, true, 0, s, OperandSize::Size64).enc_bits(),
+ x26,
+ x24wr,
+ ))
+ }
+
+ // cmp x27, x24 (== subs xzr, x27, x24)
+ sink.put4(enc_arith_rrr(0b111_01011_00_0, 0b000000, xzrwr, x27, x24));
+
+ // b.ne out
+ let br_out_offset = sink.cur_offset();
+ sink.put4(enc_conditional_br(
+ BranchTarget::Label(out_label),
+ CondBrKind::Cond(Cond::Ne),
+ ));
+ sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25]
+
+ // cbnz w24, again.
+ // Note, we're actually testing x24, and relying on the default zero-high-half
+ // rule in the assignment that `stxr` does.
+ let br_again_offset = sink.cur_offset();
+ sink.put4(enc_conditional_br(
+ BranchTarget::Label(again_label),
+ CondBrKind::NotZero(x24),
+ ));
+ sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19);
+
+ // out:
+ sink.bind_label(out_label);
+ sink.put4(enc_dmb_ish()); // dmb ish
+ }
+ &Inst::AtomicLoad { ty, r_data, r_addr } => {
+ let op = match ty {
+ I8 => 0b0011100001,
+ I16 => 0b0111100001,
+ I32 => 0b1011100001,
+ I64 => 0b1111100001,
+ _ => unreachable!(),
+ };
+ sink.put4(enc_dmb_ish()); // dmb ish
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/);
+ sink.put4(enc_ldst_uimm12(
+ op,
+ uimm12scaled_zero,
+ r_addr,
+ r_data.to_reg(),
+ ));
+ }
+ &Inst::AtomicStore { ty, r_data, r_addr } => {
+ let op = match ty {
+ I8 => 0b0011100000,
+ I16 => 0b0111100000,
+ I32 => 0b1011100000,
+ I64 => 0b1111100000,
+ _ => unreachable!(),
+ };
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/);
+ sink.put4(enc_ldst_uimm12(op, uimm12scaled_zero, r_addr, r_data));
+ sink.put4(enc_dmb_ish()); // dmb ish
+ }
+ &Inst::Fence {} => {
+ sink.put4(enc_dmb_ish()); // dmb ish
+ }
+ &Inst::FpuMove64 { rd, rn } => {
+ sink.put4(enc_vecmov(/* 16b = */ false, rd, rn));
+ }
+ &Inst::FpuMove128 { rd, rn } => {
+ sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
+ }
+ &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
+ let (imm5, shift, mask) = match size.lane_size() {
+ ScalarSize::Size32 => (0b00100, 3, 0b011),
+ ScalarSize::Size64 => (0b01000, 4, 0b001),
+ _ => unimplemented!(),
+ };
+ debug_assert_eq!(idx & mask, idx);
+ let imm5 = imm5 | ((idx as u32) << shift);
+ sink.put4(
+ 0b010_11110000_00000_000001_00000_00000
+ | (imm5 << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::FpuRR { fpu_op, rd, rn } => {
+ let top22 = match fpu_op {
+ FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000,
+ FPUOp1::Abs64 => 0b000_11110_01_1_000001_10000,
+ FPUOp1::Neg32 => 0b000_11110_00_1_000010_10000,
+ FPUOp1::Neg64 => 0b000_11110_01_1_000010_10000,
+ FPUOp1::Sqrt32 => 0b000_11110_00_1_000011_10000,
+ FPUOp1::Sqrt64 => 0b000_11110_01_1_000011_10000,
+ FPUOp1::Cvt32To64 => 0b000_11110_00_1_000101_10000,
+ FPUOp1::Cvt64To32 => 0b000_11110_01_1_000100_10000,
+ };
+ sink.put4(enc_fpurr(top22, rd, rn));
+ }
+ &Inst::FpuRRR { fpu_op, rd, rn, rm } => {
+ let top22 = match fpu_op {
+ FPUOp2::Add32 => 0b000_11110_00_1_00000_001010,
+ FPUOp2::Add64 => 0b000_11110_01_1_00000_001010,
+ FPUOp2::Sub32 => 0b000_11110_00_1_00000_001110,
+ FPUOp2::Sub64 => 0b000_11110_01_1_00000_001110,
+ FPUOp2::Mul32 => 0b000_11110_00_1_00000_000010,
+ FPUOp2::Mul64 => 0b000_11110_01_1_00000_000010,
+ FPUOp2::Div32 => 0b000_11110_00_1_00000_000110,
+ FPUOp2::Div64 => 0b000_11110_01_1_00000_000110,
+ FPUOp2::Max32 => 0b000_11110_00_1_00000_010010,
+ FPUOp2::Max64 => 0b000_11110_01_1_00000_010010,
+ FPUOp2::Min32 => 0b000_11110_00_1_00000_010110,
+ FPUOp2::Min64 => 0b000_11110_01_1_00000_010110,
+ FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011,
+ FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011,
+ FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011,
+ FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011,
+ };
+ sink.put4(enc_fpurrr(top22, rd, rn, rm));
+ }
+ &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
+ FPUOpRI::UShr32(imm) => {
+ debug_assert_eq!(32, imm.lane_size_in_bits);
+ sink.put4(
+ 0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
+ | imm.enc() << 16
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg()),
+ )
+ }
+ FPUOpRI::UShr64(imm) => {
+ debug_assert_eq!(64, imm.lane_size_in_bits);
+ sink.put4(
+ 0b01_1_111110_0000000_00_0_0_0_1_00000_00000
+ | imm.enc() << 16
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg()),
+ )
+ }
+ FPUOpRI::Sli64(imm) => {
+ debug_assert_eq!(64, imm.lane_size_in_bits);
+ sink.put4(
+ 0b01_1_111110_0000000_010101_00000_00000
+ | imm.enc() << 16
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg()),
+ )
+ }
+ FPUOpRI::Sli32(imm) => {
+ debug_assert_eq!(32, imm.lane_size_in_bits);
+ sink.put4(
+ 0b0_0_1_011110_0000000_010101_00000_00000
+ | imm.enc() << 16
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg()),
+ )
+ }
+ },
+ &Inst::FpuRRRR {
+ fpu_op,
+ rd,
+ rn,
+ rm,
+ ra,
+ } => {
+ let top17 = match fpu_op {
+ FPUOp3::MAdd32 => 0b000_11111_00_0_00000_0,
+ FPUOp3::MAdd64 => 0b000_11111_01_0_00000_0,
+ };
+ sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
+ }
+ &Inst::VecMisc { op, rd, rn, size } => {
+ let (q, enc_size) = size.enc_size();
+ let (u, bits_12_16, size) = match op {
+ VecMisc2::Not => (0b1, 0b00101, 0b00),
+ VecMisc2::Neg => (0b1, 0b01011, enc_size),
+ VecMisc2::Abs => (0b0, 0b01011, enc_size),
+ VecMisc2::Fabs => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b01111, enc_size)
+ }
+ VecMisc2::Fneg => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b1, 0b01111, enc_size)
+ }
+ VecMisc2::Fsqrt => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b1, 0b11111, enc_size)
+ }
+ VecMisc2::Rev64 => {
+ debug_assert_ne!(VectorSize::Size64x2, size);
+ (0b0, 0b00000, enc_size)
+ }
+ VecMisc2::Shll => {
+ debug_assert_ne!(VectorSize::Size64x2, size);
+ debug_assert!(!size.is_128bits());
+ (0b1, 0b10011, enc_size)
+ }
+ VecMisc2::Fcvtzs => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11011, enc_size)
+ }
+ VecMisc2::Fcvtzu => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b1, 0b11011, enc_size)
+ }
+ VecMisc2::Scvtf => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11101, enc_size & 0b1)
+ }
+ VecMisc2::Ucvtf => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b1, 0b11101, enc_size & 0b1)
+ }
+ VecMisc2::Frintn => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11000, enc_size & 0b01)
+ }
+ VecMisc2::Frintz => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11001, enc_size | 0b10)
+ }
+ VecMisc2::Frintm => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11001, enc_size & 0b01)
+ }
+ VecMisc2::Frintp => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11000, enc_size | 0b10)
+ }
+ };
+ sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
+ }
+ &Inst::VecLanes { op, rd, rn, size } => {
+ let (q, size) = match size {
+ VectorSize::Size8x16 => (0b1, 0b00),
+ VectorSize::Size16x8 => (0b1, 0b01),
+ VectorSize::Size32x4 => (0b1, 0b10),
+ _ => unreachable!(),
+ };
+ let (u, opcode) = match op {
+ VecLanesOp::Uminv => (0b1, 0b11010),
+ VecLanesOp::Addv => (0b0, 0b11011),
+ };
+ sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
+ }
+ &Inst::VecShiftImm {
+ op,
+ rd,
+ rn,
+ size,
+ imm,
+ } => {
+ let (is_shr, template) = match op {
+ VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32),
+ VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32),
+ VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32),
+ };
+ let imm = imm as u32;
+ // Deal with the somewhat strange encoding scheme for, and limits on,
+ // the shift amount.
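+            // For right shifts the immh:immb field holds (2 * lane_bits - imm); for
+            // left shifts it holds (lane_bits + imm).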
+ let immh_immb = match (size, is_shr) {
+ (VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => {
+ 0b_1000_000_u32 | (64 - imm)
+ }
+ (VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => {
+ 0b_0100_000_u32 | (32 - imm)
+ }
+ (VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => {
+ 0b_0010_000_u32 | (16 - imm)
+ }
+ (VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => {
+ 0b_0001_000_u32 | (8 - imm)
+ }
+ (VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm,
+ (VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm,
+ (VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm,
+ (VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm,
+ _ => panic!(
+ "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
+ op, size, imm
+ ),
+ };
+ let rn_enc = machreg_to_vec(rn);
+ let rd_enc = machreg_to_vec(rd.to_reg());
+ sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
+ }
+ &Inst::VecExtract { rd, rn, rm, imm4 } => {
+ if imm4 < 16 {
+ let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
+ let rm_enc = machreg_to_vec(rm);
+ let rn_enc = machreg_to_vec(rn);
+ let rd_enc = machreg_to_vec(rd.to_reg());
+ sink.put4(
+ template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
+ );
+ } else {
+ panic!(
+ "aarch64: Inst::VecExtract: emit: invalid extract index {}",
+ imm4
+ );
+ }
+ }
+ &Inst::VecTbl {
+ rd,
+ rn,
+ rm,
+ is_extension,
+ } => {
+ sink.put4(enc_tbl(is_extension, 0b00, rd, rn, rm));
+ }
+ &Inst::VecTbl2 {
+ rd,
+ rn,
+ rn2,
+ rm,
+ is_extension,
+ } => {
+ assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
+ sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm));
+ }
+ &Inst::FpuCmp32 { rn, rm } => {
+ sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm));
+ }
+ &Inst::FpuCmp64 { rn, rm } => {
+ sink.put4(enc_fcmp(ScalarSize::Size64, rn, rm));
+ }
+ &Inst::FpuToInt { op, rd, rn } => {
+ let top16 = match op {
+ // FCVTZS (32/32-bit)
+ FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000,
+ // FCVTZU (32/32-bit)
+ FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001,
+ // FCVTZS (32/64-bit)
+ FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000,
+ // FCVTZU (32/64-bit)
+ FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001,
+ // FCVTZS (64/32-bit)
+ FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000,
+ // FCVTZU (64/32-bit)
+ FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001,
+ // FCVTZS (64/64-bit)
+ FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000,
+ // FCVTZU (64/64-bit)
+ FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001,
+ };
+ sink.put4(enc_fputoint(top16, rd, rn));
+ }
+ &Inst::IntToFpu { op, rd, rn } => {
+ let top16 = match op {
+ // SCVTF (32/32-bit)
+ IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010,
+ // UCVTF (32/32-bit)
+ IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011,
+ // SCVTF (64/32-bit)
+ IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010,
+ // UCVTF (64/32-bit)
+ IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011,
+ // SCVTF (32/64-bit)
+ IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010,
+ // UCVTF (32/64-bit)
+ IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011,
+ // SCVTF (64/64-bit)
+ IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010,
+ // UCVTF (64/64-bit)
+ IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011,
+ };
+ sink.put4(enc_inttofpu(top16, rd, rn));
+ }
+ &Inst::LoadFpuConst64 { rd, const_data } => {
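+                // Emit `ldr <rd>, pc+8`, an unconditional branch over the constant,
+                // and then the 8-byte constant itself immediately after the branch.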
+ let inst = Inst::FpuLoad64 {
+ rd,
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ };
+ inst.emit(sink, emit_info, state);
+ let inst = Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(12),
+ };
+ inst.emit(sink, emit_info, state);
+ sink.put8(const_data);
+ }
+ &Inst::LoadFpuConst128 { rd, const_data } => {
+ let inst = Inst::FpuLoad128 {
+ rd,
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ };
+ inst.emit(sink, emit_info, state);
+ let inst = Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(20),
+ };
+ inst.emit(sink, emit_info, state);
+
+ for i in const_data.to_le_bytes().iter() {
+ sink.put1(*i);
+ }
+ }
+ &Inst::FpuCSel32 { rd, rn, rm, cond } => {
+ sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
+ }
+ &Inst::FpuCSel64 { rd, rn, rm, cond } => {
+ sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
+ }
+ &Inst::FpuRound { op, rd, rn } => {
+ let top22 = match op {
+ FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000,
+ FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000,
+ FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000,
+ FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000,
+ FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000,
+ FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000,
+ FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000,
+ FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000,
+ };
+ sink.put4(enc_fround(top22, rd, rn));
+ }
+ &Inst::MovToFpu { rd, rn, size } => {
+ let template = match size {
+ ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
+ ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
+ _ => unreachable!(),
+ };
+ sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
+ }
+ &Inst::MovToVec { rd, rn, idx, size } => {
+ let (imm5, shift) = match size.lane_size() {
+ ScalarSize::Size8 => (0b00001, 1),
+ ScalarSize::Size16 => (0b00010, 2),
+ ScalarSize::Size32 => (0b00100, 3),
+ ScalarSize::Size64 => (0b01000, 4),
+ _ => unreachable!(),
+ };
+ debug_assert_eq!(idx & (0b11111 >> shift), idx);
+ let imm5 = imm5 | ((idx as u32) << shift);
+ sink.put4(
+ 0b010_01110000_00000_0_0011_1_00000_00000
+ | (imm5 << 16)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::MovFromVec { rd, rn, idx, size } => {
+ let (q, imm5, shift, mask) = match size {
+ VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111),
+ VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111),
+ VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011),
+ VectorSize::Size64x2 => (0b1, 0b01000, 4, 0b0001),
+ _ => unreachable!(),
+ };
+ debug_assert_eq!(idx & mask, idx);
+ let imm5 = imm5 | ((idx as u32) << shift);
+ sink.put4(
+ 0b000_01110000_00000_0_0111_1_00000_00000
+ | (q << 30)
+ | (imm5 << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_gpr(rd.to_reg()),
+ );
+ }
+ &Inst::MovFromVecSigned {
+ rd,
+ rn,
+ idx,
+ size,
+ scalar_size,
+ } => {
+ let (imm5, shift, half) = match size {
+ VectorSize::Size8x8 => (0b00001, 1, true),
+ VectorSize::Size8x16 => (0b00001, 1, false),
+ VectorSize::Size16x4 => (0b00010, 2, true),
+ VectorSize::Size16x8 => (0b00010, 2, false),
+ VectorSize::Size32x2 => {
+ debug_assert_ne!(scalar_size, OperandSize::Size32);
+ (0b00100, 3, true)
+ }
+ VectorSize::Size32x4 => {
+ debug_assert_ne!(scalar_size, OperandSize::Size32);
+ (0b00100, 3, false)
+ }
+ _ => panic!("Unexpected vector operand size"),
+ };
+ debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx);
+ let imm5 = imm5 | ((idx as u32) << shift);
+ sink.put4(
+ 0b000_01110000_00000_0_0101_1_00000_00000
+ | (scalar_size.is64() as u32) << 30
+ | (imm5 << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_gpr(rd.to_reg()),
+ );
+ }
+ &Inst::VecDup { rd, rn, size } => {
+ let imm5 = match size {
+ VectorSize::Size8x16 => 0b00001,
+ VectorSize::Size16x8 => 0b00010,
+ VectorSize::Size32x4 => 0b00100,
+ VectorSize::Size64x2 => 0b01000,
+ _ => unimplemented!(),
+ };
+ sink.put4(
+ 0b010_01110000_00000_000011_00000_00000
+ | (imm5 << 16)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::VecDupFromFpu { rd, rn, size } => {
+ let imm5 = match size {
+ VectorSize::Size32x4 => 0b00100,
+ VectorSize::Size64x2 => 0b01000,
+ _ => unimplemented!(),
+ };
+ sink.put4(
+ 0b010_01110000_00000_000001_00000_00000
+ | (imm5 << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::VecDupImm {
+ rd,
+ imm,
+ invert,
+ size,
+ } => {
+ let (imm, shift, shift_ones) = imm.value();
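+                // Map the lane size, the (optionally ones-filling) shift and the
+                // invert flag onto the `op` bit and `cmode` field of the ASIMD
+                // modified-immediate encoding.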
+ let (op, cmode) = match size.lane_size() {
+ ScalarSize::Size8 => {
+ assert!(!invert);
+ assert_eq!(shift, 0);
+
+ (0, 0b1110)
+ }
+ ScalarSize::Size16 => {
+ let s = shift & 8;
+
+ assert!(!shift_ones);
+ assert_eq!(s, shift);
+
+ (invert as u32, 0b1000 | (s >> 2))
+ }
+ ScalarSize::Size32 => {
+ if shift_ones {
+ assert!(shift == 8 || shift == 16);
+
+ (invert as u32, 0b1100 | (shift >> 4))
+ } else {
+ let s = shift & 24;
+
+ assert_eq!(s, shift);
+
+ (invert as u32, 0b0000 | (s >> 2))
+ }
+ }
+ ScalarSize::Size64 => {
+ assert!(!invert);
+ assert_eq!(shift, 0);
+
+ (1, 0b1110)
+ }
+ _ => unreachable!(),
+ };
+ let q_op = op | ((size.is_128bits() as u32) << 1);
+
+ sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
+ }
+ &Inst::VecExtend {
+ t,
+ rd,
+ rn,
+ high_half,
+ } => {
+ let (u, immh) = match t {
+ VecExtendOp::Sxtl8 => (0b0, 0b001),
+ VecExtendOp::Sxtl16 => (0b0, 0b010),
+ VecExtendOp::Sxtl32 => (0b0, 0b100),
+ VecExtendOp::Uxtl8 => (0b1, 0b001),
+ VecExtendOp::Uxtl16 => (0b1, 0b010),
+ VecExtendOp::Uxtl32 => (0b1, 0b100),
+ };
+ sink.put4(
+ 0b000_011110_0000_000_101001_00000_00000
+ | ((high_half as u32) << 30)
+ | (u << 29)
+ | (immh << 19)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::VecMiscNarrow {
+ op,
+ rd,
+ rn,
+ size,
+ high_half,
+ } => {
+ let size = match size.lane_size() {
+ ScalarSize::Size8 => 0b00,
+ ScalarSize::Size16 => 0b01,
+ ScalarSize::Size32 => 0b10,
+ _ => panic!("Unexpected vector operand lane size!"),
+ };
+ let (u, bits_12_16) = match op {
+ VecMiscNarrowOp::Xtn => (0b0, 0b10010),
+ VecMiscNarrowOp::Sqxtn => (0b0, 0b10100),
+ VecMiscNarrowOp::Sqxtun => (0b1, 0b10010),
+ };
+ sink.put4(enc_vec_rr_misc(
+ ((high_half as u32) << 1) | u,
+ size,
+ bits_12_16,
+ rd,
+ rn,
+ ));
+ }
+ &Inst::VecMovElement {
+ rd,
+ rn,
+ dest_idx,
+ src_idx,
+ size,
+ } => {
+ let (imm5, shift) = match size.lane_size() {
+ ScalarSize::Size8 => (0b00001, 1),
+ ScalarSize::Size16 => (0b00010, 2),
+ ScalarSize::Size32 => (0b00100, 3),
+ ScalarSize::Size64 => (0b01000, 4),
+ _ => unreachable!(),
+ };
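+            // INS (element): imm5 carries the element size and the destination index,
+            // imm4 the source index.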
+ let mask = 0b11111 >> shift;
+ debug_assert_eq!(dest_idx & mask, dest_idx);
+ debug_assert_eq!(src_idx & mask, src_idx);
+ let imm4 = (src_idx as u32) << (shift - 1);
+ let imm5 = imm5 | ((dest_idx as u32) << shift);
+ sink.put4(
+ 0b011_01110000_00000_0_0000_1_00000_00000
+ | (imm5 << 16)
+ | (imm4 << 11)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::VecRRR {
+ rd,
+ rn,
+ rm,
+ alu_op,
+ size,
+ } => {
+ let (q, enc_size) = size.enc_size();
+ let is_float = match alu_op {
+ VecALUOp::Fcmeq
+ | VecALUOp::Fcmgt
+ | VecALUOp::Fcmge
+ | VecALUOp::Fadd
+ | VecALUOp::Fsub
+ | VecALUOp::Fdiv
+ | VecALUOp::Fmax
+ | VecALUOp::Fmin
+ | VecALUOp::Fmul => true,
+ _ => false,
+ };
+ let enc_float_size = match (is_float, size) {
+ (true, VectorSize::Size32x2) => 0b0,
+ (true, VectorSize::Size32x4) => 0b0,
+ (true, VectorSize::Size64x2) => 0b1,
+ (true, _) => unimplemented!(),
+ _ => 0,
+ };
+
+ let (top11, bit15_10) = match alu_op {
+ VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
+ VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),
+ VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),
+ VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),
+ VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),
+ VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),
+ VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),
+ VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),
+ VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),
+ VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001),
+ VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),
+ VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),
+                // The following logical instructions operate on bytes, so they are not
+                // encoded differently for the different vector types.
+ VecALUOp::And => (0b000_01110_00_1, 0b000111),
+ VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
+ VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
+ VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
+ VecALUOp::Bsl => (0b001_01110_01_1, 0b000111),
+ VecALUOp::Umaxp => (0b001_01110_00_1 | enc_size << 1, 0b101001),
+ VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
+ VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
+ VecALUOp::Mul => {
+ debug_assert_ne!(size, VectorSize::Size64x2);
+ (0b000_01110_00_1 | enc_size << 1, 0b100111)
+ }
+ VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),
+ VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),
+ VecALUOp::Umin => (0b001_01110_00_1 | enc_size << 1, 0b011011),
+ VecALUOp::Smin => (0b000_01110_00_1 | enc_size << 1, 0b011011),
+ VecALUOp::Umax => (0b001_01110_00_1 | enc_size << 1, 0b011001),
+ VecALUOp::Smax => (0b000_01110_00_1 | enc_size << 1, 0b011001),
+ VecALUOp::Urhadd => (0b001_01110_00_1 | enc_size << 1, 0b000101),
+ VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),
+ VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),
+ VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),
+ VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
+ VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
+ VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
+ VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
+ VecALUOp::Umlal => {
+ debug_assert!(!size.is_128bits());
+ (0b001_01110_00_1 | enc_size << 1, 0b100000)
+ }
+ VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
+ VecALUOp::Smull => (0b000_01110_00_1 | enc_size << 1, 0b110000),
+ VecALUOp::Smull2 => (0b010_01110_00_1 | enc_size << 1, 0b110000),
+ };
+ let top11 = match alu_op {
+ VecALUOp::Smull | VecALUOp::Smull2 => top11,
+ _ if is_float => top11 | (q << 9) | enc_float_size << 1,
+ _ => top11 | (q << 9),
+ };
+ sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
+ }
+ &Inst::VecLoadReplicate { rd, rn, size } => {
+ let (q, size) = size.enc_size();
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ // Register the offset at which the actual load instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+
+ sink.put4(enc_ldst_vec(q, size, rn, rd));
+ }
+ &Inst::VecCSel { rd, rn, rm, cond } => {
+ /* Emit this:
+ b.cond else
+ mov rd, rm
+ b out
+ else:
+ mov rd, rn
+ out:
+
+ Note, we could do better in the cases where rd == rn or rd == rm.
+ */
+ let else_label = sink.get_label();
+ let out_label = sink.get_label();
+
+ // b.cond else
+ let br_else_offset = sink.cur_offset();
+ sink.put4(enc_conditional_br(
+ BranchTarget::Label(else_label),
+ CondBrKind::Cond(cond),
+ ));
+ sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);
+
+ // mov rd, rm
+ sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));
+
+ // b out
+ let b_out_offset = sink.cur_offset();
+ sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);
+ sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);
+ sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));
+
+ // else:
+ sink.bind_label(else_label);
+
+ // mov rd, rn
+ sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
+
+ // out:
+ sink.bind_label(out_label);
+ }
+ &Inst::MovToNZCV { rn } => {
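+                // msr nzcv, <rn>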
+ sink.put4(0xd51b4200 | machreg_to_gpr(rn));
+ }
+ &Inst::MovFromNZCV { rd } => {
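+                // mrs <rd>, nzcv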
+ sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg()));
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ } if from_bits >= 8 => {
+ let top22 = match (signed, from_bits, to_bits) {
+ (false, 8, 32) => 0b010_100110_0_000000_000111, // UXTB (32)
+ (false, 16, 32) => 0b010_100110_0_000000_001111, // UXTH (32)
+ (true, 8, 32) => 0b000_100110_0_000000_000111, // SXTB (32)
+ (true, 16, 32) => 0b000_100110_0_000000_001111, // SXTH (32)
+ // The 64-bit unsigned variants are the same as the 32-bit ones,
+ // because writes to Wn zero out the top 32 bits of Xn
+ (false, 8, 64) => 0b010_100110_0_000000_000111, // UXTB (64)
+ (false, 16, 64) => 0b010_100110_0_000000_001111, // UXTH (64)
+ (true, 8, 64) => 0b100_100110_1_000000_000111, // SXTB (64)
+ (true, 16, 64) => 0b100_100110_1_000000_001111, // SXTH (64)
+ // 32-to-64: the unsigned case is a 'mov' (special-cased below).
+ (false, 32, 64) => 0, // MOV
+ (true, 32, 64) => 0b100_100110_1_000000_011111, // SXTW (64)
+ _ => panic!(
+ "Unsupported extend combination: signed = {}, from_bits = {}, to_bits = {}",
+ signed, from_bits, to_bits
+ ),
+ };
+ if top22 != 0 {
+ sink.put4(enc_extend(top22, rd, rn));
+ } else {
+ Inst::mov32(rd, rn).emit(sink, emit_info, state);
+ }
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ } if from_bits == 1 && signed => {
+ assert!(to_bits <= 64);
+ // Reduce sign-extend-from-1-bit to:
+ // - and rd, rn, #1
+ // - sub rd, zr, rd
+
+ // We don't have ImmLogic yet, so we just hardcode this. FIXME.
+ sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()));
+ let sub_inst = Inst::AluRRR {
+ alu_op: ALUOp::Sub64,
+ rd,
+ rn: zero_reg(),
+ rm: rd.to_reg(),
+ };
+ sub_inst.emit(sink, emit_info, state);
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ } if from_bits == 1 && !signed => {
+ assert!(to_bits <= 64);
+ // Reduce zero-extend-from-1-bit to:
+ // - and rd, rn, #1
+
+ // We don't have ImmLogic yet, so we just hardcode this. FIXME.
+ sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()));
+ }
+ &Inst::Extend { .. } => {
+ panic!("Unsupported extend variant");
+ }
+ &Inst::Jump { ref dest } => {
+ let off = sink.cur_offset();
+            // If the jump target is a label, record the label use so that a fixup can
+            // occur later.
+ if let Some(l) = dest.as_label() {
+ sink.use_label_at_offset(off, l, LabelUse::Branch26);
+ sink.add_uncond_branch(off, off + 4, l);
+ }
+ // Emit the jump itself.
+ sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
+ }
+ &Inst::Ret => {
+ sink.put4(0xd65f03c0);
+ }
+ &Inst::EpiloguePlaceholder => {
+ // Noop; this is just a placeholder for epilogues.
+ }
+ &Inst::Call { ref info } => {
+ if let Some(s) = state.take_stack_map() {
+ sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
+ }
+ let loc = state.cur_srcloc();
+ sink.add_reloc(loc, Reloc::Arm64Call, &info.dest, 0);
+ sink.put4(enc_jump26(0b100101, 0));
+ if info.opcode.is_call() {
+ sink.add_call_site(loc, info.opcode);
+ }
+ }
+ &Inst::CallInd { ref info } => {
+ if let Some(s) = state.take_stack_map() {
+ sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
+ }
+ sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5));
+ let loc = state.cur_srcloc();
+ if info.opcode.is_call() {
+ sink.add_call_site(loc, info.opcode);
+ }
+ }
+ &Inst::CondBr {
+ taken,
+ not_taken,
+ kind,
+ } => {
+ // Conditional part first.
+ let cond_off = sink.cur_offset();
+ if let Some(l) = taken.as_label() {
+ sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
+ let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
+ sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
+ }
+ sink.put4(enc_conditional_br(taken, kind));
+
+ // Unconditional part next.
+ let uncond_off = sink.cur_offset();
+ if let Some(l) = not_taken.as_label() {
+ sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
+ sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
+ }
+ sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
+ }
+ &Inst::TrapIf { kind, trap_code } => {
+ // condbr KIND, LABEL
+ let off = sink.cur_offset();
+ let label = sink.get_label();
+ sink.put4(enc_conditional_br(
+ BranchTarget::Label(label),
+ kind.invert(),
+ ));
+ sink.use_label_at_offset(off, label, LabelUse::Branch19);
+ // udf
+ let trap = Inst::Udf { trap_code };
+ trap.emit(sink, emit_info, state);
+ // LABEL:
+ sink.bind_label(label);
+ }
+ &Inst::IndirectBr { rn, .. } => {
+ sink.put4(enc_br(rn));
+ }
+ &Inst::Nop0 => {}
+ &Inst::Nop4 => {
+ sink.put4(0xd503201f);
+ }
+ &Inst::Brk => {
+ sink.put4(0xd4200000);
+ }
+ &Inst::Udf { trap_code } => {
+ let srcloc = state.cur_srcloc();
+ sink.add_trap(srcloc, trap_code);
+ if let Some(s) = state.take_stack_map() {
+ sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
+ }
+ sink.put4(0xd4a00000);
+ }
+ &Inst::Adr { rd, off } => {
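+                // ADR takes a 21-bit signed byte offset, i.e. a range of +/- 1MiB.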
+ assert!(off > -(1 << 20));
+ assert!(off < (1 << 20));
+ sink.put4(enc_adr(off, rd));
+ }
+ &Inst::Word4 { data } => {
+ sink.put4(data);
+ }
+ &Inst::Word8 { data } => {
+ sink.put8(data);
+ }
+ &Inst::JTSequence {
+ ridx,
+ rtmp1,
+ rtmp2,
+ ref info,
+ ..
+ } => {
+ // This sequence is *one* instruction in the vcode, and is expanded only here at
+ // emission time, because we cannot allow the regalloc to insert spills/reloads in
+ // the middle; we depend on hardcoded PC-rel addressing below.
+
+ // Branch to default when condition code from prior comparison indicates.
+ let br = enc_conditional_br(info.default_target, CondBrKind::Cond(Cond::Hs));
+ // No need to inform the sink's branch folding logic about this branch, because it
+ // will not be merged with any other branch, flipped, or elided (it is not preceded
+ // or succeeded by any other branch). Just emit it with the label use.
+ let default_br_offset = sink.cur_offset();
+ if let BranchTarget::Label(l) = info.default_target {
+ sink.use_label_at_offset(default_br_offset, l, LabelUse::Branch19);
+ }
+ sink.put4(br);
+
+            // Save the index in a tmp (the live range of ridx only extends to the start
+            // of this sequence; rtmp1 or rtmp2 may overwrite it).
+ let inst = Inst::gen_move(rtmp2, ridx, I64);
+ inst.emit(sink, emit_info, state);
+ // Load address of jump table
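+            // (the table starts 16 bytes past this point: after the adr, ldrsw, add
+            // and br below)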
+ let inst = Inst::Adr { rd: rtmp1, off: 16 };
+ inst.emit(sink, emit_info, state);
+ // Load value out of jump table
+ let inst = Inst::SLoad32 {
+ rd: rtmp2,
+ mem: AMode::reg_plus_reg_scaled_extended(
+ rtmp1.to_reg(),
+ rtmp2.to_reg(),
+ I32,
+ ExtendOp::UXTW,
+ ),
+ flags: MemFlags::trusted(),
+ };
+ inst.emit(sink, emit_info, state);
+ // Add base of jump table to jump-table-sourced block offset
+ let inst = Inst::AluRRR {
+ alu_op: ALUOp::Add64,
+ rd: rtmp1,
+ rn: rtmp1.to_reg(),
+ rm: rtmp2.to_reg(),
+ };
+ inst.emit(sink, emit_info, state);
+ // Branch to computed address. (`targets` here is only used for successor queries
+ // and is not needed for emission.)
+ let inst = Inst::IndirectBr {
+ rn: rtmp1.to_reg(),
+ targets: vec![],
+ };
+ inst.emit(sink, emit_info, state);
+ // Emit jump table (table of 32-bit offsets).
+ let jt_off = sink.cur_offset();
+ for &target in info.targets.iter() {
+ let word_off = sink.cur_offset();
+                // `off_into_table` is an addend embedded in the label use and patched
+                // in at the end of codegen. The offset is initially relative to this
+                // jump table entry; with the extra addend, it becomes relative to the
+                // jump table's start after patching.
+ let off_into_table = word_off - jt_off;
+ sink.use_label_at_offset(
+ word_off,
+ target.as_label().unwrap(),
+ LabelUse::PCRel32,
+ );
+ sink.put4(off_into_table);
+ }
+
+ // Lowering produces an EmitIsland before using a JTSequence, so we can safely
+ // disable the worst-case-size check in this case.
+ start_off = sink.cur_offset();
+ }
+ &Inst::LoadExtName {
+ rd,
+ ref name,
+ offset,
+ } => {
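+            // Emit `ldr <rd>, pc+8`, a branch over the next 8 bytes, then an 8-byte
+            // slot that the Abs8 relocation fills in with the symbol's address.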
+ let inst = Inst::ULoad64 {
+ rd,
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ };
+ inst.emit(sink, emit_info, state);
+ let inst = Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(12),
+ };
+ inst.emit(sink, emit_info, state);
+ let srcloc = state.cur_srcloc();
+ sink.add_reloc(srcloc, Reloc::Abs8, name, offset);
+ if emit_info.flags().emit_all_ones_funcaddrs() {
+ sink.put8(u64::max_value());
+ } else {
+ sink.put8(0);
+ }
+ }
+ &Inst::LoadAddr { rd, ref mem } => {
+ let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
+ for inst in mem_insts.into_iter() {
+ inst.emit(sink, emit_info, state);
+ }
+
+ let (reg, index_reg, offset) = match mem {
+ AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0),
+ AMode::Unscaled(r, simm9) => (r, None, simm9.value()),
+ AMode::UnsignedOffset(r, uimm12scaled) => {
+ (r, None, uimm12scaled.value() as i32)
+ }
+ _ => panic!("Unsupported case for LoadAddr: {:?}", mem),
+ };
+ let abs_offset = if offset < 0 {
+ -offset as u64
+ } else {
+ offset as u64
+ };
+ let alu_op = if offset < 0 {
+ ALUOp::Sub64
+ } else {
+ ALUOp::Add64
+ };
+
+ if let Some((idx, extendop)) = index_reg {
+ let add = Inst::AluRRRExtend {
+ alu_op: ALUOp::Add64,
+ rd,
+ rn: reg,
+ rm: idx,
+ extendop,
+ };
+
+ add.emit(sink, emit_info, state);
+ } else if offset == 0 {
+ if reg != rd.to_reg() {
+ let mov = Inst::mov(rd, reg);
+
+ mov.emit(sink, emit_info, state);
+ }
+ } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
+ let add = Inst::AluRRImm12 {
+ alu_op,
+ rd,
+ rn: reg,
+ imm12,
+ };
+ add.emit(sink, emit_info, state);
+ } else {
+ // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction
+ // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
+ // that no other instructions will be inserted here (we're emitting directly),
+ // and a live range of `tmp2` should not span this instruction, so this use
+ // should otherwise be correct.
+ debug_assert!(rd.to_reg() != tmp2_reg());
+ debug_assert!(reg != tmp2_reg());
+ let tmp = writable_tmp2_reg();
+ for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
+ insn.emit(sink, emit_info, state);
+ }
+ let add = Inst::AluRRR {
+ alu_op,
+ rd,
+ rn: reg,
+ rm: tmp.to_reg(),
+ };
+ add.emit(sink, emit_info, state);
+ }
+ }
+ &Inst::VirtualSPOffsetAdj { offset } => {
+ debug!(
+ "virtual sp offset adjusted by {} -> {}",
+ offset,
+ state.virtual_sp_offset + offset,
+ );
+ state.virtual_sp_offset += offset;
+ }
+ &Inst::EmitIsland { needed_space } => {
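+                // The extra 4 bytes account for the branch emitted below to jump over
+                // the island.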
+ if sink.island_needed(needed_space + 4) {
+ let jump_around_label = sink.get_label();
+ let jmp = Inst::Jump {
+ dest: BranchTarget::Label(jump_around_label),
+ };
+ jmp.emit(sink, emit_info, state);
+ sink.emit_island();
+ sink.bind_label(jump_around_label);
+ }
+ }
+ }
+
+ let end_off = sink.cur_offset();
+ debug_assert!((end_off - start_off) <= Inst::worst_case_size());
+
+ state.clear_post_insn();
+ }
+
+ fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
+ self.print_with_state(mb_rru, state)
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit_tests.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit_tests.rs
new file mode 100644
index 0000000000..eb31963b5d
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -0,0 +1,5143 @@
+use crate::ir::types::*;
+use crate::isa::aarch64::inst::*;
+use crate::isa::test_utils;
+use crate::isa::CallConv;
+use crate::settings;
+
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+
+#[test]
+fn test_aarch64_binemit() {
+ let mut insns = Vec::<(Inst, &str, &str)>::new();
+
+ // N.B.: the architecture is little-endian, so when transcribing the 32-bit
+ // hex instructions from e.g. objdump disassembly, one must swap the bytes
+ // seen below. (E.g., a `ret` is normally written as the u32 `D65F03C0`,
+ // but we write it here as C0035FD6.)
+
+ // Useful helper script to produce the encodings from the text:
+ //
+ // #!/bin/sh
+ // tmp=`mktemp /tmp/XXXXXXXX.o`
+ // aarch64-linux-gnu-as /dev/stdin -o $tmp
+ // aarch64-linux-gnu-objdump -d $tmp
+ // rm -f $tmp
+ //
+ // Then:
+ //
+ // $ echo "mov x1, x2" | aarch64inst.sh
+ insns.push((Inst::Ret, "C0035FD6", "ret"));
+ insns.push((Inst::Nop0, "", "nop-zero-len"));
+ insns.push((Inst::Nop4, "1F2003D5", "nop"));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Add32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100030B",
+ "add w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Add64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400068B",
+ "add x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Sub32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100034B",
+ "sub w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Sub64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40006CB",
+ "sub x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Orr32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100032A",
+ "orr w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Orr64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40006AA",
+ "orr x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::And32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100030A",
+ "and w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::And64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400068A",
+ "and x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::SubS32,
+ rd: writable_zero_reg(),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "5F00036B",
+ // TODO: Display as cmp
+ "subs wzr, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::SubS32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100036B",
+ "subs w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::SubS64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40006EB",
+ "subs x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::AddS32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100032B",
+ "adds w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::AddS64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40006AB",
+ "adds x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::AddS64,
+ rd: writable_zero_reg(),
+ rn: xreg(5),
+ imm12: Imm12::maybe_from_u64(1).unwrap(),
+ },
+ "BF0400B1",
+ // TODO: Display as cmn.
+ "adds xzr, x5, #1",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::SDiv64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40CC69A",
+ "sdiv x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::UDiv64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A408C69A",
+ "udiv x4, x5, x6",
+ ));
+
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Eor32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400064A",
+ "eor w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Eor64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40006CA",
+ "eor x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::AndNot32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400260A",
+ "bic w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::AndNot64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400268A",
+ "bic x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::OrrNot32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400262A",
+ "orn w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::OrrNot64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40026AA",
+ "orn x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::EorNot32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400264A",
+ "eon w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::EorNot64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40026CA",
+ "eon x4, x5, x6",
+ ));
+
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::RotR32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A42CC61A",
+ "ror w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::RotR64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A42CC69A",
+ "ror x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsr32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A424C61A",
+ "lsr w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsr64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A424C69A",
+ "lsr x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Asr32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A428C61A",
+ "asr w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Asr64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A428C69A",
+ "asr x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsl32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A420C61A",
+ "lsl w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsl64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A420C69A",
+ "lsl x4, x5, x6",
+ ));
+
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Add32,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D0411",
+ "add w7, w8, #291",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Add32,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: true,
+ },
+ },
+ "078D4411",
+ "add w7, w8, #1191936",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Add64,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D0491",
+ "add x7, x8, #291",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Sub32,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D0451",
+ "sub w7, w8, #291",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Sub64,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D04D1",
+ "sub x7, x8, #291",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::SubS32,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D0471",
+ "subs w7, w8, #291",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::SubS64,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D04F1",
+ "subs x7, x8, #291",
+ ));
+
+ insns.push((
+ Inst::AluRRRExtend {
+ alu_op: ALUOp::Add32,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ rm: xreg(9),
+ extendop: ExtendOp::SXTB,
+ },
+ "0781290B",
+ "add w7, w8, w9, SXTB",
+ ));
+
+ insns.push((
+ Inst::AluRRRExtend {
+ alu_op: ALUOp::Add64,
+ rd: writable_xreg(15),
+ rn: xreg(16),
+ rm: xreg(17),
+ extendop: ExtendOp::UXTB,
+ },
+ "0F02318B",
+ "add x15, x16, x17, UXTB",
+ ));
+
+ insns.push((
+ Inst::AluRRRExtend {
+ alu_op: ALUOp::Sub32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ extendop: ExtendOp::SXTH,
+ },
+ "41A0234B",
+ "sub w1, w2, w3, SXTH",
+ ));
+
+ insns.push((
+ Inst::AluRRRExtend {
+ alu_op: ALUOp::Sub64,
+ rd: writable_xreg(20),
+ rn: xreg(21),
+ rm: xreg(22),
+ extendop: ExtendOp::UXTW,
+ },
+ "B44236CB",
+ "sub x20, x21, x22, UXTW",
+ ));
+
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Add32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(20).unwrap(),
+ ),
+ },
+ "6A510C0B",
+ "add w10, w11, w12, LSL 20",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Add64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::ASR,
+ ShiftOpShiftImm::maybe_from_shift(42).unwrap(),
+ ),
+ },
+ "6AA98C8B",
+ "add x10, x11, x12, ASR 42",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Sub32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C4B",
+ "sub w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Sub64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0CCB",
+ "sub x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Orr32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C2A",
+ "orr w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Orr64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0CAA",
+ "orr x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::And32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C0A",
+ "and w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::And64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C8A",
+ "and x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Eor32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C4A",
+ "eor w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Eor64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0CCA",
+ "eor x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::OrrNot32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2C2A",
+ "orn w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::OrrNot64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2CAA",
+ "orn x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::AndNot32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2C0A",
+ "bic w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::AndNot64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2C8A",
+ "bic x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::EorNot32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2C4A",
+ "eon w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::EorNot64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2CCA",
+ "eon x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::AddS32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C2B",
+ "adds w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::AddS64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0CAB",
+ "adds x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::SubS32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C6B",
+ "subs w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::SubS64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0CEB",
+ "subs x10, x11, x12, LSL 23",
+ ));
+
+ insns.push((
+ Inst::AluRRRExtend {
+ alu_op: ALUOp::SubS64,
+ rd: writable_zero_reg(),
+ rn: stack_reg(),
+ rm: xreg(12),
+ extendop: ExtendOp::UXTX,
+ },
+ "FF632CEB",
+ "subs xzr, sp, x12, UXTX",
+ ));
+
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp3::MAdd32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ ra: xreg(4),
+ },
+ "4110031B",
+ "madd w1, w2, w3, w4",
+ ));
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp3::MAdd64,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ ra: xreg(4),
+ },
+ "4110039B",
+ "madd x1, x2, x3, x4",
+ ));
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp3::MSub32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ ra: xreg(4),
+ },
+ "4190031B",
+ "msub w1, w2, w3, w4",
+ ));
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp3::MSub64,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ ra: xreg(4),
+ },
+ "4190039B",
+ "msub x1, x2, x3, x4",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::SMulH,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "417C439B",
+ "smulh x1, x2, x3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::UMulH,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "417CC39B",
+ "umulh x1, x2, x3",
+ ));
+
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::RotR32,
+ rd: writable_xreg(20),
+ rn: xreg(21),
+ immshift: ImmShift::maybe_from_u64(19).unwrap(),
+ },
+ "B44E9513",
+ "ror w20, w21, #19",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::RotR64,
+ rd: writable_xreg(20),
+ rn: xreg(21),
+ immshift: ImmShift::maybe_from_u64(42).unwrap(),
+ },
+ "B4AAD593",
+ "ror x20, x21, #42",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsr32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ immshift: ImmShift::maybe_from_u64(13).unwrap(),
+ },
+ "6A7D0D53",
+ "lsr w10, w11, #13",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsr64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ immshift: ImmShift::maybe_from_u64(57).unwrap(),
+ },
+ "6AFD79D3",
+ "lsr x10, x11, #57",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Asr32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ immshift: ImmShift::maybe_from_u64(7).unwrap(),
+ },
+ "A47C0713",
+ "asr w4, w5, #7",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Asr64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ immshift: ImmShift::maybe_from_u64(35).unwrap(),
+ },
+ "A4FC6393",
+ "asr x4, x5, #35",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsl32,
+ rd: writable_xreg(8),
+ rn: xreg(9),
+ immshift: ImmShift::maybe_from_u64(24).unwrap(),
+ },
+ "281D0853",
+ "lsl w8, w9, #24",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsl64,
+ rd: writable_xreg(8),
+ rn: xreg(9),
+ immshift: ImmShift::maybe_from_u64(63).unwrap(),
+ },
+ "280141D3",
+ "lsl x8, x9, #63",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsl32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ immshift: ImmShift::maybe_from_u64(0).unwrap(),
+ },
+ "6A7D0053",
+ "lsl w10, w11, #0",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsl64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ immshift: ImmShift::maybe_from_u64(0).unwrap(),
+ },
+ "6AFD40D3",
+ "lsl x10, x11, #0",
+ ));
+
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::And32,
+ rd: writable_xreg(21),
+ rn: xreg(27),
+ imml: ImmLogic::maybe_from_u64(0x80003fff, I32).unwrap(),
+ },
+ "753B0112",
+ "and w21, w27, #2147500031",
+ ));
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::And64,
+ rd: writable_xreg(7),
+ rn: xreg(6),
+ imml: ImmLogic::maybe_from_u64(0x3fff80003fff800, I64).unwrap(),
+ },
+ "C7381592",
+ "and x7, x6, #288221580125796352",
+ ));
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::Orr32,
+ rd: writable_xreg(1),
+ rn: xreg(5),
+ imml: ImmLogic::maybe_from_u64(0x100000, I32).unwrap(),
+ },
+ "A1000C32",
+ "orr w1, w5, #1048576",
+ ));
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::Orr64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ imml: ImmLogic::maybe_from_u64(0x8181818181818181, I64).unwrap(),
+ },
+ "A4C401B2",
+ "orr x4, x5, #9331882296111890817",
+ ));
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::Eor32,
+ rd: writable_xreg(1),
+ rn: xreg(5),
+ imml: ImmLogic::maybe_from_u64(0x00007fff, I32).unwrap(),
+ },
+ "A1380052",
+ "eor w1, w5, #32767",
+ ));
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::Eor64,
+ rd: writable_xreg(10),
+ rn: xreg(8),
+ imml: ImmLogic::maybe_from_u64(0x8181818181818181, I64).unwrap(),
+ },
+ "0AC501D2",
+ "eor x10, x8, #9331882296111890817",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::RBit32,
+ rd: writable_xreg(1),
+ rn: xreg(10),
+ },
+ "4101C05A",
+ "rbit w1, w10",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::RBit64,
+ rd: writable_xreg(1),
+ rn: xreg(10),
+ },
+ "4101C0DA",
+ "rbit x1, x10",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::Clz32,
+ rd: writable_xreg(15),
+ rn: xreg(3),
+ },
+ "6F10C05A",
+ "clz w15, w3",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::Clz64,
+ rd: writable_xreg(15),
+ rn: xreg(3),
+ },
+ "6F10C0DA",
+ "clz x15, x3",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::Cls32,
+ rd: writable_xreg(21),
+ rn: xreg(16),
+ },
+ "1516C05A",
+ "cls w21, w16",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::Cls64,
+ rd: writable_xreg(21),
+ rn: xreg(16),
+ },
+ "1516C0DA",
+ "cls x21, x16",
+ ));
+
+ insns.push((
+ Inst::ULoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "41004038",
+ "ldurb w1, [x2]",
+ ));
+ insns.push((
+ Inst::ULoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::zero(I8)),
+ flags: MemFlags::trusted(),
+ },
+ "41004039",
+ "ldrb w1, [x2]",
+ ));
+ insns.push((
+ Inst::ULoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::RegReg(xreg(2), xreg(5)),
+ flags: MemFlags::trusted(),
+ },
+ "41686538",
+ "ldrb w1, [x2, x5]",
+ ));
+ insns.push((
+ Inst::SLoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "41008038",
+ "ldursb x1, [x2]",
+ ));
+ insns.push((
+ Inst::SLoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(63, I8).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC8039",
+ "ldrsb x1, [x2, #63]",
+ ));
+ insns.push((
+ Inst::SLoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::RegReg(xreg(2), xreg(5)),
+ flags: MemFlags::trusted(),
+ },
+ "4168A538",
+ "ldrsb x1, [x2, x5]",
+ ));
+ insns.push((
+ Inst::ULoad16 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::maybe_from_i64(5).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41504078",
+ "ldurh w1, [x2, #5]",
+ ));
+ insns.push((
+ Inst::ULoad16 {
+ rd: writable_xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(8, I16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41104079",
+ "ldrh w1, [x2, #8]",
+ ));
+ insns.push((
+ Inst::ULoad16 {
+ rd: writable_xreg(1),
+ mem: AMode::RegScaled(xreg(2), xreg(3), I16),
+ flags: MemFlags::trusted(),
+ },
+ "41786378",
+ "ldrh w1, [x2, x3, LSL #1]",
+ ));
+ insns.push((
+ Inst::SLoad16 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "41008078",
+ "ldursh x1, [x2]",
+ ));
+ insns.push((
+ Inst::SLoad16 {
+ rd: writable_xreg(28),
+ mem: AMode::UnsignedOffset(xreg(20), UImm12Scaled::maybe_from_i64(24, I16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "9C328079",
+ "ldrsh x28, [x20, #24]",
+ ));
+ insns.push((
+ Inst::SLoad16 {
+ rd: writable_xreg(28),
+ mem: AMode::RegScaled(xreg(20), xreg(20), I16),
+ flags: MemFlags::trusted(),
+ },
+ "9C7AB478",
+ "ldrsh x28, [x20, x20, LSL #1]",
+ ));
+ insns.push((
+ Inst::ULoad32 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "410040B8",
+ "ldur w1, [x2]",
+ ));
+ insns.push((
+ Inst::ULoad32 {
+ rd: writable_xreg(12),
+ mem: AMode::UnsignedOffset(xreg(0), UImm12Scaled::maybe_from_i64(204, I32).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "0CCC40B9",
+ "ldr w12, [x0, #204]",
+ ));
+ insns.push((
+ Inst::ULoad32 {
+ rd: writable_xreg(1),
+ mem: AMode::RegScaled(xreg(2), xreg(12), I32),
+ flags: MemFlags::trusted(),
+ },
+ "41786CB8",
+ "ldr w1, [x2, x12, LSL #2]",
+ ));
+ insns.push((
+ Inst::SLoad32 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "410080B8",
+ "ldursw x1, [x2]",
+ ));
+ insns.push((
+ Inst::SLoad32 {
+ rd: writable_xreg(12),
+ mem: AMode::UnsignedOffset(xreg(1), UImm12Scaled::maybe_from_i64(16380, I32).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "2CFCBFB9",
+ "ldrsw x12, [x1, #16380]",
+ ));
+ insns.push((
+ Inst::SLoad32 {
+ rd: writable_xreg(1),
+ mem: AMode::RegScaled(xreg(5), xreg(1), I32),
+ flags: MemFlags::trusted(),
+ },
+ "A178A1B8",
+ "ldrsw x1, [x5, x1, LSL #2]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "410040F8",
+ "ldur x1, [x2]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::maybe_from_i64(-256).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "410050F8",
+ "ldur x1, [x2, #-256]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::maybe_from_i64(255).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41F04FF8",
+ "ldur x1, [x2, #255]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(32760, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC7FF9",
+ "ldr x1, [x2, #32760]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegReg(xreg(2), xreg(3)),
+ flags: MemFlags::trusted(),
+ },
+ "416863F8",
+ "ldr x1, [x2, x3]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegScaled(xreg(2), xreg(3), I64),
+ flags: MemFlags::trusted(),
+ },
+ "417863F8",
+ "ldr x1, [x2, x3, LSL #3]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::SXTW),
+ flags: MemFlags::trusted(),
+ },
+ "41D863F8",
+ "ldr x1, [x2, w3, SXTW #3]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegExtended(xreg(2), xreg(3), ExtendOp::SXTW),
+ flags: MemFlags::trusted(),
+ },
+ "41C863F8",
+ "ldr x1, [x2, w3, SXTW]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::Label(MemLabel::PCRel(64)),
+ flags: MemFlags::trusted(),
+ },
+ "01020058",
+ "ldr x1, pc+64",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "410C41F8",
+ "ldr x1, [x2, #16]!",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "410441F8",
+ "ldr x1, [x2], #16",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::FPOffset(32768, I8),
+ flags: MemFlags::trusted(),
+ },
+ "100090D2B063308B010240F9",
+ "movz x16, #32768 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::FPOffset(-32768, I8),
+ flags: MemFlags::trusted(),
+ },
+ "F0FF8F92B063308B010240F9",
+ "movn x16, #32767 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::FPOffset(1048576, I8), // 2^20
+ flags: MemFlags::trusted(),
+ },
+ "1002A0D2B063308B010240F9",
+ "movz x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::FPOffset(1048576 + 1, I8), // 2^20 + 1
+ flags: MemFlags::trusted(),
+ },
+ "300080521002A072B063308B010240F9",
+ "movz w16, #1 ; movk w16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
+ ));
+
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegOffset(xreg(7), 8, I64),
+ flags: MemFlags::trusted(),
+ },
+ "E18040F8",
+ "ldur x1, [x7, #8]",
+ ));
+
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegOffset(xreg(7), 1024, I64),
+ flags: MemFlags::trusted(),
+ },
+ "E10042F9",
+ "ldr x1, [x7, #1024]",
+ ));
+
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegOffset(xreg(7), 1048576, I64),
+ flags: MemFlags::trusted(),
+ },
+ "1002A0D2F060308B010240F9",
+ "movz x16, #16, LSL #16 ; add x16, x7, x16, UXTX ; ldr x1, [x16]",
+ ));
+
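+ // Scalar integer stores (strb/strh/str and unscaled stur variants) across the addressing modes.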
+ insns.push((
+ Inst::Store8 {
+ rd: xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "41000038",
+ "sturb w1, [x2]",
+ ));
+ insns.push((
+ Inst::Store8 {
+ rd: xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(4095, I8).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC3F39",
+ "strb w1, [x2, #4095]",
+ ));
+ insns.push((
+ Inst::Store16 {
+ rd: xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "41000078",
+ "sturh w1, [x2]",
+ ));
+ insns.push((
+ Inst::Store16 {
+ rd: xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(8190, I16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC3F79",
+ "strh w1, [x2, #8190]",
+ ));
+ insns.push((
+ Inst::Store32 {
+ rd: xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "410000B8",
+ "stur w1, [x2]",
+ ));
+ insns.push((
+ Inst::Store32 {
+ rd: xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(16380, I32).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC3FB9",
+ "str w1, [x2, #16380]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "410000F8",
+ "stur x1, [x2]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(32760, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC3FF9",
+ "str x1, [x2, #32760]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::RegReg(xreg(2), xreg(3)),
+ flags: MemFlags::trusted(),
+ },
+ "416823F8",
+ "str x1, [x2, x3]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::RegScaled(xreg(2), xreg(3), I64),
+ flags: MemFlags::trusted(),
+ },
+ "417823F8",
+ "str x1, [x2, x3, LSL #3]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::UXTW),
+ flags: MemFlags::trusted(),
+ },
+ "415823F8",
+ "str x1, [x2, w3, UXTW #3]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::RegExtended(xreg(2), xreg(3), ExtendOp::UXTW),
+ flags: MemFlags::trusted(),
+ },
+ "414823F8",
+ "str x1, [x2, w3, UXTW]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "410C01F8",
+ "str x1, [x2, #16]!",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "410401F8",
+ "str x1, [x2], #16",
+ ));
+
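+ // Register-pair stores and loads (stp/ldp), including pre- and post-indexed forms.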
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(8),
+ rt2: xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::zero(I64)),
+ flags: MemFlags::trusted(),
+ },
+ "482500A9",
+ "stp x8, x9, [x10]",
+ ));
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(8),
+ rt2: xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(504, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "48A51FA9",
+ "stp x8, x9, [x10, #504]",
+ ));
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(8),
+ rt2: xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-64, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "48253CA9",
+ "stp x8, x9, [x10, #-64]",
+ ));
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(21),
+ rt2: xreg(28),
+ mem: PairAMode::SignedOffset(xreg(1), SImm7Scaled::maybe_from_i64(-512, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "357020A9",
+ "stp x21, x28, [x1, #-512]",
+ ));
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(8),
+ rt2: xreg(9),
+ mem: PairAMode::PreIndexed(
+ writable_xreg(10),
+ SImm7Scaled::maybe_from_i64(-64, I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ },
+ "4825BCA9",
+ "stp x8, x9, [x10, #-64]!",
+ ));
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(15),
+ rt2: xreg(16),
+ mem: PairAMode::PostIndexed(
+ writable_xreg(20),
+ SImm7Scaled::maybe_from_i64(504, I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ },
+ "8FC29FA8",
+ "stp x15, x16, [x20], #504",
+ ));
+
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::zero(I64)),
+ flags: MemFlags::trusted(),
+ },
+ "482540A9",
+ "ldp x8, x9, [x10]",
+ ));
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(504, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "48A55FA9",
+ "ldp x8, x9, [x10, #504]",
+ ));
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-64, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "48257CA9",
+ "ldp x8, x9, [x10, #-64]",
+ ));
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-512, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "482560A9",
+ "ldp x8, x9, [x10, #-512]",
+ ));
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(9),
+ mem: PairAMode::PreIndexed(
+ writable_xreg(10),
+ SImm7Scaled::maybe_from_i64(-64, I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ },
+ "4825FCA9",
+ "ldp x8, x9, [x10, #-64]!",
+ ));
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(25),
+ mem: PairAMode::PostIndexed(
+ writable_xreg(12),
+ SImm7Scaled::maybe_from_i64(504, I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ },
+ "88E5DFA8",
+ "ldp x8, x25, [x12], #504",
+ ));
+
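+ // Register moves and move-wide immediates (mov, movz, movn, movk).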
+ insns.push((
+ Inst::Mov64 {
+ rd: writable_xreg(8),
+ rm: xreg(9),
+ },
+ "E80309AA",
+ "mov x8, x9",
+ ));
+ insns.push((
+ Inst::Mov32 {
+ rd: writable_xreg(8),
+ rm: xreg(9),
+ },
+ "E803092A",
+ "mov w8, w9",
+ ));
+
+ insns.push((
+ Inst::MovZ {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FF9FD2",
+ "movz x8, #65535",
+ ));
+ insns.push((
+ Inst::MovZ {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFBFD2",
+ "movz x8, #65535, LSL #16",
+ ));
+ insns.push((
+ Inst::MovZ {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFDFD2",
+ "movz x8, #65535, LSL #32",
+ ));
+ insns.push((
+ Inst::MovZ {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFFFD2",
+ "movz x8, #65535, LSL #48",
+ ));
+ insns.push((
+ Inst::MovZ {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+ size: OperandSize::Size32,
+ },
+ "E8FFBF52",
+ "movz w8, #65535, LSL #16",
+ ));
+
+ insns.push((
+ Inst::MovN {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FF9F92",
+ "movn x8, #65535",
+ ));
+ insns.push((
+ Inst::MovN {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFBF92",
+ "movn x8, #65535, LSL #16",
+ ));
+ insns.push((
+ Inst::MovN {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFDF92",
+ "movn x8, #65535, LSL #32",
+ ));
+ insns.push((
+ Inst::MovN {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFFF92",
+ "movn x8, #65535, LSL #48",
+ ));
+ insns.push((
+ Inst::MovN {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+ size: OperandSize::Size32,
+ },
+ "E8FF9F12",
+ "movn w8, #65535",
+ ));
+
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(12),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "0C0080F2",
+ "movk x12, #0",
+ ));
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(19),
+ imm: MoveWideConst::maybe_with_shift(0x0000, 16).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "1300A0F2",
+ "movk x19, #0, LSL #16",
+ ));
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(3),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E3FF9FF2",
+ "movk x3, #65535",
+ ));
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFBFF2",
+ "movk x8, #65535, LSL #16",
+ ));
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFDFF2",
+ "movk x8, #65535, LSL #32",
+ ));
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFFFF2",
+ "movk x8, #65535, LSL #48",
+ ));
+
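+ // Conditional select, conditional set, and conditional compare with immediate.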
+ insns.push((
+ Inst::CSel {
+ rd: writable_xreg(10),
+ rn: xreg(12),
+ rm: xreg(14),
+ cond: Cond::Hs,
+ },
+ "8A218E9A",
+ "csel x10, x12, x14, hs",
+ ));
+ insns.push((
+ Inst::CSet {
+ rd: writable_xreg(15),
+ cond: Cond::Ge,
+ },
+ "EFB79F9A",
+ "cset x15, ge",
+ ));
+ insns.push((
+ Inst::CCmpImm {
+ size: OperandSize::Size64,
+ rn: xreg(22),
+ imm: UImm5::maybe_from_u8(5).unwrap(),
+ nzcv: NZCV::new(false, false, true, true),
+ cond: Cond::Eq,
+ },
+ "C30A45FA",
+ "ccmp x22, #5, #nzCV, eq",
+ ));
+ insns.push((
+ Inst::CCmpImm {
+ size: OperandSize::Size32,
+ rn: xreg(3),
+ imm: UImm5::maybe_from_u8(30).unwrap(),
+ nzcv: NZCV::new(true, true, true, true),
+ cond: Cond::Gt,
+ },
+ "6FC85E7A",
+ "ccmp w3, #30, #NZCV, gt",
+ ));
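+ // Moves between general-purpose and FP/vector registers, plus NZCV flag transfers.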
+ insns.push((
+ Inst::MovToFpu {
+ rd: writable_vreg(31),
+ rn: xreg(0),
+ size: ScalarSize::Size64,
+ },
+ "1F00679E",
+ "fmov d31, x0",
+ ));
+ insns.push((
+ Inst::MovToFpu {
+ rd: writable_vreg(1),
+ rn: xreg(28),
+ size: ScalarSize::Size32,
+ },
+ "8103271E",
+ "fmov s1, w28",
+ ));
+ insns.push((
+ Inst::MovToVec {
+ rd: writable_vreg(0),
+ rn: xreg(0),
+ idx: 7,
+ size: VectorSize::Size8x8,
+ },
+ "001C0F4E",
+ "mov v0.b[7], w0",
+ ));
+ insns.push((
+ Inst::MovToVec {
+ rd: writable_vreg(20),
+ rn: xreg(21),
+ idx: 0,
+ size: VectorSize::Size64x2,
+ },
+ "B41E084E",
+ "mov v20.d[0], x21",
+ ));
+ insns.push((
+ Inst::MovFromVec {
+ rd: writable_xreg(3),
+ rn: vreg(27),
+ idx: 14,
+ size: VectorSize::Size8x16,
+ },
+ "633F1D0E",
+ "umov w3, v27.b[14]",
+ ));
+ insns.push((
+ Inst::MovFromVec {
+ rd: writable_xreg(24),
+ rn: vreg(5),
+ idx: 3,
+ size: VectorSize::Size16x8,
+ },
+ "B83C0E0E",
+ "umov w24, v5.h[3]",
+ ));
+ insns.push((
+ Inst::MovFromVec {
+ rd: writable_xreg(12),
+ rn: vreg(17),
+ idx: 1,
+ size: VectorSize::Size32x4,
+ },
+ "2C3E0C0E",
+ "mov w12, v17.s[1]",
+ ));
+ insns.push((
+ Inst::MovFromVec {
+ rd: writable_xreg(21),
+ rn: vreg(20),
+ idx: 0,
+ size: VectorSize::Size64x2,
+ },
+ "953E084E",
+ "mov x21, v20.d[0]",
+ ));
+ insns.push((
+ Inst::MovFromVecSigned {
+ rd: writable_xreg(0),
+ rn: vreg(0),
+ idx: 15,
+ size: VectorSize::Size8x16,
+ scalar_size: OperandSize::Size32,
+ },
+ "002C1F0E",
+ "smov w0, v0.b[15]",
+ ));
+ insns.push((
+ Inst::MovFromVecSigned {
+ rd: writable_xreg(12),
+ rn: vreg(13),
+ idx: 7,
+ size: VectorSize::Size8x8,
+ scalar_size: OperandSize::Size64,
+ },
+ "AC2D0F4E",
+ "smov x12, v13.b[7]",
+ ));
+ insns.push((
+ Inst::MovFromVecSigned {
+ rd: writable_xreg(23),
+ rn: vreg(31),
+ idx: 7,
+ size: VectorSize::Size16x8,
+ scalar_size: OperandSize::Size32,
+ },
+ "F72F1E0E",
+ "smov w23, v31.h[7]",
+ ));
+ insns.push((
+ Inst::MovFromVecSigned {
+ rd: writable_xreg(24),
+ rn: vreg(5),
+ idx: 1,
+ size: VectorSize::Size32x2,
+ scalar_size: OperandSize::Size64,
+ },
+ "B82C0C4E",
+ "smov x24, v5.s[1]",
+ ));
+ insns.push((
+ Inst::MovToNZCV { rn: xreg(13) },
+ "0D421BD5",
+ "msr nzcv, x13",
+ ));
+ insns.push((
+ Inst::MovFromNZCV {
+ rd: writable_xreg(27),
+ },
+ "1B423BD5",
+ "mrs x27, nzcv",
+ ));
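+ // Vector duplication (dup/movi/mvni) and vector widening (sxtl/uxtl).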
+ insns.push((
+ Inst::VecDup {
+ rd: writable_vreg(25),
+ rn: xreg(7),
+ size: VectorSize::Size8x16,
+ },
+ "F90C014E",
+ "dup v25.16b, w7",
+ ));
+ insns.push((
+ Inst::VecDup {
+ rd: writable_vreg(2),
+ rn: xreg(23),
+ size: VectorSize::Size16x8,
+ },
+ "E20E024E",
+ "dup v2.8h, w23",
+ ));
+ insns.push((
+ Inst::VecDup {
+ rd: writable_vreg(0),
+ rn: xreg(28),
+ size: VectorSize::Size32x4,
+ },
+ "800F044E",
+ "dup v0.4s, w28",
+ ));
+ insns.push((
+ Inst::VecDup {
+ rd: writable_vreg(31),
+ rn: xreg(5),
+ size: VectorSize::Size64x2,
+ },
+ "BF0C084E",
+ "dup v31.2d, x5",
+ ));
+ insns.push((
+ Inst::VecDupFromFpu {
+ rd: writable_vreg(14),
+ rn: vreg(19),
+ size: VectorSize::Size32x4,
+ },
+ "6E06044E",
+ "dup v14.4s, v19.s[0]",
+ ));
+ insns.push((
+ Inst::VecDupFromFpu {
+ rd: writable_vreg(18),
+ rn: vreg(10),
+ size: VectorSize::Size64x2,
+ },
+ "5205084E",
+ "dup v18.2d, v10.d[0]",
+ ));
+ insns.push((
+ Inst::VecDupImm {
+ rd: writable_vreg(31),
+ imm: ASIMDMovModImm::maybe_from_u64(255, ScalarSize::Size8).unwrap(),
+ invert: false,
+ size: VectorSize::Size8x16,
+ },
+ "FFE7074F",
+ "movi v31.16b, #255",
+ ));
+ insns.push((
+ Inst::VecDupImm {
+ rd: writable_vreg(0),
+ imm: ASIMDMovModImm::zero(),
+ invert: true,
+ size: VectorSize::Size16x4,
+ },
+ "0084002F",
+ "mvni v0.4h, #0",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Sxtl8,
+ rd: writable_vreg(4),
+ rn: vreg(27),
+ high_half: false,
+ },
+ "64A7080F",
+ "sxtl v4.8h, v27.8b",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Sxtl16,
+ rd: writable_vreg(17),
+ rn: vreg(19),
+ high_half: true,
+ },
+ "71A6104F",
+ "sxtl2 v17.4s, v19.8h",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Sxtl32,
+ rd: writable_vreg(30),
+ rn: vreg(6),
+ high_half: false,
+ },
+ "DEA4200F",
+ "sxtl v30.2d, v6.2s",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Uxtl8,
+ rd: writable_vreg(3),
+ rn: vreg(29),
+ high_half: true,
+ },
+ "A3A7086F",
+ "uxtl2 v3.8h, v29.16b",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Uxtl16,
+ rd: writable_vreg(15),
+ rn: vreg(12),
+ high_half: false,
+ },
+ "8FA5102F",
+ "uxtl v15.4s, v12.4h",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Uxtl32,
+ rd: writable_vreg(28),
+ rn: vreg(2),
+ high_half: true,
+ },
+ "5CA4206F",
+ "uxtl2 v28.2d, v2.4s",
+ ));
+
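+ // Vector element moves and narrowing conversions (xtn/sqxtn/sqxtun).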
+ insns.push((
+ Inst::VecMovElement {
+ rd: writable_vreg(0),
+ rn: vreg(31),
+ dest_idx: 7,
+ src_idx: 7,
+ size: VectorSize::Size16x8,
+ },
+ "E0771E6E",
+ "mov v0.h[7], v31.h[7]",
+ ));
+
+ insns.push((
+ Inst::VecMovElement {
+ rd: writable_vreg(31),
+ rn: vreg(16),
+ dest_idx: 1,
+ src_idx: 0,
+ size: VectorSize::Size32x2,
+ },
+ "1F060C6E",
+ "mov v31.s[1], v16.s[0]",
+ ));
+
+ insns.push((
+ Inst::VecMiscNarrow {
+ op: VecMiscNarrowOp::Xtn,
+ rd: writable_vreg(22),
+ rn: vreg(8),
+ size: VectorSize::Size32x2,
+ high_half: false,
+ },
+ "1629A10E",
+ "xtn v22.2s, v8.2d",
+ ));
+
+ insns.push((
+ Inst::VecMiscNarrow {
+ op: VecMiscNarrowOp::Sqxtn,
+ rd: writable_vreg(31),
+ rn: vreg(0),
+ size: VectorSize::Size16x8,
+ high_half: true,
+ },
+ "1F48614E",
+ "sqxtn2 v31.8h, v0.4s",
+ ));
+
+ insns.push((
+ Inst::VecMiscNarrow {
+ op: VecMiscNarrowOp::Sqxtun,
+ rd: writable_vreg(16),
+ rn: vreg(23),
+ size: VectorSize::Size8x16,
+ high_half: false,
+ },
+ "F02A212E",
+ "sqxtun v16.8b, v23.8h",
+ ));
+
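+ // Three-register vector ALU operations (VecRRR), one case per supported lane arrangement.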
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqadd,
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "410C284E",
+ "sqadd v1.16b, v2.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqadd,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(28),
+ size: VectorSize::Size16x8,
+ },
+ "810D7C4E",
+ "sqadd v1.8h, v12.8h, v28.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqadd,
+ rd: writable_vreg(12),
+ rn: vreg(2),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "4C0CA64E",
+ "sqadd v12.4s, v2.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqadd,
+ rd: writable_vreg(20),
+ rn: vreg(7),
+ rm: vreg(13),
+ size: VectorSize::Size64x2,
+ },
+ "F40CED4E",
+ "sqadd v20.2d, v7.2d, v13.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqsub,
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "412C284E",
+ "sqsub v1.16b, v2.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqsub,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(28),
+ size: VectorSize::Size16x8,
+ },
+ "812D7C4E",
+ "sqsub v1.8h, v12.8h, v28.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqsub,
+ rd: writable_vreg(12),
+ rn: vreg(2),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "4C2CA64E",
+ "sqsub v12.4s, v2.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqsub,
+ rd: writable_vreg(20),
+ rn: vreg(7),
+ rm: vreg(13),
+ size: VectorSize::Size64x2,
+ },
+ "F42CED4E",
+ "sqsub v20.2d, v7.2d, v13.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqadd,
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "410C286E",
+ "uqadd v1.16b, v2.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqadd,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(28),
+ size: VectorSize::Size16x8,
+ },
+ "810D7C6E",
+ "uqadd v1.8h, v12.8h, v28.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqadd,
+ rd: writable_vreg(12),
+ rn: vreg(2),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "4C0CA66E",
+ "uqadd v12.4s, v2.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqadd,
+ rd: writable_vreg(20),
+ rn: vreg(7),
+ rm: vreg(13),
+ size: VectorSize::Size64x2,
+ },
+ "F40CED6E",
+ "uqadd v20.2d, v7.2d, v13.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqsub,
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "412C286E",
+ "uqsub v1.16b, v2.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqsub,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(28),
+ size: VectorSize::Size16x8,
+ },
+ "812D7C6E",
+ "uqsub v1.8h, v12.8h, v28.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqsub,
+ rd: writable_vreg(12),
+ rn: vreg(2),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "4C2CA66E",
+ "uqsub v12.4s, v2.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqsub,
+ rd: writable_vreg(20),
+ rn: vreg(7),
+ rm: vreg(13),
+ size: VectorSize::Size64x2,
+ },
+ "F42CED6E",
+ "uqsub v20.2d, v7.2d, v13.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmeq,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size8x16,
+ },
+ "E38E386E",
+ "cmeq v3.16b, v23.16b, v24.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmgt,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size8x16,
+ },
+ "E336384E",
+ "cmgt v3.16b, v23.16b, v24.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmge,
+ rd: writable_vreg(23),
+ rn: vreg(9),
+ rm: vreg(12),
+ size: VectorSize::Size8x16,
+ },
+ "373D2C4E",
+ "cmge v23.16b, v9.16b, v12.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhi,
+ rd: writable_vreg(5),
+ rn: vreg(1),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "2534216E",
+ "cmhi v5.16b, v1.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhs,
+ rd: writable_vreg(8),
+ rn: vreg(2),
+ rm: vreg(15),
+ size: VectorSize::Size8x16,
+ },
+ "483C2F6E",
+ "cmhs v8.16b, v2.16b, v15.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmeq,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size16x8,
+ },
+ "E38E786E",
+ "cmeq v3.8h, v23.8h, v24.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmgt,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size16x8,
+ },
+ "E336784E",
+ "cmgt v3.8h, v23.8h, v24.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmge,
+ rd: writable_vreg(23),
+ rn: vreg(9),
+ rm: vreg(12),
+ size: VectorSize::Size16x8,
+ },
+ "373D6C4E",
+ "cmge v23.8h, v9.8h, v12.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhi,
+ rd: writable_vreg(5),
+ rn: vreg(1),
+ rm: vreg(1),
+ size: VectorSize::Size16x8,
+ },
+ "2534616E",
+ "cmhi v5.8h, v1.8h, v1.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhs,
+ rd: writable_vreg(8),
+ rn: vreg(2),
+ rm: vreg(15),
+ size: VectorSize::Size16x8,
+ },
+ "483C6F6E",
+ "cmhs v8.8h, v2.8h, v15.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmeq,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size32x4,
+ },
+ "E38EB86E",
+ "cmeq v3.4s, v23.4s, v24.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmgt,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size32x4,
+ },
+ "E336B84E",
+ "cmgt v3.4s, v23.4s, v24.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmge,
+ rd: writable_vreg(23),
+ rn: vreg(9),
+ rm: vreg(12),
+ size: VectorSize::Size32x4,
+ },
+ "373DAC4E",
+ "cmge v23.4s, v9.4s, v12.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhi,
+ rd: writable_vreg(5),
+ rn: vreg(1),
+ rm: vreg(1),
+ size: VectorSize::Size32x4,
+ },
+ "2534A16E",
+ "cmhi v5.4s, v1.4s, v1.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhs,
+ rd: writable_vreg(8),
+ rn: vreg(2),
+ rm: vreg(15),
+ size: VectorSize::Size32x4,
+ },
+ "483CAF6E",
+ "cmhs v8.4s, v2.4s, v15.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fcmeq,
+ rd: writable_vreg(28),
+ rn: vreg(12),
+ rm: vreg(4),
+ size: VectorSize::Size32x2,
+ },
+ "9CE5240E",
+ "fcmeq v28.2s, v12.2s, v4.2s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fcmgt,
+ rd: writable_vreg(3),
+ rn: vreg(16),
+ rm: vreg(31),
+ size: VectorSize::Size64x2,
+ },
+ "03E6FF6E",
+ "fcmgt v3.2d, v16.2d, v31.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fcmge,
+ rd: writable_vreg(18),
+ rn: vreg(23),
+ rm: vreg(0),
+ size: VectorSize::Size64x2,
+ },
+ "F2E6606E",
+ "fcmge v18.2d, v23.2d, v0.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::And,
+ rd: writable_vreg(20),
+ rn: vreg(19),
+ rm: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "741E324E",
+ "and v20.16b, v19.16b, v18.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Bic,
+ rd: writable_vreg(8),
+ rn: vreg(11),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "681D614E",
+ "bic v8.16b, v11.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Orr,
+ rd: writable_vreg(15),
+ rn: vreg(2),
+ rm: vreg(12),
+ size: VectorSize::Size16x8,
+ },
+ "4F1CAC4E",
+ "orr v15.16b, v2.16b, v12.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Eor,
+ rd: writable_vreg(18),
+ rn: vreg(3),
+ rm: vreg(22),
+ size: VectorSize::Size8x16,
+ },
+ "721C366E",
+ "eor v18.16b, v3.16b, v22.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Bsl,
+ rd: writable_vreg(8),
+ rn: vreg(9),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "281D616E",
+ "bsl v8.16b, v9.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umaxp,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "88A5216E",
+ "umaxp v8.16b, v12.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umaxp,
+ rd: writable_vreg(1),
+ rn: vreg(6),
+ rm: vreg(1),
+ size: VectorSize::Size16x8,
+ },
+ "C1A4616E",
+ "umaxp v1.8h, v6.8h, v1.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umaxp,
+ rd: writable_vreg(1),
+ rn: vreg(20),
+ rm: vreg(16),
+ size: VectorSize::Size32x4,
+ },
+ "81A6B06E",
+ "umaxp v1.4s, v20.4s, v16.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Add,
+ rd: writable_vreg(5),
+ rn: vreg(1),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "2584214E",
+ "add v5.16b, v1.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Add,
+ rd: writable_vreg(7),
+ rn: vreg(13),
+ rm: vreg(2),
+ size: VectorSize::Size16x8,
+ },
+ "A785624E",
+ "add v7.8h, v13.8h, v2.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Add,
+ rd: writable_vreg(18),
+ rn: vreg(9),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "3285A64E",
+ "add v18.4s, v9.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Add,
+ rd: writable_vreg(1),
+ rn: vreg(3),
+ rm: vreg(2),
+ size: VectorSize::Size64x2,
+ },
+ "6184E24E",
+ "add v1.2d, v3.2d, v2.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sub,
+ rd: writable_vreg(5),
+ rn: vreg(1),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "2584216E",
+ "sub v5.16b, v1.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sub,
+ rd: writable_vreg(7),
+ rn: vreg(13),
+ rm: vreg(2),
+ size: VectorSize::Size16x8,
+ },
+ "A785626E",
+ "sub v7.8h, v13.8h, v2.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sub,
+ rd: writable_vreg(18),
+ rn: vreg(9),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "3285A66E",
+ "sub v18.4s, v9.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sub,
+ rd: writable_vreg(18),
+ rn: vreg(0),
+ rm: vreg(8),
+ size: VectorSize::Size64x2,
+ },
+ "1284E86E",
+ "sub v18.2d, v0.2d, v8.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Mul,
+ rd: writable_vreg(25),
+ rn: vreg(9),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "399D284E",
+ "mul v25.16b, v9.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Mul,
+ rd: writable_vreg(30),
+ rn: vreg(30),
+ rm: vreg(12),
+ size: VectorSize::Size16x8,
+ },
+ "DE9F6C4E",
+ "mul v30.8h, v30.8h, v12.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Mul,
+ rd: writable_vreg(18),
+ rn: vreg(18),
+ rm: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "529EB24E",
+ "mul v18.4s, v18.4s, v18.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Ushl,
+ rd: writable_vreg(18),
+ rn: vreg(18),
+ rm: vreg(18),
+ size: VectorSize::Size8x16,
+ },
+ "5246326E",
+ "ushl v18.16b, v18.16b, v18.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Ushl,
+ rd: writable_vreg(18),
+ rn: vreg(18),
+ rm: vreg(18),
+ size: VectorSize::Size16x8,
+ },
+ "5246726E",
+ "ushl v18.8h, v18.8h, v18.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Ushl,
+ rd: writable_vreg(18),
+ rn: vreg(1),
+ rm: vreg(21),
+ size: VectorSize::Size32x4,
+ },
+ "3244B56E",
+ "ushl v18.4s, v1.4s, v21.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Ushl,
+ rd: writable_vreg(5),
+ rn: vreg(7),
+ rm: vreg(19),
+ size: VectorSize::Size64x2,
+ },
+ "E544F36E",
+ "ushl v5.2d, v7.2d, v19.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sshl,
+ rd: writable_vreg(18),
+ rn: vreg(18),
+ rm: vreg(18),
+ size: VectorSize::Size8x16,
+ },
+ "5246324E",
+ "sshl v18.16b, v18.16b, v18.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sshl,
+ rd: writable_vreg(30),
+ rn: vreg(1),
+ rm: vreg(29),
+ size: VectorSize::Size16x8,
+ },
+ "3E447D4E",
+ "sshl v30.8h, v1.8h, v29.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sshl,
+ rd: writable_vreg(8),
+ rn: vreg(22),
+ rm: vreg(21),
+ size: VectorSize::Size32x4,
+ },
+ "C846B54E",
+ "sshl v8.4s, v22.4s, v21.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sshl,
+ rd: writable_vreg(8),
+ rn: vreg(22),
+ rm: vreg(2),
+ size: VectorSize::Size64x2,
+ },
+ "C846E24E",
+ "sshl v8.2d, v22.2d, v2.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umin,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(3),
+ size: VectorSize::Size8x16,
+ },
+ "816D236E",
+ "umin v1.16b, v12.16b, v3.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umin,
+ rd: writable_vreg(30),
+ rn: vreg(20),
+ rm: vreg(10),
+ size: VectorSize::Size16x8,
+ },
+ "9E6E6A6E",
+ "umin v30.8h, v20.8h, v10.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umin,
+ rd: writable_vreg(8),
+ rn: vreg(22),
+ rm: vreg(21),
+ size: VectorSize::Size32x4,
+ },
+ "C86EB56E",
+ "umin v8.4s, v22.4s, v21.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smin,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(3),
+ size: VectorSize::Size8x16,
+ },
+ "816D234E",
+ "smin v1.16b, v12.16b, v3.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smin,
+ rd: writable_vreg(30),
+ rn: vreg(20),
+ rm: vreg(10),
+ size: VectorSize::Size16x8,
+ },
+ "9E6E6A4E",
+ "smin v30.8h, v20.8h, v10.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smin,
+ rd: writable_vreg(8),
+ rn: vreg(22),
+ rm: vreg(21),
+ size: VectorSize::Size32x4,
+ },
+ "C86EB54E",
+ "smin v8.4s, v22.4s, v21.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umax,
+ rd: writable_vreg(6),
+ rn: vreg(9),
+ rm: vreg(8),
+ size: VectorSize::Size8x8,
+ },
+ "2665282E",
+ "umax v6.8b, v9.8b, v8.8b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umax,
+ rd: writable_vreg(11),
+ rn: vreg(13),
+ rm: vreg(2),
+ size: VectorSize::Size16x8,
+ },
+ "AB65626E",
+ "umax v11.8h, v13.8h, v2.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umax,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "8865AE6E",
+ "umax v8.4s, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smax,
+ rd: writable_vreg(6),
+ rn: vreg(9),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "2665284E",
+ "smax v6.16b, v9.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smax,
+ rd: writable_vreg(11),
+ rn: vreg(13),
+ rm: vreg(2),
+ size: VectorSize::Size16x8,
+ },
+ "AB65624E",
+ "smax v11.8h, v13.8h, v2.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smax,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "8865AE4E",
+ "smax v8.4s, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Urhadd,
+ rd: writable_vreg(8),
+ rn: vreg(1),
+ rm: vreg(3),
+ size: VectorSize::Size8x16,
+ },
+ "2814236E",
+ "urhadd v8.16b, v1.16b, v3.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Urhadd,
+ rd: writable_vreg(2),
+ rn: vreg(13),
+ rm: vreg(6),
+ size: VectorSize::Size16x8,
+ },
+ "A215666E",
+ "urhadd v2.8h, v13.8h, v6.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Urhadd,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "8815AE6E",
+ "urhadd v8.4s, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fadd,
+ rd: writable_vreg(31),
+ rn: vreg(0),
+ rm: vreg(16),
+ size: VectorSize::Size32x4,
+ },
+ "1FD4304E",
+ "fadd v31.4s, v0.4s, v16.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fsub,
+ rd: writable_vreg(8),
+ rn: vreg(7),
+ rm: vreg(15),
+ size: VectorSize::Size64x2,
+ },
+ "E8D4EF4E",
+ "fsub v8.2d, v7.2d, v15.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fdiv,
+ rd: writable_vreg(1),
+ rn: vreg(3),
+ rm: vreg(4),
+ size: VectorSize::Size32x4,
+ },
+ "61FC246E",
+ "fdiv v1.4s, v3.4s, v4.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fmax,
+ rd: writable_vreg(31),
+ rn: vreg(16),
+ rm: vreg(0),
+ size: VectorSize::Size64x2,
+ },
+ "1FF6604E",
+ "fmax v31.2d, v16.2d, v0.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fmin,
+ rd: writable_vreg(5),
+ rn: vreg(19),
+ rm: vreg(26),
+ size: VectorSize::Size32x4,
+ },
+ "65F6BA4E",
+ "fmin v5.4s, v19.4s, v26.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fmul,
+ rd: writable_vreg(2),
+ rn: vreg(0),
+ rm: vreg(5),
+ size: VectorSize::Size64x2,
+ },
+ "02DC656E",
+ "fmul v2.2d, v0.2d, v5.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Addp,
+ rd: writable_vreg(16),
+ rn: vreg(12),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "90BD214E",
+ "addp v16.16b, v12.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Addp,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "88BDAE4E",
+ "addp v8.4s, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umlal,
+ rd: writable_vreg(9),
+ rn: vreg(20),
+ rm: vreg(17),
+ size: VectorSize::Size32x2,
+ },
+ "8982B12E",
+ "umlal v9.2d, v20.2s, v17.2s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Zip1,
+ rd: writable_vreg(16),
+ rn: vreg(12),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "9039014E",
+ "zip1 v16.16b, v12.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Zip1,
+ rd: writable_vreg(2),
+ rn: vreg(13),
+ rm: vreg(6),
+ size: VectorSize::Size16x8,
+ },
+ "A239464E",
+ "zip1 v2.8h, v13.8h, v6.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Zip1,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "88398E4E",
+ "zip1 v8.4s, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Zip1,
+ rd: writable_vreg(9),
+ rn: vreg(20),
+ rm: vreg(17),
+ size: VectorSize::Size64x2,
+ },
+ "893AD14E",
+ "zip1 v9.2d, v20.2d, v17.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull,
+ rd: writable_vreg(16),
+ rn: vreg(12),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "90C1210E",
+ "smull v16.8h, v12.8b, v1.8b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull,
+ rd: writable_vreg(2),
+ rn: vreg(13),
+ rm: vreg(6),
+ size: VectorSize::Size16x8,
+ },
+ "A2C1660E",
+ "smull v2.4s, v13.4h, v6.4h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "88C1AE0E",
+ "smull v8.2d, v12.2s, v14.2s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull2,
+ rd: writable_vreg(16),
+ rn: vreg(12),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "90C1214E",
+ "smull2 v16.8h, v12.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull2,
+ rd: writable_vreg(2),
+ rn: vreg(13),
+ rm: vreg(6),
+ size: VectorSize::Size16x8,
+ },
+ "A2C1664E",
+ "smull2 v2.4s, v13.8h, v6.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull2,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "88C1AE4E",
+ "smull2 v8.2d, v12.4s, v14.4s",
+ ));
+
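+ // Two-register vector operations (VecMisc): not, neg, abs, and FP conversions/rounding.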
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Not,
+ rd: writable_vreg(20),
+ rn: vreg(17),
+ size: VectorSize::Size8x8,
+ },
+ "345A202E",
+ "mvn v20.8b, v17.8b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Not,
+ rd: writable_vreg(2),
+ rn: vreg(1),
+ size: VectorSize::Size32x4,
+ },
+ "2258206E",
+ "mvn v2.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd: writable_vreg(3),
+ rn: vreg(7),
+ size: VectorSize::Size8x8,
+ },
+ "E3B8202E",
+ "neg v3.8b, v7.8b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ size: VectorSize::Size8x16,
+ },
+ "88B9206E",
+ "neg v8.16b, v12.16b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd: writable_vreg(0),
+ rn: vreg(31),
+ size: VectorSize::Size16x8,
+ },
+ "E0BB606E",
+ "neg v0.8h, v31.8h",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd: writable_vreg(2),
+ rn: vreg(3),
+ size: VectorSize::Size32x4,
+ },
+ "62B8A06E",
+ "neg v2.4s, v3.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd: writable_vreg(10),
+ rn: vreg(8),
+ size: VectorSize::Size64x2,
+ },
+ "0AB9E06E",
+ "neg v10.2d, v8.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd: writable_vreg(3),
+ rn: vreg(1),
+ size: VectorSize::Size8x8,
+ },
+ "23B8200E",
+ "abs v3.8b, v1.8b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd: writable_vreg(1),
+ rn: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "21B8204E",
+ "abs v1.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd: writable_vreg(29),
+ rn: vreg(28),
+ size: VectorSize::Size16x8,
+ },
+ "9DBB604E",
+ "abs v29.8h, v28.8h",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd: writable_vreg(7),
+ rn: vreg(8),
+ size: VectorSize::Size32x4,
+ },
+ "07B9A04E",
+ "abs v7.4s, v8.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd: writable_vreg(1),
+ rn: vreg(10),
+ size: VectorSize::Size64x2,
+ },
+ "41B9E04E",
+ "abs v1.2d, v10.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Fabs,
+ rd: writable_vreg(15),
+ rn: vreg(16),
+ size: VectorSize::Size32x4,
+ },
+ "0FFAA04E",
+ "fabs v15.4s, v16.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Fneg,
+ rd: writable_vreg(31),
+ rn: vreg(0),
+ size: VectorSize::Size32x4,
+ },
+ "1FF8A06E",
+ "fneg v31.4s, v0.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Fsqrt,
+ rd: writable_vreg(7),
+ rn: vreg(18),
+ size: VectorSize::Size64x2,
+ },
+ "47FAE16E",
+ "fsqrt v7.2d, v18.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Rev64,
+ rd: writable_vreg(1),
+ rn: vreg(10),
+ size: VectorSize::Size32x4,
+ },
+ "4109A04E",
+ "rev64 v1.4s, v10.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Shll,
+ rd: writable_vreg(12),
+ rn: vreg(5),
+ size: VectorSize::Size8x8,
+ },
+ "AC38212E",
+ "shll v12.8h, v5.8b, #8",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Shll,
+ rd: writable_vreg(9),
+ rn: vreg(1),
+ size: VectorSize::Size16x4,
+ },
+ "2938612E",
+ "shll v9.4s, v1.4h, #16",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Shll,
+ rd: writable_vreg(1),
+ rn: vreg(10),
+ size: VectorSize::Size32x2,
+ },
+ "4139A12E",
+ "shll v1.2d, v10.2s, #32",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Fcvtzs,
+ rd: writable_vreg(4),
+ rn: vreg(22),
+ size: VectorSize::Size32x4,
+ },
+ "C4BAA14E",
+ "fcvtzs v4.4s, v22.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Fcvtzu,
+ rd: writable_vreg(29),
+ rn: vreg(15),
+ size: VectorSize::Size64x2,
+ },
+ "FDB9E16E",
+ "fcvtzu v29.2d, v15.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Scvtf,
+ rd: writable_vreg(20),
+ rn: vreg(8),
+ size: VectorSize::Size32x4,
+ },
+ "14D9214E",
+ "scvtf v20.4s, v8.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Ucvtf,
+ rd: writable_vreg(10),
+ rn: vreg(19),
+ size: VectorSize::Size64x2,
+ },
+ "6ADA616E",
+ "ucvtf v10.2d, v19.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintn,
+ rd: writable_vreg(11),
+ rn: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "4B8A214E",
+ "frintn v11.4s, v18.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintn,
+ rd: writable_vreg(12),
+ rn: vreg(17),
+ size: VectorSize::Size64x2,
+ },
+ "2C8A614E",
+ "frintn v12.2d, v17.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintz,
+ rd: writable_vreg(11),
+ rn: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "4B9AA14E",
+ "frintz v11.4s, v18.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintz,
+ rd: writable_vreg(12),
+ rn: vreg(17),
+ size: VectorSize::Size64x2,
+ },
+ "2C9AE14E",
+ "frintz v12.2d, v17.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintm,
+ rd: writable_vreg(11),
+ rn: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "4B9A214E",
+ "frintm v11.4s, v18.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintm,
+ rd: writable_vreg(12),
+ rn: vreg(17),
+ size: VectorSize::Size64x2,
+ },
+ "2C9A614E",
+ "frintm v12.2d, v17.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintp,
+ rd: writable_vreg(11),
+ rn: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "4B8AA14E",
+ "frintp v11.4s, v18.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintp,
+ rd: writable_vreg(12),
+ rn: vreg(17),
+ size: VectorSize::Size64x2,
+ },
+ "2C8AE14E",
+ "frintp v12.2d, v17.2d",
+ ));
+
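+ // Across-lanes reductions (uminv/addv) and vector shifts by immediate.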
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Uminv,
+ rd: writable_vreg(2),
+ rn: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "22A8316E",
+ "uminv b2, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Uminv,
+ rd: writable_vreg(3),
+ rn: vreg(11),
+ size: VectorSize::Size16x8,
+ },
+ "63A9716E",
+ "uminv h3, v11.8h",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Uminv,
+ rd: writable_vreg(18),
+ rn: vreg(4),
+ size: VectorSize::Size32x4,
+ },
+ "92A8B16E",
+ "uminv s18, v4.4s",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Addv,
+ rd: writable_vreg(2),
+ rn: vreg(29),
+ size: VectorSize::Size8x16,
+ },
+ "A2BB314E",
+ "addv b2, v29.16b",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Addv,
+ rd: writable_vreg(3),
+ rn: vreg(21),
+ size: VectorSize::Size16x8,
+ },
+ "A3BA714E",
+ "addv h3, v21.8h",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Addv,
+ rd: writable_vreg(18),
+ rn: vreg(5),
+ size: VectorSize::Size32x4,
+ },
+ "B2B8B14E",
+ "addv s18, v5.4s",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Shl,
+ rd: writable_vreg(27),
+ rn: vreg(5),
+ imm: 7,
+ size: VectorSize::Size8x16,
+ },
+ "BB540F4F",
+ "shl v27.16b, v5.16b, #7",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Shl,
+ rd: writable_vreg(1),
+ rn: vreg(30),
+ imm: 0,
+ size: VectorSize::Size8x16,
+ },
+ "C157084F",
+ "shl v1.16b, v30.16b, #0",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Sshr,
+ rd: writable_vreg(26),
+ rn: vreg(6),
+ imm: 16,
+ size: VectorSize::Size16x8,
+ },
+ "DA04104F",
+ "sshr v26.8h, v6.8h, #16",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Sshr,
+ rd: writable_vreg(3),
+ rn: vreg(19),
+ imm: 1,
+ size: VectorSize::Size16x8,
+ },
+ "63061F4F",
+ "sshr v3.8h, v19.8h, #1",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Ushr,
+ rd: writable_vreg(25),
+ rn: vreg(6),
+ imm: 32,
+ size: VectorSize::Size32x4,
+ },
+ "D904206F",
+ "ushr v25.4s, v6.4s, #32",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Ushr,
+ rd: writable_vreg(5),
+ rn: vreg(21),
+ imm: 1,
+ size: VectorSize::Size32x4,
+ },
+ "A5063F6F",
+ "ushr v5.4s, v21.4s, #1",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Shl,
+ rd: writable_vreg(22),
+ rn: vreg(13),
+ imm: 63,
+ size: VectorSize::Size64x2,
+ },
+ "B6557F4F",
+ "shl v22.2d, v13.2d, #63",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Shl,
+ rd: writable_vreg(23),
+ rn: vreg(9),
+ imm: 0,
+ size: VectorSize::Size64x2,
+ },
+ "3755404F",
+ "shl v23.2d, v9.2d, #0",
+ ));
+
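+ // Vector extract, table lookups (tbl/tbx), load-and-replicate, and vector conditional select.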
+ insns.push((
+ Inst::VecExtract {
+ rd: writable_vreg(1),
+ rn: vreg(30),
+ rm: vreg(17),
+ imm4: 0,
+ },
+ "C103116E",
+ "ext v1.16b, v30.16b, v17.16b, #0",
+ ));
+
+ insns.push((
+ Inst::VecExtract {
+ rd: writable_vreg(1),
+ rn: vreg(30),
+ rm: vreg(17),
+ imm4: 8,
+ },
+ "C143116E",
+ "ext v1.16b, v30.16b, v17.16b, #8",
+ ));
+
+ insns.push((
+ Inst::VecExtract {
+ rd: writable_vreg(1),
+ rn: vreg(30),
+ rm: vreg(17),
+ imm4: 15,
+ },
+ "C17B116E",
+ "ext v1.16b, v30.16b, v17.16b, #15",
+ ));
+
+ insns.push((
+ Inst::VecTbl {
+ rd: writable_vreg(0),
+ rn: vreg(31),
+ rm: vreg(16),
+ is_extension: false,
+ },
+ "E003104E",
+ "tbl v0.16b, { v31.16b }, v16.16b",
+ ));
+
+ insns.push((
+ Inst::VecTbl {
+ rd: writable_vreg(4),
+ rn: vreg(12),
+ rm: vreg(23),
+ is_extension: true,
+ },
+ "8411174E",
+ "tbx v4.16b, { v12.16b }, v23.16b",
+ ));
+
+ insns.push((
+ Inst::VecTbl2 {
+ rd: writable_vreg(16),
+ rn: vreg(31),
+ rn2: vreg(0),
+ rm: vreg(26),
+ is_extension: false,
+ },
+ "F0231A4E",
+ "tbl v16.16b, { v31.16b, v0.16b }, v26.16b",
+ ));
+
+ insns.push((
+ Inst::VecTbl2 {
+ rd: writable_vreg(3),
+ rn: vreg(11),
+ rn2: vreg(12),
+ rm: vreg(19),
+ is_extension: true,
+ },
+ "6331134E",
+ "tbx v3.16b, { v11.16b, v12.16b }, v19.16b",
+ ));
+
+ insns.push((
+ Inst::VecLoadReplicate {
+ rd: writable_vreg(31),
+ rn: xreg(0),
+ size: VectorSize::Size64x2,
+ },
+ "1FCC404D",
+ "ld1r { v31.2d }, [x0]",
+ ));
+
+ insns.push((
+ Inst::VecLoadReplicate {
+ rd: writable_vreg(0),
+ rn: xreg(25),
+ size: VectorSize::Size8x8,
+ },
+ "20C3400D",
+ "ld1r { v0.8b }, [x25]",
+ ));
+
+ insns.push((
+ Inst::VecCSel {
+ rd: writable_vreg(5),
+ rn: vreg(10),
+ rm: vreg(19),
+ cond: Cond::Gt,
+ },
+ "6C000054651EB34E02000014451DAA4E",
+ "vcsel v5.16b, v10.16b, v19.16b, gt (if-then-else diamond)",
+ ));
+
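+ // Scalar integer sign- and zero-extensions.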
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: false,
+ from_bits: 8,
+ to_bits: 32,
+ },
+ "411C0053",
+ "uxtb w1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: true,
+ from_bits: 8,
+ to_bits: 32,
+ },
+ "411C0013",
+ "sxtb w1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: false,
+ from_bits: 16,
+ to_bits: 32,
+ },
+ "413C0053",
+ "uxth w1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: true,
+ from_bits: 16,
+ to_bits: 32,
+ },
+ "413C0013",
+ "sxth w1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: false,
+ from_bits: 8,
+ to_bits: 64,
+ },
+ "411C0053",
+ "uxtb x1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: true,
+ from_bits: 8,
+ to_bits: 64,
+ },
+ "411C4093",
+ "sxtb x1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: false,
+ from_bits: 16,
+ to_bits: 64,
+ },
+ "413C0053",
+ "uxth x1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: true,
+ from_bits: 16,
+ to_bits: 64,
+ },
+ "413C4093",
+ "sxth x1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: false,
+ from_bits: 32,
+ to_bits: 64,
+ },
+ "E103022A",
+ "mov w1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: true,
+ from_bits: 32,
+ to_bits: 64,
+ },
+ "417C4093",
+ "sxtw x1, w2",
+ ));
+
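+ // Control flow: branches, conditional traps, calls, indirect branches, brk, and adr.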
+ insns.push((
+ Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(64),
+ },
+ "10000014",
+ "b 64",
+ ));
+
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::NotZero(xreg(8)),
+ },
+ "480000B40000A0D4",
+ "cbz x8, 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Zero(xreg(8)),
+ },
+ "480000B50000A0D4",
+ "cbnz x8, 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Ne),
+ },
+ "400000540000A0D4",
+ "b.eq 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Eq),
+ },
+ "410000540000A0D4",
+ "b.ne 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Lo),
+ },
+ "420000540000A0D4",
+ "b.hs 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Hs),
+ },
+ "430000540000A0D4",
+ "b.lo 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Pl),
+ },
+ "440000540000A0D4",
+ "b.mi 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Mi),
+ },
+ "450000540000A0D4",
+ "b.pl 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Vc),
+ },
+ "460000540000A0D4",
+ "b.vs 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Vs),
+ },
+ "470000540000A0D4",
+ "b.vc 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Ls),
+ },
+ "480000540000A0D4",
+ "b.hi 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Hi),
+ },
+ "490000540000A0D4",
+ "b.ls 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Lt),
+ },
+ "4A0000540000A0D4",
+ "b.ge 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Ge),
+ },
+ "4B0000540000A0D4",
+ "b.lt 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Le),
+ },
+ "4C0000540000A0D4",
+ "b.gt 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Gt),
+ },
+ "4D0000540000A0D4",
+ "b.le 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Nv),
+ },
+ "4E0000540000A0D4",
+ "b.al 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Al),
+ },
+ "4F0000540000A0D4",
+ "b.nv 8 ; udf",
+ ));
+
+ insns.push((
+ Inst::CondBr {
+ taken: BranchTarget::ResolvedOffset(64),
+ not_taken: BranchTarget::ResolvedOffset(128),
+ kind: CondBrKind::Cond(Cond::Le),
+ },
+ "0D02005420000014",
+ "b.le 64 ; b 128",
+ ));
+
+ insns.push((
+ Inst::Call {
+ info: Box::new(CallInfo {
+ dest: ExternalName::testcase("test0"),
+ uses: Vec::new(),
+ defs: Vec::new(),
+ opcode: Opcode::Call,
+ caller_callconv: CallConv::SystemV,
+ callee_callconv: CallConv::SystemV,
+ }),
+ },
+ "00000094",
+ "bl 0",
+ ));
+
+ insns.push((
+ Inst::CallInd {
+ info: Box::new(CallIndInfo {
+ rn: xreg(10),
+ uses: Vec::new(),
+ defs: Vec::new(),
+ opcode: Opcode::CallIndirect,
+ caller_callconv: CallConv::SystemV,
+ callee_callconv: CallConv::SystemV,
+ }),
+ },
+ "40013FD6",
+ "blr x10",
+ ));
+
+ insns.push((
+ Inst::IndirectBr {
+ rn: xreg(3),
+ targets: vec![],
+ },
+ "60001FD6",
+ "br x3",
+ ));
+
+ insns.push((Inst::Brk, "000020D4", "brk #0"));
+
+ insns.push((
+ Inst::Adr {
+ rd: writable_xreg(15),
+ off: (1 << 20) - 4,
+ },
+ "EFFF7F10",
+ "adr x15, pc+1048572",
+ ));
+
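+ // Scalar FP moves and FP arithmetic (one-, two-, and three-operand forms, plus shifts by immediate).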
+ insns.push((
+ Inst::FpuMove64 {
+ rd: writable_vreg(8),
+ rn: vreg(4),
+ },
+ "881CA40E",
+ "mov v8.8b, v4.8b",
+ ));
+
+ insns.push((
+ Inst::FpuMove128 {
+ rd: writable_vreg(17),
+ rn: vreg(26),
+ },
+ "511FBA4E",
+ "mov v17.16b, v26.16b",
+ ));
+
+ insns.push((
+ Inst::FpuMoveFromVec {
+ rd: writable_vreg(1),
+ rn: vreg(30),
+ idx: 2,
+ size: VectorSize::Size32x4,
+ },
+ "C107145E",
+ "mov s1, v30.s[2]",
+ ));
+
+ insns.push((
+ Inst::FpuMoveFromVec {
+ rd: writable_vreg(23),
+ rn: vreg(11),
+ idx: 0,
+ size: VectorSize::Size64x2,
+ },
+ "7705085E",
+ "mov d23, v11.d[0]",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Abs32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CFC3201E",
+ "fabs s15, s30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Abs64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CFC3601E",
+ "fabs d15, d30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Neg32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CF43211E",
+ "fneg s15, s30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Neg64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CF43611E",
+ "fneg d15, d30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Sqrt32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CFC3211E",
+ "fsqrt s15, s30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Sqrt64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CFC3611E",
+ "fsqrt d15, d30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Cvt32To64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CFC3221E",
+ "fcvt d15, s30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Cvt64To32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CF43621E",
+ "fcvt s15, d30",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Add32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF2B3F1E",
+ "fadd s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Add64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF2B7F1E",
+ "fadd d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Sub32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF3B3F1E",
+ "fsub s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Sub64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF3B7F1E",
+ "fsub d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Mul32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF0B3F1E",
+ "fmul s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Mul64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF0B7F1E",
+ "fmul d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Div32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF1B3F1E",
+ "fdiv s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Div64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF1B7F1E",
+ "fdiv d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Max32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF4B3F1E",
+ "fmax s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Max64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF4B7F1E",
+ "fmax d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Min32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF5B3F1E",
+ "fmin s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Min64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF5B7F1E",
+ "fmin d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Uqadd64,
+ rd: writable_vreg(21),
+ rn: vreg(22),
+ rm: vreg(23),
+ },
+ "D50EF77E",
+ "uqadd d21, d22, d23",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Sqadd64,
+ rd: writable_vreg(21),
+ rn: vreg(22),
+ rm: vreg(23),
+ },
+ "D50EF75E",
+ "sqadd d21, d22, d23",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Uqsub64,
+ rd: writable_vreg(21),
+ rn: vreg(22),
+ rm: vreg(23),
+ },
+ "D52EF77E",
+ "uqsub d21, d22, d23",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Sqsub64,
+ rd: writable_vreg(21),
+ rn: vreg(22),
+ rm: vreg(23),
+ },
+ "D52EF75E",
+ "sqsub d21, d22, d23",
+ ));
+
+ insns.push((
+ Inst::FpuRRRR {
+ fpu_op: FPUOp3::MAdd32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ ra: vreg(1),
+ },
+ "CF071F1F",
+ "fmadd s15, s30, s31, s1",
+ ));
+
+ insns.push((
+ Inst::FpuRRRR {
+ fpu_op: FPUOp3::MAdd64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ ra: vreg(1),
+ },
+ "CF075F1F",
+ "fmadd d15, d30, d31, d1",
+ ));
+
+ insns.push((
+ Inst::FpuRRI {
+ fpu_op: FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(32, 32).unwrap()),
+ rd: writable_vreg(2),
+ rn: vreg(5),
+ },
+ "A204202F",
+ "ushr v2.2s, v5.2s, #32",
+ ));
+
+ insns.push((
+ Inst::FpuRRI {
+ fpu_op: FPUOpRI::UShr64(FPURightShiftImm::maybe_from_u8(63, 64).unwrap()),
+ rd: writable_vreg(2),
+ rn: vreg(5),
+ },
+ "A204417F",
+ "ushr d2, d5, #63",
+ ));
+
+ insns.push((
+ Inst::FpuRRI {
+ fpu_op: FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(31, 32).unwrap()),
+ rd: writable_vreg(4),
+ rn: vreg(10),
+ },
+ "44553F2F",
+ "sli v4.2s, v10.2s, #31",
+ ));
+
+ insns.push((
+ Inst::FpuRRI {
+ fpu_op: FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(63, 64).unwrap()),
+ rd: writable_vreg(4),
+ rn: vreg(10),
+ },
+ "44557F7F",
+ "sli d4, d10, #63",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F32ToU32,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100391E",
+ "fcvtzu w1, s4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F32ToU64,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100399E",
+ "fcvtzu x1, s4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F32ToI32,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100381E",
+ "fcvtzs w1, s4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F32ToI64,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100389E",
+ "fcvtzs x1, s4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F64ToU32,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100791E",
+ "fcvtzu w1, d4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F64ToU64,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100799E",
+ "fcvtzu x1, d4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F64ToI32,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100781E",
+ "fcvtzs w1, d4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F64ToI64,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100789E",
+ "fcvtzs x1, d4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::U32ToF32,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100231E",
+ "ucvtf s1, w4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::I32ToF32,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100221E",
+ "scvtf s1, w4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::U32ToF64,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100631E",
+ "ucvtf d1, w4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::I32ToF64,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100621E",
+ "scvtf d1, w4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::U64ToF32,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100239E",
+ "ucvtf s1, x4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::I64ToF32,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100229E",
+ "scvtf s1, x4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::U64ToF64,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100639E",
+ "ucvtf d1, x4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::I64ToF64,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100629E",
+ "scvtf d1, x4",
+ ));
+
+ insns.push((
+ Inst::FpuCmp32 {
+ rn: vreg(23),
+ rm: vreg(24),
+ },
+ "E022381E",
+ "fcmp s23, s24",
+ ));
+
+ insns.push((
+ Inst::FpuCmp64 {
+ rn: vreg(23),
+ rm: vreg(24),
+ },
+ "E022781E",
+ "fcmp d23, d24",
+ ));
+
+ insns.push((
+ Inst::FpuLoad32 {
+ rd: writable_vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), F32),
+ flags: MemFlags::trusted(),
+ },
+ "107969BC",
+ "ldr s16, [x8, x9, LSL #2]",
+ ));
+
+ insns.push((
+ Inst::FpuLoad64 {
+ rd: writable_vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), F64),
+ flags: MemFlags::trusted(),
+ },
+ "107969FC",
+ "ldr d16, [x8, x9, LSL #3]",
+ ));
+
+ insns.push((
+ Inst::FpuLoad128 {
+ rd: writable_vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), I128),
+ flags: MemFlags::trusted(),
+ },
+ "1079E93C",
+ "ldr q16, [x8, x9, LSL #4]",
+ ));
+
+ insns.push((
+ Inst::FpuLoad32 {
+ rd: writable_vreg(16),
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ },
+ "5000001C",
+ "ldr s16, pc+8",
+ ));
+
+ insns.push((
+ Inst::FpuLoad64 {
+ rd: writable_vreg(16),
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ },
+ "5000005C",
+ "ldr d16, pc+8",
+ ));
+
+ insns.push((
+ Inst::FpuLoad128 {
+ rd: writable_vreg(16),
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ },
+ "5000009C",
+ "ldr q16, pc+8",
+ ));
+
+ insns.push((
+ Inst::FpuStore32 {
+ rd: vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), F32),
+ flags: MemFlags::trusted(),
+ },
+ "107929BC",
+ "str s16, [x8, x9, LSL #2]",
+ ));
+
+ insns.push((
+ Inst::FpuStore64 {
+ rd: vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), F64),
+ flags: MemFlags::trusted(),
+ },
+ "107929FC",
+ "str d16, [x8, x9, LSL #3]",
+ ));
+
+ insns.push((
+ Inst::FpuStore128 {
+ rd: vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), I128),
+ flags: MemFlags::trusted(),
+ },
+ "1079A93C",
+ "str q16, [x8, x9, LSL #4]",
+ ));
+
+ insns.push((
+ Inst::LoadFpuConst64 {
+ rd: writable_vreg(16),
+ const_data: 1.0_f64.to_bits(),
+ },
+ "5000005C03000014000000000000F03F",
+ "ldr d16, pc+8 ; b 12 ; data.f64 1",
+ ));
+
+ insns.push((
+ Inst::LoadFpuConst128 {
+ rd: writable_vreg(5),
+ const_data: 0x0f0e0d0c0b0a09080706050403020100,
+ },
+ "4500009C05000014000102030405060708090A0B0C0D0E0F",
+ "ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100",
+ ));
+
+ insns.push((
+ Inst::FpuCSel32 {
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(3),
+ cond: Cond::Hi,
+ },
+ "418C231E",
+ "fcsel s1, s2, s3, hi",
+ ));
+
+ insns.push((
+ Inst::FpuCSel64 {
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(3),
+ cond: Cond::Eq,
+ },
+ "410C631E",
+ "fcsel d1, d2, d3, eq",
+ ));
+
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Minus32,
+ },
+ "1743251E",
+ "frintm s23, s24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Minus64,
+ },
+ "1743651E",
+ "frintm d23, d24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Plus32,
+ },
+ "17C3241E",
+ "frintp s23, s24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Plus64,
+ },
+ "17C3641E",
+ "frintp d23, d24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Zero32,
+ },
+ "17C3251E",
+ "frintz s23, s24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Zero64,
+ },
+ "17C3651E",
+ "frintz d23, d24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Nearest32,
+ },
+ "1743241E",
+ "frintn s23, s24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Nearest64,
+ },
+ "1743641E",
+ "frintn d23, d24",
+ ));
+
+ insns.push((
+ Inst::AtomicRMW {
+ ty: I16,
+ op: inst_common::AtomicRmwOp::Xor,
+ },
+ "BF3B03D53B7F5F487C031ACA3C7F1848B8FFFFB5BF3B03D5",
+ "atomically { 16_bits_at_[x25]) Xor= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
+ ));
+
+ insns.push((
+ Inst::AtomicRMW {
+ ty: I32,
+ op: inst_common::AtomicRmwOp::Xchg,
+ },
+ "BF3B03D53B7F5F88FC031AAA3C7F1888B8FFFFB5BF3B03D5",
+ "atomically { 32_bits_at_[x25]) Xchg= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
+ ));
+
+ insns.push((
+ Inst::AtomicCAS {
+ ty: I8,
+ },
+ "BF3B03D53B7F5F08581F40927F0318EB610000543C7F180878FFFFB5BF3B03D5",
+ "atomically { compare-and-swap(8_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
+ ));
+
+ insns.push((
+ Inst::AtomicCAS {
+ ty: I64,
+ },
+ "BF3B03D53B7F5FC8F8031AAA7F0318EB610000543C7F18C878FFFFB5BF3B03D5",
+ "atomically { compare-and-swap(64_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
+ ));
+
+ insns.push((
+ Inst::AtomicLoad {
+ ty: I8,
+ r_data: writable_xreg(7),
+ r_addr: xreg(28),
+ },
+ "BF3B03D587034039",
+ "atomically { x7 = zero_extend_8_bits_at[x28] }",
+ ));
+
+ insns.push((
+ Inst::AtomicLoad {
+ ty: I64,
+ r_data: writable_xreg(28),
+ r_addr: xreg(7),
+ },
+ "BF3B03D5FC0040F9",
+ "atomically { x28 = zero_extend_64_bits_at[x7] }",
+ ));
+
+ insns.push((
+ Inst::AtomicStore {
+ ty: I16,
+ r_data: xreg(17),
+ r_addr: xreg(8),
+ },
+ "11010079BF3B03D5",
+ "atomically { 16_bits_at[x8] = x17 }",
+ ));
+
+ insns.push((
+ Inst::AtomicStore {
+ ty: I32,
+ r_data: xreg(18),
+ r_addr: xreg(7),
+ },
+ "F20000B9BF3B03D5",
+ "atomically { 32_bits_at[x7] = x18 }",
+ ));
+
+ insns.push((Inst::Fence {}, "BF3B03D5", "dmb ish"));
+
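+    // A note on the entries above (editorial summary): each tuple is
+    // (instruction, expected encoding, expected printing). The encoding string
+    // is the instruction's bytes in memory order as uppercase hex; for example
+    // "BF3B03D5" is the little-endian encoding of `dmb ish` (0xD5033BBF).
+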
+ let flags = settings::Flags::new(settings::builder());
+ let rru = create_reg_universe(&flags);
+ let emit_info = EmitInfo::new(flags);
+ for (insn, expected_encoding, expected_printing) in insns {
+ println!(
+ "AArch64: {:?}, {}, {}",
+ insn, expected_encoding, expected_printing
+ );
+
+ // Check the printed text is as expected.
+ let actual_printing = insn.show_rru(Some(&rru));
+ assert_eq!(expected_printing, actual_printing);
+
+ let mut sink = test_utils::TestCodeSink::new();
+ let mut buffer = MachBuffer::new();
+ insn.emit(&mut buffer, &emit_info, &mut Default::default());
+ let buffer = buffer.finish();
+ buffer.emit(&mut sink);
+ let actual_encoding = &sink.stringify();
+ assert_eq!(expected_encoding, actual_encoding);
+ }
+}
+
+#[test]
+fn test_cond_invert() {
+ for cond in vec![
+ Cond::Eq,
+ Cond::Ne,
+ Cond::Hs,
+ Cond::Lo,
+ Cond::Mi,
+ Cond::Pl,
+ Cond::Vs,
+ Cond::Vc,
+ Cond::Hi,
+ Cond::Ls,
+ Cond::Ge,
+ Cond::Lt,
+ Cond::Gt,
+ Cond::Le,
+ Cond::Al,
+ Cond::Nv,
+ ]
+ .into_iter()
+ {
+ assert_eq!(cond.invert().invert(), cond);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/imms.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/imms.rs
new file mode 100644
index 0000000000..b6da0402bc
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/imms.rs
@@ -0,0 +1,1025 @@
+//! AArch64 ISA definitions: immediate constants.
+
+// Some variants are never constructed, but we still want them as options in the future.
+#![allow(dead_code)]
+use crate::ir::types::*;
+use crate::ir::Type;
+use crate::isa::aarch64::inst::{OperandSize, ScalarSize};
+
+use regalloc::{PrettyPrint, RealRegUniverse};
+
+use core::convert::TryFrom;
+use std::string::String;
+
+/// An immediate that represents the NZCV flags.
+#[derive(Clone, Copy, Debug)]
+pub struct NZCV {
+ /// The negative condition flag.
+ n: bool,
+ /// The zero condition flag.
+ z: bool,
+ /// The carry condition flag.
+ c: bool,
+ /// The overflow condition flag.
+ v: bool,
+}
+
+impl NZCV {
+ pub fn new(n: bool, z: bool, c: bool, v: bool) -> NZCV {
+ NZCV { n, z, c, v }
+ }
+
+ /// Bits for encoding.
+ pub fn bits(&self) -> u32 {
+ (u32::from(self.n) << 3)
+ | (u32::from(self.z) << 2)
+ | (u32::from(self.c) << 1)
+ | u32::from(self.v)
+ }
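+
+    // Example (illustrative, not from the original source): with only Z and C
+    // set, `NZCV::new(false, true, true, false).bits()` is 0b0110.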
+}
+
+/// An unsigned 5-bit immediate.
+#[derive(Clone, Copy, Debug)]
+pub struct UImm5 {
+ /// The value.
+ value: u8,
+}
+
+impl UImm5 {
+ pub fn maybe_from_u8(value: u8) -> Option<UImm5> {
+ if value < 32 {
+ Some(UImm5 { value })
+ } else {
+ None
+ }
+ }
+
+ /// Bits for encoding.
+ pub fn bits(&self) -> u32 {
+ u32::from(self.value)
+ }
+}
+
+/// A signed, scaled 7-bit offset.
+#[derive(Clone, Copy, Debug)]
+pub struct SImm7Scaled {
+ /// The value.
+ pub value: i16,
+    /// The offset is scaled by (i.e. must be a multiple of) the size of this type.
+ pub scale_ty: Type,
+}
+
+impl SImm7Scaled {
+ /// Create a SImm7Scaled from a raw offset and the known scale type, if
+ /// possible.
+ pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<SImm7Scaled> {
+ assert!(scale_ty == I64 || scale_ty == I32);
+ let scale = scale_ty.bytes();
+ assert!(scale.is_power_of_two());
+ let scale = i64::from(scale);
+ let upper_limit = 63 * scale;
+ let lower_limit = -(64 * scale);
+ if value >= lower_limit && value <= upper_limit && (value & (scale - 1)) == 0 {
+ Some(SImm7Scaled {
+ value: i16::try_from(value).unwrap(),
+ scale_ty,
+ })
+ } else {
+ None
+ }
+ }
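+
+    // Worked example (illustrative, not from the original source): for
+    // `scale_ty == I64` the accepted offsets are the multiples of 8 in
+    // -512..=504, so `maybe_from_i64(504, I64)` is accepted and encodes as
+    // 504 / 8 == 63, while `maybe_from_i64(505, I64)` and
+    // `maybe_from_i64(512, I64)` are rejected.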
+
+ /// Create a zero immediate of this format.
+ pub fn zero(scale_ty: Type) -> SImm7Scaled {
+ SImm7Scaled { value: 0, scale_ty }
+ }
+
+ /// Bits for encoding.
+ pub fn bits(&self) -> u32 {
+ let ty_bytes: i16 = self.scale_ty.bytes() as i16;
+ let scaled: i16 = self.value / ty_bytes;
+ assert!(scaled <= 63 && scaled >= -64);
+ let scaled: i8 = scaled as i8;
+ let encoded: u32 = scaled as u32;
+ encoded & 0x7f
+ }
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct FPULeftShiftImm {
+ pub amount: u8,
+ pub lane_size_in_bits: u8,
+}
+
+impl FPULeftShiftImm {
+ pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
+ debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
+ if amount < lane_size_in_bits {
+ Some(Self {
+ amount,
+ lane_size_in_bits,
+ })
+ } else {
+ None
+ }
+ }
+
+ pub fn enc(&self) -> u32 {
+ debug_assert!(self.lane_size_in_bits.is_power_of_two());
+ debug_assert!(self.lane_size_in_bits > self.amount);
+ // The encoding of the immediate follows the table below,
+ // where xs encode the shift amount.
+ //
+ // | lane_size_in_bits | encoding |
+ // +------------------------------+
+ // | 8 | 0001xxx |
+ // | 16 | 001xxxx |
+ // | 32 | 01xxxxx |
+ // | 64 | 1xxxxxx |
+ //
+ // The highest one bit is represented by `lane_size_in_bits`. Since
+ // `lane_size_in_bits` is a power of 2 and `amount` is less
+ // than `lane_size_in_bits`, they can be ORed
+        // together to produce the encoded value.
+ u32::from(self.lane_size_in_bits | self.amount)
+ }
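+
+    // Worked example (illustrative, not from the original source): a 32-bit
+    // lane shifted left by 31 encodes as 0b0100000 | 0b0011111 == 0b0111111,
+    // matching the "01xxxxx" row of the table above.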
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct FPURightShiftImm {
+ pub amount: u8,
+ pub lane_size_in_bits: u8,
+}
+
+impl FPURightShiftImm {
+ pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
+ debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
+ if amount > 0 && amount <= lane_size_in_bits {
+ Some(Self {
+ amount,
+ lane_size_in_bits,
+ })
+ } else {
+ None
+ }
+ }
+
+ pub fn enc(&self) -> u32 {
+ debug_assert_ne!(0, self.amount);
+ // The encoding of the immediate follows the table below,
+ // where xs encodes the negated shift amount.
+ //
+ // | lane_size_in_bits | encoding |
+ // +------------------------------+
+ // | 8 | 0001xxx |
+ // | 16 | 001xxxx |
+ // | 32 | 01xxxxx |
+ // | 64 | 1xxxxxx |
+ //
+        // The shift amount is negated such that a shift amount
+ // of 1 (in 64-bit) is encoded as 0b111111 and a shift
+ // amount of 64 is encoded as 0b000000,
+ // in the bottom 6 bits.
+ u32::from((self.lane_size_in_bits * 2) - self.amount)
+ }
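+
+    // Worked example (illustrative, not from the original source): for a
+    // 64-bit lane, a right shift of 63 encodes as (64 * 2) - 63 == 65 ==
+    // 0b1000001, and the maximum shift of 64 encodes as 64 == 0b1000000.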
+}
+
+/// A signed 9-bit offset.
+#[derive(Clone, Copy, Debug)]
+pub struct SImm9 {
+ /// The value.
+ pub value: i16,
+}
+
+impl SImm9 {
+ /// Create a signed 9-bit offset from a full-range value, if possible.
+ pub fn maybe_from_i64(value: i64) -> Option<SImm9> {
+ if value >= -256 && value <= 255 {
+ Some(SImm9 {
+ value: value as i16,
+ })
+ } else {
+ None
+ }
+ }
+
+ /// Create a zero immediate of this format.
+ pub fn zero() -> SImm9 {
+ SImm9 { value: 0 }
+ }
+
+ /// Bits for encoding.
+ pub fn bits(&self) -> u32 {
+ (self.value as u32) & 0x1ff
+ }
+
+ /// Signed value of immediate.
+ pub fn value(&self) -> i32 {
+ self.value as i32
+ }
+}
+
+/// An unsigned, scaled 12-bit offset.
+#[derive(Clone, Copy, Debug)]
+pub struct UImm12Scaled {
+ /// The value.
+ pub value: u16,
+    /// The offset is scaled by (i.e. must be a multiple of) the size of this type.
+ pub scale_ty: Type,
+}
+
+impl UImm12Scaled {
+ /// Create a UImm12Scaled from a raw offset and the known scale type, if
+ /// possible.
+ pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<UImm12Scaled> {
+ // Ensure the type is at least one byte.
+ let scale_ty = if scale_ty == B1 { B8 } else { scale_ty };
+
+ let scale = scale_ty.bytes();
+ assert!(scale.is_power_of_two());
+ let scale = scale as i64;
+ let limit = 4095 * scale;
+ if value >= 0 && value <= limit && (value & (scale - 1)) == 0 {
+ Some(UImm12Scaled {
+ value: value as u16,
+ scale_ty,
+ })
+ } else {
+ None
+ }
+ }
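+
+    // Worked example (illustrative, not from the original source): for
+    // `scale_ty == I64` the accepted offsets are the multiples of 8 in
+    // 0..=32760 (4095 * 8), so `maybe_from_i64(32760, I64)` encodes as
+    // 32760 / 8 == 4095 while `maybe_from_i64(32768, I64)` is rejected.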
+
+ /// Create a zero immediate of this format.
+ pub fn zero(scale_ty: Type) -> UImm12Scaled {
+ UImm12Scaled { value: 0, scale_ty }
+ }
+
+ /// Encoded bits.
+ pub fn bits(&self) -> u32 {
+ (self.value as u32 / self.scale_ty.bytes()) & 0xfff
+ }
+
+    /// The raw byte offset (i.e. the value before dividing by the scale for encoding).
+ pub fn value(&self) -> u32 {
+ self.value as u32
+ }
+
+ /// The value type which is the scaling base.
+ pub fn scale_ty(&self) -> Type {
+ self.scale_ty
+ }
+}
+
+/// A shifted immediate value in 'imm12' format: supports 12 bits, shifted
+/// left by 0 or 12 places.
+#[derive(Clone, Debug)]
+pub struct Imm12 {
+ /// The immediate bits.
+ pub bits: u16,
+ /// Whether the immediate bits are shifted left by 12 or not.
+ pub shift12: bool,
+}
+
+impl Imm12 {
+    /// Compute an Imm12 from raw bits, if possible.
+ pub fn maybe_from_u64(val: u64) -> Option<Imm12> {
+ if val == 0 {
+ Some(Imm12 {
+ bits: 0,
+ shift12: false,
+ })
+ } else if val < 0xfff {
+ Some(Imm12 {
+ bits: val as u16,
+ shift12: false,
+ })
+ } else if val < 0xfff_000 && (val & 0xfff == 0) {
+ Some(Imm12 {
+ bits: (val >> 12) as u16,
+ shift12: true,
+ })
+ } else {
+ None
+ }
+ }
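+
+    // A few illustrative inputs (not from the original source):
+    //   maybe_from_u64(0x123)     => Some { bits: 0x123, shift12: false }
+    //   maybe_from_u64(0x123_000) => Some { bits: 0x123, shift12: true }
+    //   maybe_from_u64(0x123_456) => None (needs more than one 12-bit chunk)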
+
+ /// Create a zero immediate of this format.
+ pub fn zero() -> Self {
+ Imm12 {
+ bits: 0,
+ shift12: false,
+ }
+ }
+
+ /// Bits for 2-bit "shift" field in e.g. AddI.
+ pub fn shift_bits(&self) -> u32 {
+ if self.shift12 {
+ 0b01
+ } else {
+ 0b00
+ }
+ }
+
+ /// Bits for 12-bit "imm" field in e.g. AddI.
+ pub fn imm_bits(&self) -> u32 {
+ self.bits as u32
+ }
+}
+
+/// An immediate for logical instructions.
+#[derive(Clone, Debug, PartialEq)]
+pub struct ImmLogic {
+ /// The actual value.
+ value: u64,
+ /// `N` flag.
+ pub n: bool,
+    /// `R` field: rotate amount.
+    pub r: u8,
+    /// `S` field: element size and number of set bits.
+    pub s: u8,
+ /// Was this constructed for a 32-bit or 64-bit instruction?
+ pub size: OperandSize,
+}
+
+impl ImmLogic {
+ /// Compute an ImmLogic from raw bits, if possible.
+ pub fn maybe_from_u64(value: u64, ty: Type) -> Option<ImmLogic> {
+ // Note: This function is a port of VIXL's Assembler::IsImmLogical.
+
+ if ty != I64 && ty != I32 {
+ return None;
+ }
+ let operand_size = OperandSize::from_ty(ty);
+
+ let original_value = value;
+
+ let value = if ty == I32 {
+ // To handle 32-bit logical immediates, the very easiest thing is to repeat
+ // the input value twice to make a 64-bit word. The correct encoding of that
+ // as a logical immediate will also be the correct encoding of the 32-bit
+ // value.
+
+ // Avoid making the assumption that the most-significant 32 bits are zero by
+ // shifting the value left and duplicating it.
+ let value = value << 32;
+ value | value >> 32
+ } else {
+ value
+ };
+
+ // Logical immediates are encoded using parameters n, imm_s and imm_r using
+ // the following table:
+ //
+ // N imms immr size S R
+ // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
+ // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
+ // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
+ // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
+ // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
+ // 0 11110s xxxxxr 2 UInt(s) UInt(r)
+ // (s bits must not be all set)
+ //
+ // A pattern is constructed of size bits, where the least significant S+1 bits
+ // are set. The pattern is rotated right by R, and repeated across a 32 or
+ // 64-bit value, depending on destination register width.
+ //
+ // Put another way: the basic format of a logical immediate is a single
+ // contiguous stretch of 1 bits, repeated across the whole word at intervals
+ // given by a power of 2. To identify them quickly, we first locate the
+ // lowest stretch of 1 bits, then the next 1 bit above that; that combination
+ // is different for every logical immediate, so it gives us all the
+ // information we need to identify the only logical immediate that our input
+ // could be, and then we simply check if that's the value we actually have.
+ //
+ // (The rotation parameter does give the possibility of the stretch of 1 bits
+ // going 'round the end' of the word. To deal with that, we observe that in
+ // any situation where that happens the bitwise NOT of the value is also a
+ // valid logical immediate. So we simply invert the input whenever its low bit
+ // is set, and then we know that the rotated case can't arise.)
+ let (value, inverted) = if value & 1 == 1 {
+ (!value, true)
+ } else {
+ (value, false)
+ };
+
+ if value == 0 {
+ return None;
+ }
+
+ // The basic analysis idea: imagine our input word looks like this.
+ //
+ // 0011111000111110001111100011111000111110001111100011111000111110
+ // c b a
+ // |<--d-->|
+ //
+ // We find the lowest set bit (as an actual power-of-2 value, not its index)
+ // and call it a. Then we add a to our original number, which wipes out the
+ // bottommost stretch of set bits and replaces it with a 1 carried into the
+ // next zero bit. Then we look for the new lowest set bit, which is in
+ // position b, and subtract it, so now our number is just like the original
+ // but with the lowest stretch of set bits completely gone. Now we find the
+ // lowest set bit again, which is position c in the diagram above. Then we'll
+ // measure the distance d between bit positions a and c (using CLZ), and that
+ // tells us that the only valid logical immediate that could possibly be equal
+ // to this number is the one in which a stretch of bits running from a to just
+ // below b is replicated every d bits.
+ fn lowest_set_bit(value: u64) -> u64 {
+ let bit = value.trailing_zeros();
+ 1u64.checked_shl(bit).unwrap_or(0)
+ }
+ let a = lowest_set_bit(value);
+ assert_ne!(0, a);
+ let value_plus_a = value.wrapping_add(a);
+ let b = lowest_set_bit(value_plus_a);
+ let value_plus_a_minus_b = value_plus_a - b;
+ let c = lowest_set_bit(value_plus_a_minus_b);
+
+ let (d, clz_a, out_n, mask) = if c != 0 {
+ // The general case, in which there is more than one stretch of set bits.
+ // Compute the repeat distance d, and set up a bitmask covering the basic
+ // unit of repetition (i.e. a word with the bottom d bits set). Also, in all
+ // of these cases the N bit of the output will be zero.
+ let clz_a = a.leading_zeros();
+ let clz_c = c.leading_zeros();
+ let d = clz_a - clz_c;
+ let mask = (1 << d) - 1;
+ (d, clz_a, 0, mask)
+ } else {
+ (64, a.leading_zeros(), 1, u64::max_value())
+ };
+
+ // If the repeat period d is not a power of two, it can't be encoded.
+ if !d.is_power_of_two() {
+ return None;
+ }
+
+ if ((b.wrapping_sub(a)) & !mask) != 0 {
+ // If the bit stretch (b - a) does not fit within the mask derived from the
+ // repeat period, then fail.
+ return None;
+ }
+
+ // The only possible option is b - a repeated every d bits. Now we're going to
+ // actually construct the valid logical immediate derived from that
+ // specification, and see if it equals our original input.
+ //
+ // To repeat a value every d bits, we multiply it by a number of the form
+ // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
+ // be derived using a table lookup on CLZ(d).
+ const MULTIPLIERS: [u64; 6] = [
+ 0x0000000000000001,
+ 0x0000000100000001,
+ 0x0001000100010001,
+ 0x0101010101010101,
+ 0x1111111111111111,
+ 0x5555555555555555,
+ ];
+ let multiplier = MULTIPLIERS[(u64::from(d).leading_zeros() - 57) as usize];
+ let candidate = b.wrapping_sub(a) * multiplier;
+
+ if value != candidate {
+ // The candidate pattern doesn't match our input value, so fail.
+ return None;
+ }
+
+ // We have a match! This is a valid logical immediate, so now we have to
+ // construct the bits and pieces of the instruction encoding that generates
+ // it.
+
+ // Count the set bits in our basic stretch. The special case of clz(0) == -1
+ // makes the answer come out right for stretches that reach the very top of
+ // the word (e.g. numbers like 0xffffc00000000000).
+ let clz_b = if b == 0 {
+ u32::max_value() // -1
+ } else {
+ b.leading_zeros()
+ };
+ let s = clz_a.wrapping_sub(clz_b);
+
+ // Decide how many bits to rotate right by, to put the low bit of that basic
+ // stretch in position a.
+ let (s, r) = if inverted {
+ // If we inverted the input right at the start of this function, here's
+ // where we compensate: the number of set bits becomes the number of clear
+ // bits, and the rotation count is based on position b rather than position
+ // a (since b is the location of the 'lowest' 1 bit after inversion).
+ // Need wrapping for when clz_b is max_value() (for when b == 0).
+ (d - s, clz_b.wrapping_add(1) & (d - 1))
+ } else {
+ (s, (clz_a + 1) & (d - 1))
+ };
+
+ // Now we're done, except for having to encode the S output in such a way that
+ // it gives both the number of set bits and the length of the repeated
+ // segment. The s field is encoded like this:
+ //
+ // imms size S
+ // ssssss 64 UInt(ssssss)
+ // 0sssss 32 UInt(sssss)
+ // 10ssss 16 UInt(ssss)
+ // 110sss 8 UInt(sss)
+ // 1110ss 4 UInt(ss)
+ // 11110s 2 UInt(s)
+ //
+ // So we 'or' (2 * -d) with our computed s to form imms.
+ let s = ((d * 2).wrapping_neg() | (s - 1)) & 0x3f;
+ debug_assert!(u8::try_from(r).is_ok());
+ debug_assert!(u8::try_from(s).is_ok());
+ Some(ImmLogic {
+ value: original_value,
+ n: out_n != 0,
+ r: r as u8,
+ s: s as u8,
+ size: operand_size,
+ })
+ }
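+
+    // Worked example (illustrative; the values match the unit tests at the
+    // bottom of this file): 248 == 0b1111_1000 is a single run of five set
+    // bits, so `maybe_from_u64(248, I64)` yields N=1, r=61 (rotate amount)
+    // and s=4 (five set bits, 64-bit element), while 249 == 0b1111_1001 is
+    // not a valid logical immediate and is rejected.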
+
+ /// Returns bits ready for encoding: (N:1, R:6, S:6)
+ pub fn enc_bits(&self) -> u32 {
+ ((self.n as u32) << 12) | ((self.r as u32) << 6) | (self.s as u32)
+ }
+
+ /// Returns the value that this immediate represents.
+ pub fn value(&self) -> u64 {
+ self.value
+ }
+
+ /// Return an immediate for the bitwise-inverted value.
+ pub fn invert(&self) -> ImmLogic {
+ // For every ImmLogical immediate, the inverse can also be encoded.
+ Self::maybe_from_u64(!self.value, self.size.to_ty()).unwrap()
+ }
+
+ /// This provides a safe(ish) way to avoid the costs of `maybe_from_u64` when we want to
+ /// encode a constant that we know at compiler-build time. It constructs an `ImmLogic` from
+ /// the fields `n`, `r`, `s` and `size`, but in a debug build, checks that `value_to_check`
+ /// corresponds to those four fields. The intention is that, in a non-debug build, this
+ /// reduces to something small enough that it will be a candidate for inlining.
+ pub fn from_n_r_s(value_to_check: u64, n: bool, r: u8, s: u8, size: OperandSize) -> Self {
+ // Construct it from the components we got given.
+ let imml = Self {
+ value: value_to_check,
+ n,
+ r,
+ s,
+ size,
+ };
+
+ // In debug mode, check that `n`/`r`/`s` are correct, given `value` and `size`.
+ debug_assert!(match ImmLogic::maybe_from_u64(
+ value_to_check,
+ if size == OperandSize::Size64 {
+ I64
+ } else {
+ I32
+ }
+ ) {
+ None => false, // fail: `value` is unrepresentable
+ Some(imml_check) => imml_check == imml,
+ });
+
+ imml
+ }
+}
+
+/// An immediate for shift instructions.
+#[derive(Clone, Debug)]
+pub struct ImmShift {
+ /// 6-bit shift amount.
+ pub imm: u8,
+}
+
+impl ImmShift {
+ /// Create an ImmShift from raw bits, if possible.
+ pub fn maybe_from_u64(val: u64) -> Option<ImmShift> {
+ if val < 64 {
+ Some(ImmShift { imm: val as u8 })
+ } else {
+ None
+ }
+ }
+
+ /// Get the immediate value.
+ pub fn value(&self) -> u8 {
+ self.imm
+ }
+}
+
+/// A 16-bit immediate for a MOVZ instruction, with a {0,16,32,48}-bit shift.
+#[derive(Clone, Copy, Debug)]
+pub struct MoveWideConst {
+ /// The value.
+ pub bits: u16,
+ /// Result is `bits` shifted 16*shift bits to the left.
+ pub shift: u8,
+}
+
+impl MoveWideConst {
+ /// Construct a MoveWideConst from an arbitrary 64-bit constant if possible.
+ pub fn maybe_from_u64(value: u64) -> Option<MoveWideConst> {
+ let mask0 = 0x0000_0000_0000_ffffu64;
+ let mask1 = 0x0000_0000_ffff_0000u64;
+ let mask2 = 0x0000_ffff_0000_0000u64;
+ let mask3 = 0xffff_0000_0000_0000u64;
+
+ if value == (value & mask0) {
+ return Some(MoveWideConst {
+ bits: (value & mask0) as u16,
+ shift: 0,
+ });
+ }
+ if value == (value & mask1) {
+ return Some(MoveWideConst {
+ bits: ((value >> 16) & mask0) as u16,
+ shift: 1,
+ });
+ }
+ if value == (value & mask2) {
+ return Some(MoveWideConst {
+ bits: ((value >> 32) & mask0) as u16,
+ shift: 2,
+ });
+ }
+ if value == (value & mask3) {
+ return Some(MoveWideConst {
+ bits: ((value >> 48) & mask0) as u16,
+ shift: 3,
+ });
+ }
+ None
+ }
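+
+    // Illustrative examples (not from the original source):
+    //   maybe_from_u64(0x12)        => bits 0x12, shift 0
+    //   maybe_from_u64(0x0012_0000) => bits 0x12, shift 1 (i.e. LSL #16)
+    //   maybe_from_u64(0x0012_0034) => None (spans two 16-bit chunks)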
+
+ pub fn maybe_with_shift(imm: u16, shift: u8) -> Option<MoveWideConst> {
+ let shift_enc = shift / 16;
+ if shift_enc > 3 {
+ None
+ } else {
+ Some(MoveWideConst {
+ bits: imm,
+ shift: shift_enc,
+ })
+ }
+ }
+
+ /// Returns the value that this constant represents.
+ pub fn value(&self) -> u64 {
+ (self.bits as u64) << (16 * self.shift)
+ }
+}
+
+/// Advanced SIMD modified immediate as used by MOVI/MVNI.
+#[derive(Clone, Copy, Debug)]
+pub struct ASIMDMovModImm {
+ imm: u8,
+ shift: u8,
+ shift_ones: bool,
+}
+
+impl ASIMDMovModImm {
+ pub fn maybe_from_u64(value: u64, size: ScalarSize) -> Option<ASIMDMovModImm> {
+ match size {
+ ScalarSize::Size8 => Some(ASIMDMovModImm {
+ imm: value as u8,
+ shift: 0,
+ shift_ones: false,
+ }),
+ _ => None,
+ }
+ }
+
+ /// Create a zero immediate of this format.
+ pub fn zero() -> Self {
+ ASIMDMovModImm {
+ imm: 0,
+ shift: 0,
+ shift_ones: false,
+ }
+ }
+
+ pub fn value(&self) -> (u8, u32, bool) {
+ (self.imm, self.shift as u32, self.shift_ones)
+ }
+}
+
+impl PrettyPrint for NZCV {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ let fmt = |c: char, v| if v { c.to_ascii_uppercase() } else { c };
+ format!(
+ "#{}{}{}{}",
+ fmt('n', self.n),
+ fmt('z', self.z),
+ fmt('c', self.c),
+ fmt('v', self.v)
+ )
+ }
+}
+
+impl PrettyPrint for UImm5 {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value)
+ }
+}
+
+impl PrettyPrint for Imm12 {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ let shift = if self.shift12 { 12 } else { 0 };
+ let value = u32::from(self.bits) << shift;
+ format!("#{}", value)
+ }
+}
+
+impl PrettyPrint for SImm7Scaled {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value)
+ }
+}
+
+impl PrettyPrint for FPULeftShiftImm {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.amount)
+ }
+}
+
+impl PrettyPrint for FPURightShiftImm {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.amount)
+ }
+}
+
+impl PrettyPrint for SImm9 {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value)
+ }
+}
+
+impl PrettyPrint for UImm12Scaled {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value)
+ }
+}
+
+impl PrettyPrint for ImmLogic {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value())
+ }
+}
+
+impl PrettyPrint for ImmShift {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.imm)
+ }
+}
+
+impl PrettyPrint for MoveWideConst {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ if self.shift == 0 {
+ format!("#{}", self.bits)
+ } else {
+ format!("#{}, LSL #{}", self.bits, self.shift * 16)
+ }
+ }
+}
+
+impl PrettyPrint for ASIMDMovModImm {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ if self.shift == 0 {
+ format!("#{}", self.imm)
+ } else {
+ let shift_type = if self.shift_ones { "MSL" } else { "LSL" };
+ format!("#{}, {} #{}", self.imm, shift_type, self.shift)
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn imm_logical_test() {
+ assert_eq!(None, ImmLogic::maybe_from_u64(0, I64));
+ assert_eq!(None, ImmLogic::maybe_from_u64(u64::max_value(), I64));
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 1,
+ n: true,
+ r: 0,
+ s: 0,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(1, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 2,
+ n: true,
+ r: 63,
+ s: 0,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(2, I64)
+ );
+
+ assert_eq!(None, ImmLogic::maybe_from_u64(5, I64));
+
+ assert_eq!(None, ImmLogic::maybe_from_u64(11, I64));
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 248,
+ n: true,
+ r: 61,
+ s: 4,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(248, I64)
+ );
+
+ assert_eq!(None, ImmLogic::maybe_from_u64(249, I64));
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 1920,
+ n: true,
+ r: 57,
+ s: 3,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(1920, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x7ffe,
+ n: true,
+ r: 63,
+ s: 13,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x7ffe, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x30000,
+ n: true,
+ r: 48,
+ s: 1,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x30000, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x100000,
+ n: true,
+ r: 44,
+ s: 0,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x100000, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: u64::max_value() - 1,
+ n: true,
+ r: 63,
+ s: 62,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(u64::max_value() - 1, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0xaaaaaaaaaaaaaaaa,
+ n: false,
+ r: 1,
+ s: 60,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0xaaaaaaaaaaaaaaaa, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x8181818181818181,
+ n: false,
+ r: 1,
+ s: 49,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x8181818181818181, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0xffc3ffc3ffc3ffc3,
+ n: false,
+ r: 10,
+ s: 43,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0xffc3ffc3ffc3ffc3, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x100000001,
+ n: false,
+ r: 0,
+ s: 0,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x100000001, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x1111111111111111,
+ n: false,
+ r: 0,
+ s: 56,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x1111111111111111, I64)
+ );
+
+ for n in 0..2 {
+ let types = if n == 0 { vec![I64, I32] } else { vec![I64] };
+ for s in 0..64 {
+ for r in 0..64 {
+ let imm = get_logical_imm(n, s, r);
+ for &ty in &types {
+ match ImmLogic::maybe_from_u64(imm, ty) {
+ Some(ImmLogic { value, .. }) => {
+ assert_eq!(imm, value);
+ ImmLogic::maybe_from_u64(!value, ty).unwrap();
+ }
+ None => assert_eq!(0, imm),
+ };
+ }
+ }
+ }
+ }
+ }
+
+ // Repeat a value that has `width` bits, across a 64-bit value.
+ fn repeat(value: u64, width: u64) -> u64 {
+ let mut result = value & ((1 << width) - 1);
+ let mut i = width;
+ while i < 64 {
+ result |= result << i;
+ i *= 2;
+ }
+ result
+ }
+
+ // Get the logical immediate, from the encoding N/R/S bits.
+ fn get_logical_imm(n: u32, s: u32, r: u32) -> u64 {
+ // An integer is constructed from the n, imm_s and imm_r bits according to
+ // the following table:
+ //
+ // N imms immr size S R
+ // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
+ // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
+ // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
+ // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
+ // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
+ // 0 11110s xxxxxr 2 UInt(s) UInt(r)
+ // (s bits must not be all set)
+ //
+ // A pattern is constructed of size bits, where the least significant S+1
+ // bits are set. The pattern is rotated right by R, and repeated across a
+ // 64-bit value.
+
+ if n == 1 {
+ if s == 0x3f {
+ return 0;
+ }
+ let bits = (1u64 << (s + 1)) - 1;
+ bits.rotate_right(r)
+ } else {
+ if (s >> 1) == 0x1f {
+ return 0;
+ }
+ let mut width = 0x20;
+ while width >= 0x2 {
+ if (s & width) == 0 {
+ let mask = width - 1;
+ if (s & mask) == mask {
+ return 0;
+ }
+ let bits = (1u64 << ((s & mask) + 1)) - 1;
+ return repeat(bits.rotate_right(r & mask), width.into());
+ }
+ width >>= 1;
+ }
+ unreachable!();
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/mod.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/mod.rs
new file mode 100644
index 0000000000..278302018e
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/mod.rs
@@ -0,0 +1,4057 @@
+//! This module defines aarch64-specific machine instruction types.
+
+// Some variants are not constructed, but we still want them as options in the future.
+#![allow(dead_code)]
+
+use crate::binemit::CodeOffset;
+use crate::ir::types::{
+ B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X4, F64, F64X2, FFLAGS, I16, I16X8,
+ I32, I32X4, I64, I64X2, I8, I8X16, IFLAGS, R32, R64,
+};
+use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, TrapCode, Type};
+use crate::isa::CallConv;
+use crate::machinst::*;
+use crate::{settings, CodegenError, CodegenResult};
+
+use regalloc::{PrettyPrint, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
+use regalloc::{RegUsageCollector, RegUsageMapper};
+
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use core::convert::TryFrom;
+use smallvec::{smallvec, SmallVec};
+use std::string::{String, ToString};
+
+pub mod regs;
+pub use self::regs::*;
+pub mod imms;
+pub use self::imms::*;
+pub mod args;
+pub use self::args::*;
+pub mod emit;
+pub use self::emit::*;
+pub mod unwind;
+
+#[cfg(test)]
+mod emit_tests;
+
+//=============================================================================
+// Instructions (top level): definition
+
+/// An ALU operation. This can be paired with several instruction formats
+/// below (see `Inst`) in any combination.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum ALUOp {
+ Add32,
+ Add64,
+ Sub32,
+ Sub64,
+ Orr32,
+ Orr64,
+ OrrNot32,
+ OrrNot64,
+ And32,
+ And64,
+ AndNot32,
+ AndNot64,
+ /// XOR (AArch64 calls this "EOR")
+ Eor32,
+ /// XOR (AArch64 calls this "EOR")
+ Eor64,
+    /// XNOR (the AArch64 mnemonic is "EON", i.e. EOR-NOT)
+    EorNot32,
+    /// XNOR (the AArch64 mnemonic is "EON", i.e. EOR-NOT)
+    EorNot64,
+ /// Add, setting flags
+ AddS32,
+ /// Add, setting flags
+ AddS64,
+ /// Sub, setting flags
+ SubS32,
+ /// Sub, setting flags
+ SubS64,
+ /// Signed multiply, high-word result
+ SMulH,
+ /// Unsigned multiply, high-word result
+ UMulH,
+ SDiv64,
+ UDiv64,
+ RotR32,
+ RotR64,
+ Lsr32,
+ Lsr64,
+ Asr32,
+ Asr64,
+ Lsl32,
+ Lsl64,
+}
+
+/// An ALU operation with three arguments.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum ALUOp3 {
+ /// Multiply-add
+ MAdd32,
+ /// Multiply-add
+ MAdd64,
+ /// Multiply-sub
+ MSub32,
+ /// Multiply-sub
+ MSub64,
+}
+
+/// A floating-point unit (FPU) operation with one arg.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FPUOp1 {
+ Abs32,
+ Abs64,
+ Neg32,
+ Neg64,
+ Sqrt32,
+ Sqrt64,
+ Cvt32To64,
+ Cvt64To32,
+}
+
+/// A floating-point unit (FPU) operation with two args.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FPUOp2 {
+ Add32,
+ Add64,
+ Sub32,
+ Sub64,
+ Mul32,
+ Mul64,
+ Div32,
+ Div64,
+ Max32,
+ Max64,
+ Min32,
+ Min64,
+ /// Signed saturating add
+ Sqadd64,
+ /// Unsigned saturating add
+ Uqadd64,
+ /// Signed saturating subtract
+ Sqsub64,
+ /// Unsigned saturating subtract
+ Uqsub64,
+}
+
+/// A floating-point unit (FPU) operation with two args, a register and an immediate.
+#[derive(Copy, Clone, Debug)]
+pub enum FPUOpRI {
+    /// Unsigned right shift. Rd = Rn >> #imm
+    UShr32(FPURightShiftImm),
+    /// Unsigned right shift. Rd = Rn >> #imm
+    UShr64(FPURightShiftImm),
+ /// Shift left and insert. Rd |= Rn << #imm
+ Sli32(FPULeftShiftImm),
+ /// Shift left and insert. Rd |= Rn << #imm
+ Sli64(FPULeftShiftImm),
+}
+
+/// A floating-point unit (FPU) operation with three args.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FPUOp3 {
+ MAdd32,
+ MAdd64,
+}
+
+/// A conversion from an FP to an integer value.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FpuToIntOp {
+ F32ToU32,
+ F32ToI32,
+ F32ToU64,
+ F32ToI64,
+ F64ToU32,
+ F64ToI32,
+ F64ToU64,
+ F64ToI64,
+}
+
+/// A conversion from an integer to an FP value.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum IntToFpuOp {
+ U32ToF32,
+ I32ToF32,
+ U32ToF64,
+ I32ToF64,
+ U64ToF32,
+ I64ToF32,
+ U64ToF64,
+ I64ToF64,
+}
+
+/// Modes for FP rounding ops: round down (floor) or up (ceil), or toward zero (trunc), or to
+/// nearest, and for 32- or 64-bit FP values.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FpuRoundMode {
+ Minus32,
+ Minus64,
+ Plus32,
+ Plus64,
+ Zero32,
+ Zero64,
+ Nearest32,
+ Nearest64,
+}
+
+/// Type of vector element extensions.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecExtendOp {
+ /// Signed extension of 8-bit elements
+ Sxtl8,
+ /// Signed extension of 16-bit elements
+ Sxtl16,
+ /// Signed extension of 32-bit elements
+ Sxtl32,
+ /// Unsigned extension of 8-bit elements
+ Uxtl8,
+ /// Unsigned extension of 16-bit elements
+ Uxtl16,
+ /// Unsigned extension of 32-bit elements
+ Uxtl32,
+}
+
+/// A vector ALU operation.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecALUOp {
+ /// Signed saturating add
+ Sqadd,
+ /// Unsigned saturating add
+ Uqadd,
+ /// Signed saturating subtract
+ Sqsub,
+ /// Unsigned saturating subtract
+ Uqsub,
+ /// Compare bitwise equal
+ Cmeq,
+ /// Compare signed greater than or equal
+ Cmge,
+ /// Compare signed greater than
+ Cmgt,
+    /// Compare unsigned higher or same
+    Cmhs,
+    /// Compare unsigned higher
+    Cmhi,
+ /// Floating-point compare equal
+ Fcmeq,
+ /// Floating-point compare greater than
+ Fcmgt,
+ /// Floating-point compare greater than or equal
+ Fcmge,
+ /// Bitwise and
+ And,
+ /// Bitwise bit clear
+ Bic,
+ /// Bitwise inclusive or
+ Orr,
+ /// Bitwise exclusive or
+ Eor,
+ /// Bitwise select
+ Bsl,
+ /// Unsigned maximum pairwise
+ Umaxp,
+ /// Add
+ Add,
+ /// Subtract
+ Sub,
+ /// Multiply
+ Mul,
+ /// Signed shift left
+ Sshl,
+ /// Unsigned shift left
+ Ushl,
+ /// Unsigned minimum
+ Umin,
+ /// Signed minimum
+ Smin,
+ /// Unsigned maximum
+ Umax,
+ /// Signed maximum
+ Smax,
+ /// Unsigned rounding halving add
+ Urhadd,
+ /// Floating-point add
+ Fadd,
+ /// Floating-point subtract
+ Fsub,
+ /// Floating-point divide
+ Fdiv,
+ /// Floating-point maximum
+ Fmax,
+ /// Floating-point minimum
+ Fmin,
+ /// Floating-point multiply
+ Fmul,
+ /// Add pairwise
+ Addp,
+ /// Unsigned multiply add long
+ Umlal,
+    /// Zip vectors (primary) [meaning, low halves]
+ Zip1,
+ /// Signed multiply long (low halves)
+ Smull,
+ /// Signed multiply long (high halves)
+ Smull2,
+}
+
+/// A Vector miscellaneous operation with two registers.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecMisc2 {
+ /// Bitwise NOT
+ Not,
+ /// Negate
+ Neg,
+ /// Absolute value
+ Abs,
+ /// Floating-point absolute value
+ Fabs,
+ /// Floating-point negate
+ Fneg,
+ /// Floating-point square root
+ Fsqrt,
+ /// Reverse elements in 64-bit doublewords
+ Rev64,
+ /// Shift left long (by element size)
+ Shll,
+ /// Floating-point convert to signed integer, rounding toward zero
+ Fcvtzs,
+ /// Floating-point convert to unsigned integer, rounding toward zero
+ Fcvtzu,
+ /// Signed integer convert to floating-point
+ Scvtf,
+ /// Unsigned integer convert to floating-point
+ Ucvtf,
+ /// Floating point round to integral, rounding towards nearest
+ Frintn,
+ /// Floating point round to integral, rounding towards zero
+ Frintz,
+ /// Floating point round to integral, rounding towards minus infinity
+ Frintm,
+ /// Floating point round to integral, rounding towards plus infinity
+ Frintp,
+}
+
+/// A Vector narrowing operation with two registers.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecMiscNarrowOp {
+ /// Extract Narrow
+ Xtn,
+ /// Signed saturating extract narrow
+ Sqxtn,
+ /// Signed saturating extract unsigned narrow
+ Sqxtun,
+}
+
+/// An operation across the lanes of vectors.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecLanesOp {
+ /// Integer addition across a vector
+ Addv,
+ /// Unsigned minimum across a vector
+ Uminv,
+}
+
+/// A shift-by-immediate operation on each lane of a vector.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecShiftImmOp {
+    /// Shift left (immediate)
+    Shl,
+    /// Unsigned shift right (immediate)
+    Ushr,
+    /// Signed shift right (immediate)
+    Sshr,
+}
+
+/// An operation on the bits of a register. This can be paired with several instruction formats
+/// below (see `Inst`) in any combination.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum BitOp {
+ /// Bit reverse
+ RBit32,
+ /// Bit reverse
+ RBit64,
+ Clz32,
+ Clz64,
+ Cls32,
+ Cls64,
+}
+
+impl BitOp {
+ /// What is the opcode's native width?
+ pub fn operand_size(&self) -> OperandSize {
+ match self {
+ BitOp::RBit32 | BitOp::Clz32 | BitOp::Cls32 => OperandSize::Size32,
+ _ => OperandSize::Size64,
+ }
+ }
+
+ /// Get the assembly mnemonic for this opcode.
+ pub fn op_str(&self) -> &'static str {
+ match self {
+ BitOp::RBit32 | BitOp::RBit64 => "rbit",
+ BitOp::Clz32 | BitOp::Clz64 => "clz",
+ BitOp::Cls32 | BitOp::Cls64 => "cls",
+ }
+ }
+}
+
+impl From<(Opcode, Type)> for BitOp {
+ /// Get the BitOp from the IR opcode.
+ fn from(op_ty: (Opcode, Type)) -> BitOp {
+ match op_ty {
+ (Opcode::Bitrev, I32) => BitOp::RBit32,
+ (Opcode::Bitrev, I64) => BitOp::RBit64,
+ (Opcode::Clz, I32) => BitOp::Clz32,
+ (Opcode::Clz, I64) => BitOp::Clz64,
+ (Opcode::Cls, I32) => BitOp::Cls32,
+ (Opcode::Cls, I64) => BitOp::Cls64,
+ _ => unreachable!("Called with non-bit op!: {:?}", op_ty),
+ }
+ }
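+
+    // For example (illustrative): `BitOp::from((Opcode::Clz, I64))` yields
+    // `BitOp::Clz64`.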
+}
+
+/// Additional information for (direct) Call instructions, left out of line to lower the size of
+/// the Inst enum.
+#[derive(Clone, Debug)]
+pub struct CallInfo {
+ pub dest: ExternalName,
+ pub uses: Vec<Reg>,
+ pub defs: Vec<Writable<Reg>>,
+ pub opcode: Opcode,
+ pub caller_callconv: CallConv,
+ pub callee_callconv: CallConv,
+}
+
+/// Additional information for CallInd instructions, left out of line to lower the size of the Inst
+/// enum.
+#[derive(Clone, Debug)]
+pub struct CallIndInfo {
+ pub rn: Reg,
+ pub uses: Vec<Reg>,
+ pub defs: Vec<Writable<Reg>>,
+ pub opcode: Opcode,
+ pub caller_callconv: CallConv,
+ pub callee_callconv: CallConv,
+}
+
+/// Additional information for JTSequence instructions, left out of line to lower the size of the Inst
+/// enum.
+#[derive(Clone, Debug)]
+pub struct JTSequenceInfo {
+ pub targets: Vec<BranchTarget>,
+ pub default_target: BranchTarget,
+ pub targets_for_term: Vec<MachLabel>, // needed for MachTerminator.
+}
+
+/// Instruction formats.
+#[derive(Clone, Debug)]
+pub enum Inst {
+ /// A no-op of zero size.
+ Nop0,
+
+ /// A no-op that is one instruction large.
+ Nop4,
+
+ /// An ALU operation with two register sources and a register destination.
+ AluRRR {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ },
+ /// An ALU operation with three register sources and a register destination.
+ AluRRRR {
+ alu_op: ALUOp3,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ ra: Reg,
+ },
+ /// An ALU operation with a register source and an immediate-12 source, and a register
+ /// destination.
+ AluRRImm12 {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ imm12: Imm12,
+ },
+ /// An ALU operation with a register source and an immediate-logic source, and a register destination.
+ AluRRImmLogic {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ imml: ImmLogic,
+ },
+ /// An ALU operation with a register source and an immediate-shiftamt source, and a register destination.
+ AluRRImmShift {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ immshift: ImmShift,
+ },
+ /// An ALU operation with two register sources, one of which can be shifted, and a register
+ /// destination.
+ AluRRRShift {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ shiftop: ShiftOpAndAmt,
+ },
+ /// An ALU operation with two register sources, one of which can be {zero,sign}-extended and
+ /// shifted, and a register destination.
+ AluRRRExtend {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ extendop: ExtendOp,
+ },
+
+ /// A bit op instruction with a single register source.
+ BitRR {
+ op: BitOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// An unsigned (zero-extending) 8-bit load.
+ ULoad8 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A signed (sign-extending) 8-bit load.
+ SLoad8 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// An unsigned (zero-extending) 16-bit load.
+ ULoad16 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A signed (sign-extending) 16-bit load.
+ SLoad16 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// An unsigned (zero-extending) 32-bit load.
+ ULoad32 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A signed (sign-extending) 32-bit load.
+ SLoad32 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A 64-bit load.
+ ULoad64 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+
+ /// An 8-bit store.
+ Store8 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A 16-bit store.
+ Store16 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A 32-bit store.
+ Store32 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A 64-bit store.
+ Store64 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+
+ /// A store of a pair of registers.
+ StoreP64 {
+ rt: Reg,
+ rt2: Reg,
+ mem: PairAMode,
+ flags: MemFlags,
+ },
+ /// A load of a pair of registers.
+ LoadP64 {
+ rt: Writable<Reg>,
+ rt2: Writable<Reg>,
+ mem: PairAMode,
+ flags: MemFlags,
+ },
+
+ /// A MOV instruction. These are encoded as ORR's (AluRRR form) but we
+ /// keep them separate at the `Inst` level for better pretty-printing
+ /// and faster `is_move()` logic.
+ Mov64 {
+ rd: Writable<Reg>,
+ rm: Reg,
+ },
+
+ /// A 32-bit MOV. Zeroes the top 32 bits of the destination. This is
+ /// effectively an alias for an unsigned 32-to-64-bit extension.
+ Mov32 {
+ rd: Writable<Reg>,
+ rm: Reg,
+ },
+
+ /// A MOVZ with a 16-bit immediate.
+ MovZ {
+ rd: Writable<Reg>,
+ imm: MoveWideConst,
+ size: OperandSize,
+ },
+
+ /// A MOVN with a 16-bit immediate.
+ MovN {
+ rd: Writable<Reg>,
+ imm: MoveWideConst,
+ size: OperandSize,
+ },
+
+ /// A MOVK with a 16-bit immediate.
+ MovK {
+ rd: Writable<Reg>,
+ imm: MoveWideConst,
+ size: OperandSize,
+ },
+
+ /// A sign- or zero-extend operation.
+ Extend {
+ rd: Writable<Reg>,
+ rn: Reg,
+ signed: bool,
+ from_bits: u8,
+ to_bits: u8,
+ },
+
+ /// A conditional-select operation.
+ CSel {
+ rd: Writable<Reg>,
+ cond: Cond,
+ rn: Reg,
+ rm: Reg,
+ },
+
+ /// A conditional-set operation.
+ CSet {
+ rd: Writable<Reg>,
+ cond: Cond,
+ },
+
+ /// A conditional comparison with an immediate.
+ CCmpImm {
+ size: OperandSize,
+ rn: Reg,
+ imm: UImm5,
+ nzcv: NZCV,
+ cond: Cond,
+ },
+
+ /// A synthetic insn, which is a load-linked store-conditional loop, that has the overall
+ /// effect of atomically modifying a memory location in a particular way. Because we have
+ /// no way to explain to the regalloc about earlyclobber registers, this instruction has
+ /// completely fixed operand registers, and we rely on the RA's coalescing to remove copies
+ /// in the surrounding code to the extent it can. The sequence is both preceded and
+ /// followed by a fence which is at least as comprehensive as that of the `Fence`
+ /// instruction below. This instruction is sequentially consistent. The operand
+ /// conventions are:
+ ///
+ /// x25 (rd) address
+ /// x26 (rd) second operand for `op`
+ /// x27 (wr) old value
+ /// x24 (wr) scratch reg; value afterwards has no meaning
+ /// x28 (wr) scratch reg; value afterwards has no meaning
+ AtomicRMW {
+ ty: Type, // I8, I16, I32 or I64
+ op: inst_common::AtomicRmwOp,
+ },
+
+ /// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked
+ /// store-conditional loop. (Although we could possibly implement it more directly using
+ /// CAS insns that are available in some revisions of AArch64 above 8.0). The sequence is
+ /// both preceded and followed by a fence which is at least as comprehensive as that of the
+ /// `Fence` instruction below. This instruction is sequentially consistent. Note that the
+ /// operand conventions, although very similar to AtomicRMW, are different:
+ ///
+ /// x25 (rd) address
+ /// x26 (rd) expected value
+ /// x28 (rd) replacement value
+ /// x27 (wr) old value
+ /// x24 (wr) scratch reg; value afterwards has no meaning
+ AtomicCAS {
+ ty: Type, // I8, I16, I32 or I64
+ },
+
+ /// Read `ty` bits from address `r_addr`, zero extend the loaded value to 64 bits and put it
+ /// in `r_data`. The load instruction is preceded by a fence at least as comprehensive as
+ /// that of the `Fence` instruction below. This instruction is sequentially consistent.
+ AtomicLoad {
+ ty: Type, // I8, I16, I32 or I64
+ r_data: Writable<Reg>,
+ r_addr: Reg,
+ },
+
+ /// Write the lowest `ty` bits of `r_data` to address `r_addr`, with a memory fence
+ /// instruction following the store. The fence is at least as comprehensive as that of the
+ /// `Fence` instruction below. This instruction is sequentially consistent.
+ AtomicStore {
+ ty: Type, // I8, I16, I32 or I64
+ r_data: Reg,
+ r_addr: Reg,
+ },
+
+ /// A memory fence. This must provide ordering to ensure that, at a minimum, neither loads
+ /// nor stores may move forwards or backwards across the fence. Currently emitted as "dmb
+ /// ish". This instruction is sequentially consistent.
+ Fence,
+
+ /// FPU move. Note that this is distinct from a vector-register
+ /// move; moving just 64 bits seems to be significantly faster.
+ FpuMove64 {
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// Vector register move.
+ FpuMove128 {
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// Move to scalar from a vector element.
+ FpuMoveFromVec {
+ rd: Writable<Reg>,
+ rn: Reg,
+ idx: u8,
+ size: VectorSize,
+ },
+
+ /// 1-op FPU instruction.
+ FpuRR {
+ fpu_op: FPUOp1,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// 2-op FPU instruction.
+ FpuRRR {
+ fpu_op: FPUOp2,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ },
+
+ FpuRRI {
+ fpu_op: FPUOpRI,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// 3-op FPU instruction.
+ FpuRRRR {
+ fpu_op: FPUOp3,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ ra: Reg,
+ },
+
+ /// FPU comparison, single-precision (32 bit).
+ FpuCmp32 {
+ rn: Reg,
+ rm: Reg,
+ },
+
+ /// FPU comparison, double-precision (64 bit).
+ FpuCmp64 {
+ rn: Reg,
+ rm: Reg,
+ },
+
+ /// Floating-point load, single-precision (32 bit).
+ FpuLoad32 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// Floating-point store, single-precision (32 bit).
+ FpuStore32 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// Floating-point load, double-precision (64 bit).
+ FpuLoad64 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// Floating-point store, double-precision (64 bit).
+ FpuStore64 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// Floating-point/vector load, 128 bit.
+ FpuLoad128 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// Floating-point/vector store, 128 bit.
+ FpuStore128 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+
+ LoadFpuConst64 {
+ rd: Writable<Reg>,
+ const_data: u64,
+ },
+
+ LoadFpuConst128 {
+ rd: Writable<Reg>,
+ const_data: u128,
+ },
+
+ /// Conversion: FP -> integer.
+ FpuToInt {
+ op: FpuToIntOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// Conversion: integer -> FP.
+ IntToFpu {
+ op: IntToFpuOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// FP conditional select, 32 bit.
+ FpuCSel32 {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ cond: Cond,
+ },
+ /// FP conditional select, 64 bit.
+ FpuCSel64 {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ cond: Cond,
+ },
+
+ /// Round to integer.
+ FpuRound {
+ op: FpuRoundMode,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// Move from a GPR to a vector register. The scalar value is parked in the lowest lane
+ /// of the destination, and all other lanes are zeroed out. Currently only 32- and 64-bit
+ /// transfers are supported.
+ MovToFpu {
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: ScalarSize,
+ },
+
+ /// Move to a vector element from a GPR.
+ MovToVec {
+ rd: Writable<Reg>,
+ rn: Reg,
+ idx: u8,
+ size: VectorSize,
+ },
+
+ /// Unsigned move from a vector element to a GPR.
+ MovFromVec {
+ rd: Writable<Reg>,
+ rn: Reg,
+ idx: u8,
+ size: VectorSize,
+ },
+
+ /// Signed move from a vector element to a GPR.
+ MovFromVecSigned {
+ rd: Writable<Reg>,
+ rn: Reg,
+ idx: u8,
+ size: VectorSize,
+ scalar_size: OperandSize,
+ },
+
+ /// Duplicate general-purpose register to vector.
+ VecDup {
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ },
+
+ /// Duplicate scalar to vector.
+ VecDupFromFpu {
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ },
+
+ /// Duplicate immediate to vector.
+ VecDupImm {
+ rd: Writable<Reg>,
+ imm: ASIMDMovModImm,
+ invert: bool,
+ size: VectorSize,
+ },
+
+ /// Vector extend.
+ VecExtend {
+ t: VecExtendOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ high_half: bool,
+ },
+
+ /// Move vector element to another vector element.
+ VecMovElement {
+ rd: Writable<Reg>,
+ rn: Reg,
+ dest_idx: u8,
+ src_idx: u8,
+ size: VectorSize,
+ },
+
+ /// Vector narrowing operation.
+ VecMiscNarrow {
+ op: VecMiscNarrowOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ high_half: bool,
+ },
+
+ /// A vector ALU op.
+ VecRRR {
+ alu_op: VecALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ size: VectorSize,
+ },
+
+ /// Vector two register miscellaneous instruction.
+ VecMisc {
+ op: VecMisc2,
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ },
+
+ /// Vector instruction across lanes.
+ VecLanes {
+ op: VecLanesOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ },
+
+ /// Vector shift by immediate: Shift Left (immediate), Unsigned Shift Right (immediate),
+ /// Signed Shift Right (immediate). These are somewhat unusual in that, for right shifts,
+ /// the allowed range of `imm` values is 1 to lane-size-in-bits, inclusive. A zero
+ /// right-shift cannot be encoded. Left shifts are "normal", though, having valid `imm`
+ /// values from 0 to lane-size-in-bits - 1 inclusive.
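+ /// For example, with 32-bit lanes a right shift encodes `imm` values 1 to 32 inclusive,
+ /// while a left shift encodes 0 to 31.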
+ VecShiftImm {
+ op: VecShiftImmOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ imm: u8,
+ },
+
+ /// Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes
+ /// of `rm` followed by the uppermost `16 - imm4` bytes of `rn`.
+ VecExtract {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ imm4: u8,
+ },
+
+ /// Table vector lookup - single register table. The table consists of 8-bit elements and is
+ /// stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether
+ /// to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination
+ /// vector that correspond to out-of-range indices (greater than 15) unmodified or to set them
+ /// to 0.
+ VecTbl {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ is_extension: bool,
+ },
+
+ /// Table vector lookup - two register table. The table consists of 8-bit elements and is
+ /// stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension`
+ /// specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in
+ /// the destination vector that correspond to out-of-range indices (greater than 31) unmodified
+ /// or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers
+ /// modulo 32, that is v31 and v0 (in that order) are consecutive registers.
+ VecTbl2 {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rn2: Reg,
+ rm: Reg,
+ is_extension: bool,
+ },
+
+ /// Load an element and replicate to all lanes of a vector.
+ VecLoadReplicate {
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ },
+
+ /// Vector conditional select, 128 bit. A synthetic instruction, which generates a 4-insn
+ /// control-flow diamond.
+ VecCSel {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ cond: Cond,
+ },
+
+ /// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
+ MovToNZCV {
+ rn: Reg,
+ },
+
+ /// Move from the NZCV flags (actually a `MRS Xn, NZCV` insn).
+ MovFromNZCV {
+ rd: Writable<Reg>,
+ },
+
+ /// A machine call instruction. N.B.: this allows only a +/- 128MB offset (it uses a relocation
+ /// of type `Reloc::Arm64Call`); if the destination distance is not `RelocDistance::Near`, the
+ /// code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit
+ /// target.
+ Call {
+ info: Box<CallInfo>,
+ },
+ /// A machine indirect-call instruction.
+ CallInd {
+ info: Box<CallIndInfo>,
+ },
+
+ // ---- branches (exactly one must appear at end of BB) ----
+ /// A machine return instruction.
+ Ret,
+
+ /// A placeholder instruction, generating no code, meaning that a function epilogue must be
+ /// inserted there.
+ EpiloguePlaceholder,
+
+ /// An unconditional branch.
+ Jump {
+ dest: BranchTarget,
+ },
+
+ /// A conditional branch. Contains two targets; at emission time, both are emitted, but
+ /// the MachBuffer knows to truncate the trailing branch when it is the fallthrough. We optimize the
+ /// choice of taken/not_taken (inverting the branch polarity as needed) based on the
+ /// fallthrough at the time of lowering.
+ CondBr {
+ taken: BranchTarget,
+ not_taken: BranchTarget,
+ kind: CondBrKind,
+ },
+
+ /// A conditional trap: execute a `udf` if the condition is true. This is
+ /// one VCode instruction because it uses embedded control flow; it is
+ /// logically a single-in, single-out region, but needs to appear as one
+ /// unit to the register allocator.
+ ///
+ /// The `CondBrKind` gives the condition under which the trap is taken: in the emitted
+ /// code, we branch over the `udf` instruction using the inverse of this condition.
+ TrapIf {
+ kind: CondBrKind,
+ trap_code: TrapCode,
+ },
+
+ /// An indirect branch through a register, augmented with the set of all
+ /// possible successors.
+ IndirectBr {
+ rn: Reg,
+ targets: Vec<MachLabel>,
+ },
+
+ /// A "break" instruction, used for e.g. traps and debug breakpoints.
+ Brk,
+
+ /// An instruction guaranteed to always be undefined and to trigger an illegal instruction at
+ /// runtime.
+ Udf {
+ trap_code: TrapCode,
+ },
+
+ /// Compute the address (using a PC-relative offset) of a memory location, using the `ADR`
+ /// instruction. Note that we take a simple offset, not a `MemLabel`, here, because `Adr` is
+ /// only used for now in fixed lowering sequences with hardcoded offsets. In the future we may
+ /// need full `MemLabel` support.
+ Adr {
+ rd: Writable<Reg>,
+ /// Offset in range -2^20 .. 2^20.
+ off: i32,
+ },
+
+ /// Raw 32-bit word, used for inline constants and jump-table entries.
+ Word4 {
+ data: u32,
+ },
+
+ /// Raw 64-bit word, used for inline constants.
+ Word8 {
+ data: u64,
+ },
+
+ /// Jump-table sequence, as one compound instruction (see note in lower_inst.rs for rationale).
+ JTSequence {
+ info: Box<JTSequenceInfo>,
+ ridx: Reg,
+ rtmp1: Writable<Reg>,
+ rtmp2: Writable<Reg>,
+ },
+
+ /// Load an inline symbol reference.
+ LoadExtName {
+ rd: Writable<Reg>,
+ name: Box<ExternalName>,
+ offset: i64,
+ },
+
+ /// Load address referenced by `mem` into `rd`.
+ LoadAddr {
+ rd: Writable<Reg>,
+ mem: AMode,
+ },
+
+ /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
+ /// controls how AMode::NominalSPOffset args are lowered.
+ VirtualSPOffsetAdj {
+ offset: i64,
+ },
+
+ /// Meta-insn, no-op in generated code: emit constant/branch veneer island
+ /// at this point (with a guard jump around it) if less than the needed
+ /// space is available before the next branch deadline. See the `MachBuffer`
+ /// implementation in `machinst/buffer.rs` for the overall algorithm. In
+ /// brief, we retain a set of "pending/unresolved label references" from
+ /// branches as we scan forward through instructions to emit machine code;
+ /// if we notice we're about to go out of range on an unresolved reference,
+ /// we stop, emit a bunch of "veneers" (branches in a form that has a longer
+ /// range, e.g. a 26-bit-offset unconditional jump), and point the original
+ /// label references to those. This is an "island" because it comes in the
+ /// middle of the code.
+ ///
+ /// This meta-instruction is a necessary part of the logic that determines
+ /// where to place islands. Ordinarily, we want to place them between basic
+ /// blocks, so we compute the worst-case size of each block, and emit the
+ /// island before starting a block if we would exceed a deadline before the
+ /// end of the block. However, some sequences (such as an inline jumptable)
+ /// are variable-length and not accounted for by this logic; so these
+ /// lowered sequences include an `EmitIsland` to trigger island generation
+ /// where necessary.
+ EmitIsland {
+ /// The needed space before the next deadline.
+ needed_space: CodeOffset,
+ },
+}
+
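+// For example, `count_zero_half_words(0x0000_1234_0000_5678, 4)` returns 2, since the
+// 16-bit chunks at bit positions 16..32 and 48..64 are zero.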
+fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize {
+ let mut count = 0;
+ for _ in 0..num_half_words {
+ if value & 0xffff == 0 {
+ count += 1;
+ }
+ value >>= 16;
+ }
+
+ count
+}
+
+#[test]
+fn inst_size_test() {
+ // This test will help with unintentionally growing the size
+ // of the Inst enum.
+ assert_eq!(32, std::mem::size_of::<Inst>());
+}
+
+impl Inst {
+ /// Create a move instruction.
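+ ///
+ /// For example, `Inst::mov(writable_xreg(0), xreg(1))` produces a `Mov64`, while a move
+ /// between vector registers produces an `FpuMove128` (an informal usage note).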
+ pub fn mov(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
+ assert!(to_reg.to_reg().get_class() == from_reg.get_class());
+ if from_reg.get_class() == RegClass::I64 {
+ Inst::Mov64 {
+ rd: to_reg,
+ rm: from_reg,
+ }
+ } else if from_reg.get_class() == RegClass::V128 {
+ Inst::FpuMove128 {
+ rd: to_reg,
+ rn: from_reg,
+ }
+ } else {
+ Inst::FpuMove64 {
+ rd: to_reg,
+ rn: from_reg,
+ }
+ }
+ }
+
+ /// Create a 32-bit move instruction.
+ pub fn mov32(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
+ Inst::Mov32 {
+ rd: to_reg,
+ rm: from_reg,
+ }
+ }
+
+ /// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN,
+ /// logical immediate, or a short MOVZ/MOVN/MOVK sequence).
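+ ///
+ /// Informal examples: `0x1_0000` fits a single MOVZ, `u64::MAX` a single MOVN,
+ /// `0x5555_5555_5555_5555` is a valid logical immediate and becomes an ORR with the zero
+ /// register, and a general 64-bit value may need up to four instructions (a leading MOVZ
+ /// or MOVN plus MOVKs).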
+ pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
+ if let Some(imm) = MoveWideConst::maybe_from_u64(value) {
+ // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
+ smallvec![Inst::MovZ {
+ rd,
+ imm,
+ size: OperandSize::Size64
+ }]
+ } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) {
+ // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
+ smallvec![Inst::MovN {
+ rd,
+ imm,
+ size: OperandSize::Size64
+ }]
+ } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) {
+ // Weird logical-instruction immediate in ORR with the zero register
+ smallvec![Inst::AluRRImmLogic {
+ alu_op: ALUOp::Orr64,
+ rd,
+ rn: zero_reg(),
+ imml,
+ }]
+ } else {
+ let mut insts = smallvec![];
+
+ // If the top 32 bits are zero, use 32-bit `mov` operations.
+ let (num_half_words, size, negated) = if value >> 32 == 0 {
+ (2, OperandSize::Size32, (!value << 32) >> 32)
+ } else {
+ (4, OperandSize::Size64, !value)
+ };
+ // If the number of 0xffff half words is greater than the number of 0x0000 half words
+ // it is more efficient to use `movn` for the first instruction.
+ let first_is_inverted = count_zero_half_words(negated, num_half_words)
+ > count_zero_half_words(value, num_half_words);
+ // Either 0xffff or 0x0000 half words can be skipped, depending on the first
+ // instruction used.
+ let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
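+ // Worked example (informal): for `value == 0xffff_ffff_1234_ffff` the negated value has
+ // three zero half words versus none in `value`, so `first_is_inverted` is true, the
+ // 0xffff half words are skipped, and the loop below emits a single
+ // `movn rd, #0xedcb, lsl #16`.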
+ let mut first_mov_emitted = false;
+
+ for i in 0..num_half_words {
+ let imm16 = (value >> (16 * i)) & 0xffff;
+ if imm16 != ignored_halfword {
+ if !first_mov_emitted {
+ first_mov_emitted = true;
+ if first_is_inverted {
+ let imm =
+ MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, i * 16)
+ .unwrap();
+ insts.push(Inst::MovN { rd, imm, size });
+ } else {
+ let imm =
+ MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
+ insts.push(Inst::MovZ { rd, imm, size });
+ }
+ } else {
+ let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
+ insts.push(Inst::MovK { rd, imm, size });
+ }
+ }
+ }
+
+ assert!(first_mov_emitted);
+
+ insts
+ }
+ }
+
+ /// Create instructions that load a 32-bit floating-point constant.
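+ ///
+ /// For instance, loading `1.0_f32` (bits `0x3f80_0000`) currently materializes the bits in
+ /// an integer temporary and then moves them into the low lane of `rd` via `MovToFpu`,
+ /// while zero is handled with a single vector-immediate move (an informal sketch of the
+ /// current strategy).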
+ pub fn load_fp_constant32<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ rd: Writable<Reg>,
+ value: u32,
+ mut alloc_tmp: F,
+ ) -> SmallVec<[Inst; 4]> {
+ if value == 0 {
+ smallvec![Inst::VecDupImm {
+ rd,
+ imm: ASIMDMovModImm::zero(),
+ invert: false,
+ size: VectorSize::Size8x8
+ }]
+ } else {
+ // TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent
+ // bits.
+ let tmp = alloc_tmp(RegClass::I64, I32);
+ let mut insts = Inst::load_constant(tmp, value as u64);
+
+ insts.push(Inst::MovToFpu {
+ rd,
+ rn: tmp.to_reg(),
+ size: ScalarSize::Size64,
+ });
+
+ insts
+ }
+ }
+
+ /// Create instructions that load a 64-bit floating-point constant.
+ pub fn load_fp_constant64<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ rd: Writable<Reg>,
+ const_data: u64,
+ mut alloc_tmp: F,
+ ) -> SmallVec<[Inst; 4]> {
+ if let Ok(const_data) = u32::try_from(const_data) {
+ Inst::load_fp_constant32(rd, const_data, alloc_tmp)
+ // TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent
+ // bits. Also, treat it as half of a 128-bit vector and consider replicated
+ // patterns. Scalar MOVI might also be an option.
+ } else if const_data & (u32::MAX as u64) == 0 {
+ let tmp = alloc_tmp(RegClass::I64, I64);
+ let mut insts = Inst::load_constant(tmp, const_data);
+
+ insts.push(Inst::MovToFpu {
+ rd,
+ rn: tmp.to_reg(),
+ size: ScalarSize::Size64,
+ });
+
+ insts
+ } else {
+ smallvec![Inst::LoadFpuConst64 { rd, const_data }]
+ }
+ }
+
+ /// Create instructions that load a 128-bit vector constant.
+ pub fn load_fp_constant128<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ rd: Writable<Reg>,
+ const_data: u128,
+ alloc_tmp: F,
+ ) -> SmallVec<[Inst; 5]> {
+ if let Ok(const_data) = u64::try_from(const_data) {
+ SmallVec::from(&Inst::load_fp_constant64(rd, const_data, alloc_tmp)[..])
+ } else if let Some((pattern, size)) =
+ Inst::get_replicated_vector_pattern(const_data, ScalarSize::Size64)
+ {
+ Inst::load_replicated_vector_pattern(
+ rd,
+ pattern,
+ VectorSize::from_lane_size(size, true),
+ alloc_tmp,
+ )
+ } else {
+ smallvec![Inst::LoadFpuConst128 { rd, const_data }]
+ }
+ }
+
+ /// Determine whether a 128-bit constant represents a vector consisting of elements with
+ /// the same value.
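+ ///
+ /// For example, when called with `ScalarSize::Size64`, a constant in which every byte is
+ /// `0x12` yields `Some((0x12, ScalarSize::Size8))`, while a constant whose two 64-bit
+ /// halves differ yields `None`.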
+ pub fn get_replicated_vector_pattern(
+ value: u128,
+ size: ScalarSize,
+ ) -> Option<(u64, ScalarSize)> {
+ let (mask, shift, next_size) = match size {
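+ // Note that `Size128` is used as a sentinel "next" size: the recursive call below
+ // returns `None` for it, which stops the subdivision at 8-bit elements.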
+ ScalarSize::Size8 => (u8::MAX as u128, 8, ScalarSize::Size128),
+ ScalarSize::Size16 => (u16::MAX as u128, 16, ScalarSize::Size8),
+ ScalarSize::Size32 => (u32::MAX as u128, 32, ScalarSize::Size16),
+ ScalarSize::Size64 => (u64::MAX as u128, 64, ScalarSize::Size32),
+ _ => return None,
+ };
+ let mut r = None;
+ let v = value & mask;
+
+ if (value >> shift) & mask == v {
+ r = Inst::get_replicated_vector_pattern(v, next_size);
+
+ if r.is_none() {
+ r = Some((v as u64, size));
+ }
+ }
+
+ r
+ }
+
+ /// Create instructions that load a 128-bit vector constant consisting of elements with
+ /// the same value.
+ pub fn load_replicated_vector_pattern<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ rd: Writable<Reg>,
+ pattern: u64,
+ size: VectorSize,
+ mut alloc_tmp: F,
+ ) -> SmallVec<[Inst; 5]> {
+ let lane_size = size.lane_size();
+
+ if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) {
+ smallvec![Inst::VecDupImm {
+ rd,
+ imm,
+ invert: false,
+ size
+ }]
+ } else if let Some(imm) = ASIMDMovModImm::maybe_from_u64(!pattern, lane_size) {
+ debug_assert_ne!(lane_size, ScalarSize::Size8);
+ debug_assert_ne!(lane_size, ScalarSize::Size64);
+
+ smallvec![Inst::VecDupImm {
+ rd,
+ imm,
+ invert: true,
+ size
+ }]
+ } else {
+ let tmp = alloc_tmp(RegClass::I64, I64);
+ let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern)[..]);
+
+ insts.push(Inst::VecDup {
+ rd,
+ rn: tmp.to_reg(),
+ size,
+ });
+
+ insts
+ }
+ }
+
+ /// Generic constructor for a load (zero-extending where appropriate).
+ pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
+ match ty {
+ B1 | B8 | I8 => Inst::ULoad8 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ B16 | I16 => Inst::ULoad16 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ B32 | I32 | R32 => Inst::ULoad32 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ B64 | I64 | R64 => Inst::ULoad64 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ F32 => Inst::FpuLoad32 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ F64 => Inst::FpuLoad64 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ _ => {
+ if ty.is_vector() {
+ let bits = ty_bits(ty);
+ let rd = into_reg;
+
+ if bits == 128 {
+ Inst::FpuLoad128 { rd, mem, flags }
+ } else {
+ assert_eq!(bits, 64);
+ Inst::FpuLoad64 { rd, mem, flags }
+ }
+ } else {
+ unimplemented!("gen_load({})", ty);
+ }
+ }
+ }
+ }
+
+ /// Generic constructor for a store.
+ pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
+ match ty {
+ B1 | B8 | I8 => Inst::Store8 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ B16 | I16 => Inst::Store16 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ B32 | I32 | R32 => Inst::Store32 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ B64 | I64 | R64 => Inst::Store64 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ F32 => Inst::FpuStore32 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ F64 => Inst::FpuStore64 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ _ => {
+ if ty.is_vector() {
+ let bits = ty_bits(ty);
+ let rd = from_reg;
+
+ if bits == 128 {
+ Inst::FpuStore128 { rd, mem, flags }
+ } else {
+ assert_eq!(bits, 64);
+ Inst::FpuStore64 { rd, mem, flags }
+ }
+ } else {
+ unimplemented!("gen_store({})", ty);
+ }
+ }
+ }
+ }
+}
+
+//=============================================================================
+// Instructions: get_regs
+
+fn memarg_regs(memarg: &AMode, collector: &mut RegUsageCollector) {
+ match memarg {
+ &AMode::Unscaled(reg, ..) | &AMode::UnsignedOffset(reg, ..) => {
+ collector.add_use(reg);
+ }
+ &AMode::RegReg(r1, r2, ..)
+ | &AMode::RegScaled(r1, r2, ..)
+ | &AMode::RegScaledExtended(r1, r2, ..)
+ | &AMode::RegExtended(r1, r2, ..) => {
+ collector.add_use(r1);
+ collector.add_use(r2);
+ }
+ &AMode::Label(..) => {}
+ &AMode::PreIndexed(reg, ..) | &AMode::PostIndexed(reg, ..) => {
+ collector.add_mod(reg);
+ }
+ &AMode::FPOffset(..) => {
+ collector.add_use(fp_reg());
+ }
+ &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => {
+ collector.add_use(stack_reg());
+ }
+ &AMode::RegOffset(r, ..) => {
+ collector.add_use(r);
+ }
+ }
+}
+
+fn pairmemarg_regs(pairmemarg: &PairAMode, collector: &mut RegUsageCollector) {
+ match pairmemarg {
+ &PairAMode::SignedOffset(reg, ..) => {
+ collector.add_use(reg);
+ }
+ &PairAMode::PreIndexed(reg, ..) | &PairAMode::PostIndexed(reg, ..) => {
+ collector.add_mod(reg);
+ }
+ }
+}
+
+fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
+ match inst {
+ &Inst::AluRRR { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::AluRRRR { rd, rn, rm, ra, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ collector.add_use(ra);
+ }
+ &Inst::AluRRImm12 { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::AluRRImmLogic { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::AluRRImmShift { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::AluRRRShift { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::AluRRRExtend { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::BitRR { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::ULoad8 { rd, ref mem, .. }
+ | &Inst::SLoad8 { rd, ref mem, .. }
+ | &Inst::ULoad16 { rd, ref mem, .. }
+ | &Inst::SLoad16 { rd, ref mem, .. }
+ | &Inst::ULoad32 { rd, ref mem, .. }
+ | &Inst::SLoad32 { rd, ref mem, .. }
+ | &Inst::ULoad64 { rd, ref mem, .. } => {
+ collector.add_def(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::Store8 { rd, ref mem, .. }
+ | &Inst::Store16 { rd, ref mem, .. }
+ | &Inst::Store32 { rd, ref mem, .. }
+ | &Inst::Store64 { rd, ref mem, .. } => {
+ collector.add_use(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::StoreP64 {
+ rt, rt2, ref mem, ..
+ } => {
+ collector.add_use(rt);
+ collector.add_use(rt2);
+ pairmemarg_regs(mem, collector);
+ }
+ &Inst::LoadP64 {
+ rt, rt2, ref mem, ..
+ } => {
+ collector.add_def(rt);
+ collector.add_def(rt2);
+ pairmemarg_regs(mem, collector);
+ }
+ &Inst::Mov64 { rd, rm } => {
+ collector.add_def(rd);
+ collector.add_use(rm);
+ }
+ &Inst::Mov32 { rd, rm } => {
+ collector.add_def(rd);
+ collector.add_use(rm);
+ }
+ &Inst::MovZ { rd, .. } | &Inst::MovN { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::MovK { rd, .. } => {
+ collector.add_mod(rd);
+ }
+ &Inst::CSel { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::CSet { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::CCmpImm { rn, .. } => {
+ collector.add_use(rn);
+ }
+ &Inst::AtomicRMW { .. } => {
+ collector.add_use(xreg(25));
+ collector.add_use(xreg(26));
+ collector.add_def(writable_xreg(24));
+ collector.add_def(writable_xreg(27));
+ collector.add_def(writable_xreg(28));
+ }
+ &Inst::AtomicCAS { .. } => {
+ collector.add_use(xreg(25));
+ collector.add_use(xreg(26));
+ collector.add_use(xreg(28));
+ collector.add_def(writable_xreg(24));
+ collector.add_def(writable_xreg(27));
+ }
+ &Inst::AtomicLoad { r_data, r_addr, .. } => {
+ collector.add_use(r_addr);
+ collector.add_def(r_data);
+ }
+ &Inst::AtomicStore { r_data, r_addr, .. } => {
+ collector.add_use(r_addr);
+ collector.add_use(r_data);
+ }
+ &Inst::Fence {} => {}
+ &Inst::FpuMove64 { rd, rn } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::FpuMove128 { rd, rn } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::FpuMoveFromVec { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::FpuRR { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::FpuRRR { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::FpuRRI { fpu_op, rd, rn, .. } => {
+ match fpu_op {
+ FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => collector.add_def(rd),
+ FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => collector.add_mod(rd),
+ }
+ collector.add_use(rn);
+ }
+ &Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ collector.add_use(ra);
+ }
+ &Inst::VecMisc { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+
+ &Inst::VecLanes { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecShiftImm { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecExtract { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::VecTbl {
+ rd,
+ rn,
+ rm,
+ is_extension,
+ } => {
+ collector.add_use(rn);
+ collector.add_use(rm);
+
+ if is_extension {
+ collector.add_mod(rd);
+ } else {
+ collector.add_def(rd);
+ }
+ }
+ &Inst::VecTbl2 {
+ rd,
+ rn,
+ rn2,
+ rm,
+ is_extension,
+ } => {
+ collector.add_use(rn);
+ collector.add_use(rn2);
+ collector.add_use(rm);
+
+ if is_extension {
+ collector.add_mod(rd);
+ } else {
+ collector.add_def(rd);
+ }
+ }
+ &Inst::VecLoadReplicate { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecCSel { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::FpuLoad32 { rd, ref mem, .. } => {
+ collector.add_def(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::FpuLoad64 { rd, ref mem, .. } => {
+ collector.add_def(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::FpuLoad128 { rd, ref mem, .. } => {
+ collector.add_def(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::FpuStore32 { rd, ref mem, .. } => {
+ collector.add_use(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::FpuStore64 { rd, ref mem, .. } => {
+ collector.add_use(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::FpuStore128 { rd, ref mem, .. } => {
+ collector.add_use(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::FpuToInt { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::IntToFpu { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::FpuCSel32 { rd, rn, rm, .. } | &Inst::FpuCSel64 { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::FpuRound { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::MovToFpu { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::MovToVec { rd, rn, .. } => {
+ collector.add_mod(rd);
+ collector.add_use(rn);
+ }
+ &Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecDup { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecDupFromFpu { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecDupImm { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::VecExtend { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecMovElement { rd, rn, .. } => {
+ collector.add_mod(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecMiscNarrow {
+ rd, rn, high_half, ..
+ } => {
+ collector.add_use(rn);
+
+ if high_half {
+ collector.add_mod(rd);
+ } else {
+ collector.add_def(rd);
+ }
+ }
+ &Inst::VecRRR {
+ alu_op, rd, rn, rm, ..
+ } => {
+ if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Umlal {
+ collector.add_mod(rd);
+ } else {
+ collector.add_def(rd);
+ }
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::MovToNZCV { rn } => {
+ collector.add_use(rn);
+ }
+ &Inst::MovFromNZCV { rd } => {
+ collector.add_def(rd);
+ }
+ &Inst::Extend { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::Jump { .. } | &Inst::Ret | &Inst::EpiloguePlaceholder => {}
+ &Inst::Call { ref info, .. } => {
+ collector.add_uses(&*info.uses);
+ collector.add_defs(&*info.defs);
+ }
+ &Inst::CallInd { ref info, .. } => {
+ collector.add_uses(&*info.uses);
+ collector.add_defs(&*info.defs);
+ collector.add_use(info.rn);
+ }
+ &Inst::CondBr { ref kind, .. } => match kind {
+ CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => {
+ collector.add_use(*rt);
+ }
+ CondBrKind::Cond(_) => {}
+ },
+ &Inst::IndirectBr { rn, .. } => {
+ collector.add_use(rn);
+ }
+ &Inst::Nop0 | &Inst::Nop4 => {}
+ &Inst::Brk => {}
+ &Inst::Udf { .. } => {}
+ &Inst::TrapIf { ref kind, .. } => match kind {
+ CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => {
+ collector.add_use(*rt);
+ }
+ CondBrKind::Cond(_) => {}
+ },
+ &Inst::Adr { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::Word4 { .. } | &Inst::Word8 { .. } => {}
+ &Inst::JTSequence {
+ ridx, rtmp1, rtmp2, ..
+ } => {
+ collector.add_use(ridx);
+ collector.add_def(rtmp1);
+ collector.add_def(rtmp2);
+ }
+ &Inst::LoadExtName { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::LoadAddr { rd, ref mem } => {
+ collector.add_def(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::VirtualSPOffsetAdj { .. } => {}
+ &Inst::EmitIsland { .. } => {}
+ }
+}
+
+//=============================================================================
+// Instructions: map_regs
+
+fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
+ fn map_use<RUM: RegUsageMapper>(m: &RUM, r: &mut Reg) {
+ if r.is_virtual() {
+ let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg();
+ *r = new;
+ }
+ }
+
+ fn map_def<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
+ if r.to_reg().is_virtual() {
+ let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg();
+ *r = Writable::from_reg(new);
+ }
+ }
+
+ fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
+ if r.to_reg().is_virtual() {
+ let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg();
+ *r = Writable::from_reg(new);
+ }
+ }
+
+ fn map_mem<RUM: RegUsageMapper>(m: &RUM, mem: &mut AMode) {
+ // N.B.: we take only the pre-map here, but this is OK because the
+ // only addressing modes that update registers (pre/post-increment on
+ // AArch64) both read and write registers, so they are "mods" rather
+ // than "defs", so must be the same in both the pre- and post-map.
+ match mem {
+ &mut AMode::Unscaled(ref mut reg, ..) => map_use(m, reg),
+ &mut AMode::UnsignedOffset(ref mut reg, ..) => map_use(m, reg),
+ &mut AMode::RegReg(ref mut r1, ref mut r2)
+ | &mut AMode::RegScaled(ref mut r1, ref mut r2, ..)
+ | &mut AMode::RegScaledExtended(ref mut r1, ref mut r2, ..)
+ | &mut AMode::RegExtended(ref mut r1, ref mut r2, ..) => {
+ map_use(m, r1);
+ map_use(m, r2);
+ }
+ &mut AMode::Label(..) => {}
+ &mut AMode::PreIndexed(ref mut r, ..) => map_mod(m, r),
+ &mut AMode::PostIndexed(ref mut r, ..) => map_mod(m, r),
+ &mut AMode::FPOffset(..)
+ | &mut AMode::SPOffset(..)
+ | &mut AMode::NominalSPOffset(..) => {}
+ &mut AMode::RegOffset(ref mut r, ..) => map_use(m, r),
+ };
+ }
+
+ fn map_pairmem<RUM: RegUsageMapper>(m: &RUM, mem: &mut PairAMode) {
+ match mem {
+ &mut PairAMode::SignedOffset(ref mut reg, ..) => map_use(m, reg),
+ &mut PairAMode::PreIndexed(ref mut reg, ..) => map_def(m, reg),
+ &mut PairAMode::PostIndexed(ref mut reg, ..) => map_def(m, reg),
+ }
+ }
+
+ fn map_br<RUM: RegUsageMapper>(m: &RUM, br: &mut CondBrKind) {
+ match br {
+ &mut CondBrKind::Zero(ref mut reg) => map_use(m, reg),
+ &mut CondBrKind::NotZero(ref mut reg) => map_use(m, reg),
+ &mut CondBrKind::Cond(..) => {}
+ };
+ }
+
+ match inst {
+ &mut Inst::AluRRR {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::AluRRRR {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ref mut ra,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ map_use(mapper, ra);
+ }
+ &mut Inst::AluRRImm12 {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::AluRRImmLogic {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::AluRRImmShift {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::AluRRRShift {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::AluRRRExtend {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::BitRR {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::ULoad8 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::SLoad8 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::ULoad16 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::SLoad16 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::ULoad32 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::SLoad32 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+
+ &mut Inst::ULoad64 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::Store8 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::Store16 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::Store32 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::Store64 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+
+ &mut Inst::StoreP64 {
+ ref mut rt,
+ ref mut rt2,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rt);
+ map_use(mapper, rt2);
+ map_pairmem(mapper, mem);
+ }
+ &mut Inst::LoadP64 {
+ ref mut rt,
+ ref mut rt2,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rt);
+ map_def(mapper, rt2);
+ map_pairmem(mapper, mem);
+ }
+ &mut Inst::Mov64 {
+ ref mut rd,
+ ref mut rm,
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rm);
+ }
+ &mut Inst::Mov32 {
+ ref mut rd,
+ ref mut rm,
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rm);
+ }
+ &mut Inst::MovZ { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::MovN { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::MovK { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::CSel {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::CSet { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::CCmpImm { ref mut rn, .. } => {
+ map_use(mapper, rn);
+ }
+ &mut Inst::AtomicRMW { .. } => {
+ // There are no vregs to map in this insn.
+ }
+ &mut Inst::AtomicCAS { .. } => {
+ // There are no vregs to map in this insn.
+ }
+ &mut Inst::AtomicLoad {
+ ref mut r_data,
+ ref mut r_addr,
+ ..
+ } => {
+ map_def(mapper, r_data);
+ map_use(mapper, r_addr);
+ }
+ &mut Inst::AtomicStore {
+ ref mut r_data,
+ ref mut r_addr,
+ ..
+ } => {
+ map_use(mapper, r_data);
+ map_use(mapper, r_addr);
+ }
+ &mut Inst::Fence {} => {}
+ &mut Inst::FpuMove64 {
+ ref mut rd,
+ ref mut rn,
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuMove128 {
+ ref mut rd,
+ ref mut rn,
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuMoveFromVec {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuRR {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuRRR {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuRRI {
+ fpu_op,
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ match fpu_op {
+ FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => map_def(mapper, rd),
+ FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => map_mod(mapper, rd),
+ }
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuRRRR {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ref mut ra,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ map_use(mapper, ra);
+ }
+ &mut Inst::VecMisc {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecLanes {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecShiftImm {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecExtract {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::VecTbl {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ is_extension,
+ } => {
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+
+ if is_extension {
+ map_mod(mapper, rd);
+ } else {
+ map_def(mapper, rd);
+ }
+ }
+ &mut Inst::VecTbl2 {
+ ref mut rd,
+ ref mut rn,
+ ref mut rn2,
+ ref mut rm,
+ is_extension,
+ } => {
+ map_use(mapper, rn);
+ map_use(mapper, rn2);
+ map_use(mapper, rm);
+
+ if is_extension {
+ map_mod(mapper, rd);
+ } else {
+ map_def(mapper, rd);
+ }
+ }
+ &mut Inst::VecLoadReplicate {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecCSel {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuCmp32 {
+ ref mut rn,
+ ref mut rm,
+ } => {
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuCmp64 {
+ ref mut rn,
+ ref mut rm,
+ } => {
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuLoad32 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::FpuLoad64 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::FpuLoad128 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::FpuStore32 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::FpuStore64 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::FpuStore128 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::LoadFpuConst64 { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::LoadFpuConst128 { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::FpuToInt {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::IntToFpu {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuCSel32 {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuCSel64 {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuRound {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::MovToFpu {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::MovToVec {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_mod(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::MovFromVec {
+ ref mut rd,
+ ref mut rn,
+ ..
+ }
+ | &mut Inst::MovFromVecSigned {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecDup {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecDupFromFpu {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecDupImm { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::VecExtend {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecMovElement {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_mod(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecMiscNarrow {
+ ref mut rd,
+ ref mut rn,
+ high_half,
+ ..
+ } => {
+ map_use(mapper, rn);
+
+ if high_half {
+ map_mod(mapper, rd);
+ } else {
+ map_def(mapper, rd);
+ }
+ }
+ &mut Inst::VecRRR {
+ alu_op,
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Umlal {
+ map_mod(mapper, rd);
+ } else {
+ map_def(mapper, rd);
+ }
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::MovToNZCV { ref mut rn } => {
+ map_use(mapper, rn);
+ }
+ &mut Inst::MovFromNZCV { ref mut rd } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::Extend {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::Jump { .. } => {}
+ &mut Inst::Call { ref mut info } => {
+ for r in info.uses.iter_mut() {
+ map_use(mapper, r);
+ }
+ for r in info.defs.iter_mut() {
+ map_def(mapper, r);
+ }
+ }
+ &mut Inst::Ret | &mut Inst::EpiloguePlaceholder => {}
+ &mut Inst::CallInd { ref mut info, .. } => {
+ for r in info.uses.iter_mut() {
+ map_use(mapper, r);
+ }
+ for r in info.defs.iter_mut() {
+ map_def(mapper, r);
+ }
+ map_use(mapper, &mut info.rn);
+ }
+ &mut Inst::CondBr { ref mut kind, .. } => {
+ map_br(mapper, kind);
+ }
+ &mut Inst::IndirectBr { ref mut rn, .. } => {
+ map_use(mapper, rn);
+ }
+ &mut Inst::Nop0 | &mut Inst::Nop4 | &mut Inst::Brk | &mut Inst::Udf { .. } => {}
+ &mut Inst::TrapIf { ref mut kind, .. } => {
+ map_br(mapper, kind);
+ }
+ &mut Inst::Adr { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::Word4 { .. } | &mut Inst::Word8 { .. } => {}
+ &mut Inst::JTSequence {
+ ref mut ridx,
+ ref mut rtmp1,
+ ref mut rtmp2,
+ ..
+ } => {
+ map_use(mapper, ridx);
+ map_def(mapper, rtmp1);
+ map_def(mapper, rtmp2);
+ }
+ &mut Inst::LoadExtName { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::LoadAddr {
+ ref mut rd,
+ ref mut mem,
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::VirtualSPOffsetAdj { .. } => {}
+ &mut Inst::EmitIsland { .. } => {}
+ }
+}
+
+//=============================================================================
+// Instructions: misc functions and external interface
+
+impl MachInst for Inst {
+ type LabelUse = LabelUse;
+
+ fn get_regs(&self, collector: &mut RegUsageCollector) {
+ aarch64_get_regs(self, collector)
+ }
+
+ fn map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
+ aarch64_map_regs(self, mapper);
+ }
+
+ fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
+ match self {
+ &Inst::Mov64 { rd, rm } => Some((rd, rm)),
+ &Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
+ &Inst::FpuMove128 { rd, rn } => Some((rd, rn)),
+ _ => None,
+ }
+ }
+
+ fn is_epilogue_placeholder(&self) -> bool {
+ if let Inst::EpiloguePlaceholder = self {
+ true
+ } else {
+ false
+ }
+ }
+
+ fn is_included_in_clobbers(&self) -> bool {
+ // We exclude call instructions from the clobber-set when they are calls
+ // from caller to callee with the same ABI. Such calls cannot possibly
+ // force any new registers to be saved in the prologue, because anything
+ // that the callee clobbers, the caller is also allowed to clobber. This
+ // both saves work and enables us to more precisely follow the
+ // half-caller-save, half-callee-save SysV ABI for some vector
+ // registers.
+ //
+ // See the note in [crate::isa::aarch64::abi::is_caller_save_reg] for
+ // more information on this ABI-implementation hack.
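+ //
+ // E.g. (informally): a call between functions sharing a calling convention contributes
+ // nothing here, while a cross-ABI call is conservatively kept in the clobber set.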
+ match self {
+ &Inst::Call { ref info } => info.caller_callconv != info.callee_callconv,
+ &Inst::CallInd { ref info } => info.caller_callconv != info.callee_callconv,
+ _ => true,
+ }
+ }
+
+ fn is_term<'a>(&'a self) -> MachTerminator<'a> {
+ match self {
+ &Inst::Ret | &Inst::EpiloguePlaceholder => MachTerminator::Ret,
+ &Inst::Jump { dest } => MachTerminator::Uncond(dest.as_label().unwrap()),
+ &Inst::CondBr {
+ taken, not_taken, ..
+ } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
+ &Inst::IndirectBr { ref targets, .. } => MachTerminator::Indirect(&targets[..]),
+ &Inst::JTSequence { ref info, .. } => {
+ MachTerminator::Indirect(&info.targets_for_term[..])
+ }
+ _ => MachTerminator::None,
+ }
+ }
+
+ fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
+ assert!(ty.bits() <= 128);
+ Inst::mov(to_reg, from_reg)
+ }
+
+ fn gen_constant<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ to_reg: Writable<Reg>,
+ value: u64,
+ ty: Type,
+ alloc_tmp: F,
+ ) -> SmallVec<[Inst; 4]> {
+ if ty == F64 {
+ Inst::load_fp_constant64(to_reg, value, alloc_tmp)
+ } else if ty == F32 {
+ Inst::load_fp_constant32(to_reg, value as u32, alloc_tmp)
+ } else {
+ // Must be an integer type.
+ debug_assert!(
+ ty == B1
+ || ty == I8
+ || ty == B8
+ || ty == I16
+ || ty == B16
+ || ty == I32
+ || ty == B32
+ || ty == I64
+ || ty == B64
+ || ty == R32
+ || ty == R64
+ );
+ Inst::load_constant(to_reg, value)
+ }
+ }
+
+ fn gen_zero_len_nop() -> Inst {
+ Inst::Nop0
+ }
+
+ fn gen_nop(preferred_size: usize) -> Inst {
+ // We can't give a NOP (or any insn) < 4 bytes.
+ assert!(preferred_size >= 4);
+ Inst::Nop4
+ }
+
+ fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
+ None
+ }
+
+ fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
+ match ty {
+ I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 | R32 | R64 => Ok(RegClass::I64),
+ F32 | F64 => Ok(RegClass::V128),
+ IFLAGS | FFLAGS => Ok(RegClass::I64),
+ B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 | F32X4 | F64X2 => {
+ Ok(RegClass::V128)
+ }
+ _ => Err(CodegenError::Unsupported(format!(
+ "Unexpected SSA-value type: {}",
+ ty
+ ))),
+ }
+ }
+
+ fn gen_jump(target: MachLabel) -> Inst {
+ Inst::Jump {
+ dest: BranchTarget::Label(target),
+ }
+ }
+
+ fn reg_universe(flags: &settings::Flags) -> RealRegUniverse {
+ create_reg_universe(flags)
+ }
+
+ fn worst_case_size() -> CodeOffset {
+ // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of
+ // an 8-instruction sequence (saturating int-to-float conversions) with three embedded
+ // 64-bit f64 constants.
+ //
+ // Note that inline jump-tables handle island/pool insertion separately, so we do not need
+ // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not
+ // feasible for other reasons).
+ 44
+ }
+
+ fn ref_type_regclass(_: &settings::Flags) -> RegClass {
+ RegClass::I64
+ }
+}
+
+//=============================================================================
+// Pretty-printing of instructions.
+
+fn mem_finalize_for_show(
+ mem: &AMode,
+ mb_rru: Option<&RealRegUniverse>,
+ state: &EmitState,
+) -> (String, AMode) {
+ let (mem_insts, mem) = mem_finalize(0, mem, state);
+ let mut mem_str = mem_insts
+ .into_iter()
+ .map(|inst| inst.show_rru(mb_rru))
+ .collect::<Vec<_>>()
+ .join(" ; ");
+ if !mem_str.is_empty() {
+ mem_str += " ; ";
+ }
+
+ (mem_str, mem)
+}
+
+impl PrettyPrint for Inst {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ self.pretty_print(mb_rru, &mut EmitState::default())
+ }
+}
+
+impl Inst {
+ fn print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
+ fn op_name_size(alu_op: ALUOp) -> (&'static str, OperandSize) {
+ match alu_op {
+ ALUOp::Add32 => ("add", OperandSize::Size32),
+ ALUOp::Add64 => ("add", OperandSize::Size64),
+ ALUOp::Sub32 => ("sub", OperandSize::Size32),
+ ALUOp::Sub64 => ("sub", OperandSize::Size64),
+ ALUOp::Orr32 => ("orr", OperandSize::Size32),
+ ALUOp::Orr64 => ("orr", OperandSize::Size64),
+ ALUOp::And32 => ("and", OperandSize::Size32),
+ ALUOp::And64 => ("and", OperandSize::Size64),
+ ALUOp::Eor32 => ("eor", OperandSize::Size32),
+ ALUOp::Eor64 => ("eor", OperandSize::Size64),
+ ALUOp::AddS32 => ("adds", OperandSize::Size32),
+ ALUOp::AddS64 => ("adds", OperandSize::Size64),
+ ALUOp::SubS32 => ("subs", OperandSize::Size32),
+ ALUOp::SubS64 => ("subs", OperandSize::Size64),
+ ALUOp::SMulH => ("smulh", OperandSize::Size64),
+ ALUOp::UMulH => ("umulh", OperandSize::Size64),
+ ALUOp::SDiv64 => ("sdiv", OperandSize::Size64),
+ ALUOp::UDiv64 => ("udiv", OperandSize::Size64),
+ ALUOp::AndNot32 => ("bic", OperandSize::Size32),
+ ALUOp::AndNot64 => ("bic", OperandSize::Size64),
+ ALUOp::OrrNot32 => ("orn", OperandSize::Size32),
+ ALUOp::OrrNot64 => ("orn", OperandSize::Size64),
+ ALUOp::EorNot32 => ("eon", OperandSize::Size32),
+ ALUOp::EorNot64 => ("eon", OperandSize::Size64),
+ ALUOp::RotR32 => ("ror", OperandSize::Size32),
+ ALUOp::RotR64 => ("ror", OperandSize::Size64),
+ ALUOp::Lsr32 => ("lsr", OperandSize::Size32),
+ ALUOp::Lsr64 => ("lsr", OperandSize::Size64),
+ ALUOp::Asr32 => ("asr", OperandSize::Size32),
+ ALUOp::Asr64 => ("asr", OperandSize::Size64),
+ ALUOp::Lsl32 => ("lsl", OperandSize::Size32),
+ ALUOp::Lsl64 => ("lsl", OperandSize::Size64),
+ }
+ }
+
+ match self {
+ &Inst::Nop0 => "nop-zero-len".to_string(),
+ &Inst::Nop4 => "nop".to_string(),
+ &Inst::AluRRR { alu_op, rd, rn, rm } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let rm = show_ireg_sized(rm, mb_rru, size);
+ format!("{} {}, {}, {}", op, rd, rn, rm)
+ }
+ &Inst::AluRRRR {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ra,
+ } => {
+ let (op, size) = match alu_op {
+ ALUOp3::MAdd32 => ("madd", OperandSize::Size32),
+ ALUOp3::MAdd64 => ("madd", OperandSize::Size64),
+ ALUOp3::MSub32 => ("msub", OperandSize::Size32),
+ ALUOp3::MSub64 => ("msub", OperandSize::Size64),
+ };
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let rm = show_ireg_sized(rm, mb_rru, size);
+ let ra = show_ireg_sized(ra, mb_rru, size);
+
+ format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra)
+ }
+ &Inst::AluRRImm12 {
+ alu_op,
+ rd,
+ rn,
+ ref imm12,
+ } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+
+ if imm12.bits == 0 && alu_op == ALUOp::Add64 {
+ // special-case MOV (used for moving into SP).
+ format!("mov {}, {}", rd, rn)
+ } else {
+ let imm12 = imm12.show_rru(mb_rru);
+ format!("{} {}, {}, {}", op, rd, rn, imm12)
+ }
+ }
+ &Inst::AluRRImmLogic {
+ alu_op,
+ rd,
+ rn,
+ ref imml,
+ } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let imml = imml.show_rru(mb_rru);
+ format!("{} {}, {}, {}", op, rd, rn, imml)
+ }
+ &Inst::AluRRImmShift {
+ alu_op,
+ rd,
+ rn,
+ ref immshift,
+ } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let immshift = immshift.show_rru(mb_rru);
+ format!("{} {}, {}, {}", op, rd, rn, immshift)
+ }
+ &Inst::AluRRRShift {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ref shiftop,
+ } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let rm = show_ireg_sized(rm, mb_rru, size);
+ let shiftop = shiftop.show_rru(mb_rru);
+ format!("{} {}, {}, {}, {}", op, rd, rn, rm, shiftop)
+ }
+ &Inst::AluRRRExtend {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ref extendop,
+ } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let rm = show_ireg_sized(rm, mb_rru, size);
+ let extendop = extendop.show_rru(mb_rru);
+ format!("{} {}, {}, {}, {}", op, rd, rn, rm, extendop)
+ }
+ &Inst::BitRR { op, rd, rn } => {
+ let size = op.operand_size();
+ let op = op.op_str();
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::ULoad8 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::SLoad8 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::ULoad16 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::SLoad16 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::ULoad32 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::SLoad32 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::ULoad64 {
+ rd,
+ ref mem,
+ ..
+ } => {
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+
+ let is_unscaled = match &mem {
+ &AMode::Unscaled(..) => true,
+ _ => false,
+ };
+ let (op, size) = match (self, is_unscaled) {
+ (&Inst::ULoad8 { .. }, false) => ("ldrb", OperandSize::Size32),
+ (&Inst::ULoad8 { .. }, true) => ("ldurb", OperandSize::Size32),
+ (&Inst::SLoad8 { .. }, false) => ("ldrsb", OperandSize::Size64),
+ (&Inst::SLoad8 { .. }, true) => ("ldursb", OperandSize::Size64),
+ (&Inst::ULoad16 { .. }, false) => ("ldrh", OperandSize::Size32),
+ (&Inst::ULoad16 { .. }, true) => ("ldurh", OperandSize::Size32),
+ (&Inst::SLoad16 { .. }, false) => ("ldrsh", OperandSize::Size64),
+ (&Inst::SLoad16 { .. }, true) => ("ldursh", OperandSize::Size64),
+ (&Inst::ULoad32 { .. }, false) => ("ldr", OperandSize::Size32),
+ (&Inst::ULoad32 { .. }, true) => ("ldur", OperandSize::Size32),
+ (&Inst::SLoad32 { .. }, false) => ("ldrsw", OperandSize::Size64),
+ (&Inst::SLoad32 { .. }, true) => ("ldursw", OperandSize::Size64),
+ (&Inst::ULoad64 { .. }, false) => ("ldr", OperandSize::Size64),
+ (&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64),
+ _ => unreachable!(),
+ };
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}{} {}, {}", mem_str, op, rd, mem)
+ }
+ &Inst::Store8 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::Store16 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::Store32 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::Store64 {
+ rd,
+ ref mem,
+ ..
+ } => {
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+
+ let is_unscaled = match &mem {
+ &AMode::Unscaled(..) => true,
+ _ => false,
+ };
+ let (op, size) = match (self, is_unscaled) {
+ (&Inst::Store8 { .. }, false) => ("strb", OperandSize::Size32),
+ (&Inst::Store8 { .. }, true) => ("sturb", OperandSize::Size32),
+ (&Inst::Store16 { .. }, false) => ("strh", OperandSize::Size32),
+ (&Inst::Store16 { .. }, true) => ("sturh", OperandSize::Size32),
+ (&Inst::Store32 { .. }, false) => ("str", OperandSize::Size32),
+ (&Inst::Store32 { .. }, true) => ("stur", OperandSize::Size32),
+ (&Inst::Store64 { .. }, false) => ("str", OperandSize::Size64),
+ (&Inst::Store64 { .. }, true) => ("stur", OperandSize::Size64),
+ _ => unreachable!(),
+ };
+ let rd = show_ireg_sized(rd, mb_rru, size);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}{} {}, {}", mem_str, op, rd, mem)
+ }
+ &Inst::StoreP64 { rt, rt2, ref mem, .. } => {
+ let rt = rt.show_rru(mb_rru);
+ let rt2 = rt2.show_rru(mb_rru);
+ let mem = mem.show_rru(mb_rru);
+ format!("stp {}, {}, {}", rt, rt2, mem)
+ }
+ &Inst::LoadP64 { rt, rt2, ref mem, .. } => {
+ let rt = rt.to_reg().show_rru(mb_rru);
+ let rt2 = rt2.to_reg().show_rru(mb_rru);
+ let mem = mem.show_rru(mb_rru);
+ format!("ldp {}, {}, {}", rt, rt2, mem)
+ }
+ &Inst::Mov64 { rd, rm } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let rm = rm.show_rru(mb_rru);
+ format!("mov {}, {}", rd, rm)
+ }
+ &Inst::Mov32 { rd, rm } => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
+ let rm = show_ireg_sized(rm, mb_rru, OperandSize::Size32);
+ format!("mov {}, {}", rd, rm)
+ }
+ &Inst::MovZ { rd, ref imm, size } => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let imm = imm.show_rru(mb_rru);
+ format!("movz {}, {}", rd, imm)
+ }
+ &Inst::MovN { rd, ref imm, size } => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let imm = imm.show_rru(mb_rru);
+ format!("movn {}, {}", rd, imm)
+ }
+ &Inst::MovK { rd, ref imm, size } => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let imm = imm.show_rru(mb_rru);
+ format!("movk {}, {}", rd, imm)
+ }
+ &Inst::CSel { rd, rn, rm, cond } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let rn = rn.show_rru(mb_rru);
+ let rm = rm.show_rru(mb_rru);
+ let cond = cond.show_rru(mb_rru);
+ format!("csel {}, {}, {}, {}", rd, rn, rm, cond)
+ }
+ &Inst::CSet { rd, cond } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let cond = cond.show_rru(mb_rru);
+ format!("cset {}, {}", rd, cond)
+ }
+ &Inst::CCmpImm {
+ size,
+ rn,
+ imm,
+ nzcv,
+ cond,
+ } => {
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let imm = imm.show_rru(mb_rru);
+ let nzcv = nzcv.show_rru(mb_rru);
+ let cond = cond.show_rru(mb_rru);
+ format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond)
+ }
+ &Inst::AtomicRMW { ty, op, .. } => {
+ format!(
+ "atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}",
+ ty.bits(), op)
+ }
+ &Inst::AtomicCAS { ty, .. } => {
+ format!(
+ "atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}",
+ ty.bits())
+ }
+ &Inst::AtomicLoad { ty, r_data, r_addr, .. } => {
+ format!(
+ "atomically {{ {} = zero_extend_{}_bits_at[{}] }}",
+ r_data.show_rru(mb_rru), ty.bits(), r_addr.show_rru(mb_rru))
+ }
+ &Inst::AtomicStore { ty, r_data, r_addr, .. } => {
+ format!(
+ "atomically {{ {}_bits_at[{}] = {} }}", ty.bits(), r_addr.show_rru(mb_rru), r_data.show_rru(mb_rru))
+ }
+ &Inst::Fence {} => {
+ format!("dmb ish")
+ }
+ &Inst::FpuMove64 { rd, rn } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let rn = rn.show_rru(mb_rru);
+ format!("mov {}.8b, {}.8b", rd, rn)
+ }
+ &Inst::FpuMove128 { rd, rn } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let rn = rn.show_rru(mb_rru);
+ format!("mov {}.16b, {}.16b", rd, rn)
+ }
+ &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
+ let rn = show_vreg_element(rn, mb_rru, idx, size);
+ format!("mov {}, {}", rd, rn)
+ }
+ &Inst::FpuRR { fpu_op, rd, rn } => {
+ let (op, sizesrc, sizedest) = match fpu_op {
+ FPUOp1::Abs32 => ("fabs", ScalarSize::Size32, ScalarSize::Size32),
+ FPUOp1::Abs64 => ("fabs", ScalarSize::Size64, ScalarSize::Size64),
+ FPUOp1::Neg32 => ("fneg", ScalarSize::Size32, ScalarSize::Size32),
+ FPUOp1::Neg64 => ("fneg", ScalarSize::Size64, ScalarSize::Size64),
+ FPUOp1::Sqrt32 => ("fsqrt", ScalarSize::Size32, ScalarSize::Size32),
+ FPUOp1::Sqrt64 => ("fsqrt", ScalarSize::Size64, ScalarSize::Size64),
+ FPUOp1::Cvt32To64 => ("fcvt", ScalarSize::Size32, ScalarSize::Size64),
+ FPUOp1::Cvt64To32 => ("fcvt", ScalarSize::Size64, ScalarSize::Size32),
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest);
+ let rn = show_vreg_scalar(rn, mb_rru, sizesrc);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::FpuRRR { fpu_op, rd, rn, rm } => {
+ let (op, size) = match fpu_op {
+ FPUOp2::Add32 => ("fadd", ScalarSize::Size32),
+ FPUOp2::Add64 => ("fadd", ScalarSize::Size64),
+ FPUOp2::Sub32 => ("fsub", ScalarSize::Size32),
+ FPUOp2::Sub64 => ("fsub", ScalarSize::Size64),
+ FPUOp2::Mul32 => ("fmul", ScalarSize::Size32),
+ FPUOp2::Mul64 => ("fmul", ScalarSize::Size64),
+ FPUOp2::Div32 => ("fdiv", ScalarSize::Size32),
+ FPUOp2::Div64 => ("fdiv", ScalarSize::Size64),
+ FPUOp2::Max32 => ("fmax", ScalarSize::Size32),
+ FPUOp2::Max64 => ("fmax", ScalarSize::Size64),
+ FPUOp2::Min32 => ("fmin", ScalarSize::Size32),
+ FPUOp2::Min64 => ("fmin", ScalarSize::Size64),
+ FPUOp2::Sqadd64 => ("sqadd", ScalarSize::Size64),
+ FPUOp2::Uqadd64 => ("uqadd", ScalarSize::Size64),
+ FPUOp2::Sqsub64 => ("sqsub", ScalarSize::Size64),
+ FPUOp2::Uqsub64 => ("uqsub", ScalarSize::Size64),
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
+ let rn = show_vreg_scalar(rn, mb_rru, size);
+ let rm = show_vreg_scalar(rm, mb_rru, size);
+ format!("{} {}, {}, {}", op, rd, rn, rm)
+ }
+ &Inst::FpuRRI { fpu_op, rd, rn } => {
+ let (op, imm, vector) = match fpu_op {
+ FPUOpRI::UShr32(imm) => ("ushr", imm.show_rru(mb_rru), true),
+ FPUOpRI::UShr64(imm) => ("ushr", imm.show_rru(mb_rru), false),
+ FPUOpRI::Sli32(imm) => ("sli", imm.show_rru(mb_rru), true),
+ FPUOpRI::Sli64(imm) => ("sli", imm.show_rru(mb_rru), false),
+ };
+
+ let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector {
+ |reg, mb_rru| show_vreg_vector(reg, mb_rru, VectorSize::Size32x2)
+ } else {
+ |reg, mb_rru| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64)
+ };
+ let rd = show_vreg_fn(rd.to_reg(), mb_rru);
+ let rn = show_vreg_fn(rn, mb_rru);
+ format!("{} {}, {}, {}", op, rd, rn, imm)
+ }
+ &Inst::FpuRRRR {
+ fpu_op,
+ rd,
+ rn,
+ rm,
+ ra,
+ } => {
+ let (op, size) = match fpu_op {
+ FPUOp3::MAdd32 => ("fmadd", ScalarSize::Size32),
+ FPUOp3::MAdd64 => ("fmadd", ScalarSize::Size64),
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
+ let rn = show_vreg_scalar(rn, mb_rru, size);
+ let rm = show_vreg_scalar(rm, mb_rru, size);
+ let ra = show_vreg_scalar(ra, mb_rru, size);
+ format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra)
+ }
+ &Inst::FpuCmp32 { rn, rm } => {
+ let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32);
+ let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32);
+ format!("fcmp {}, {}", rn, rm)
+ }
+ &Inst::FpuCmp64 { rn, rm } => {
+ let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64);
+ let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64);
+ format!("fcmp {}, {}", rn, rm)
+ }
+ &Inst::FpuLoad32 { rd, ref mem, .. } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32);
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}ldr {}, {}", mem_str, rd, mem)
+ }
+ &Inst::FpuLoad64 { rd, ref mem, .. } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}ldr {}, {}", mem_str, rd, mem)
+ }
+ &Inst::FpuLoad128 { rd, ref mem, .. } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let rd = "q".to_string() + &rd[1..];
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}ldr {}, {}", mem_str, rd, mem)
+ }
+ &Inst::FpuStore32 { rd, ref mem, .. } => {
+ let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size32);
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}str {}, {}", mem_str, rd, mem)
+ }
+ &Inst::FpuStore64 { rd, ref mem, .. } => {
+ let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size64);
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}str {}, {}", mem_str, rd, mem)
+ }
+ &Inst::FpuStore128 { rd, ref mem, .. } => {
+ let rd = rd.show_rru(mb_rru);
+ let rd = "q".to_string() + &rd[1..];
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}str {}, {}", mem_str, rd, mem)
+ }
+ &Inst::LoadFpuConst64 { rd, const_data } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
+ format!("ldr {}, pc+8 ; b 12 ; data.f64 {}", rd, f64::from_bits(const_data))
+ }
+ &Inst::LoadFpuConst128 { rd, const_data } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size128);
+ format!("ldr {}, pc+8 ; b 20 ; data.f128 0x{:032x}", rd, const_data)
+ }
+ &Inst::FpuToInt { op, rd, rn } => {
+ let (op, sizesrc, sizedest) = match op {
+ FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32),
+ FpuToIntOp::F32ToU32 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size32),
+ FpuToIntOp::F32ToI64 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size64),
+ FpuToIntOp::F32ToU64 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size64),
+ FpuToIntOp::F64ToI32 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size32),
+ FpuToIntOp::F64ToU32 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size32),
+ FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64),
+ FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64),
+ };
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, sizedest);
+ let rn = show_vreg_scalar(rn, mb_rru, sizesrc);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::IntToFpu { op, rd, rn } => {
+ let (op, sizesrc, sizedest) = match op {
+ IntToFpuOp::I32ToF32 => ("scvtf", OperandSize::Size32, ScalarSize::Size32),
+ IntToFpuOp::U32ToF32 => ("ucvtf", OperandSize::Size32, ScalarSize::Size32),
+ IntToFpuOp::I64ToF32 => ("scvtf", OperandSize::Size64, ScalarSize::Size32),
+ IntToFpuOp::U64ToF32 => ("ucvtf", OperandSize::Size64, ScalarSize::Size32),
+ IntToFpuOp::I32ToF64 => ("scvtf", OperandSize::Size32, ScalarSize::Size64),
+ IntToFpuOp::U32ToF64 => ("ucvtf", OperandSize::Size32, ScalarSize::Size64),
+ IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64),
+ IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64),
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest);
+ let rn = show_ireg_sized(rn, mb_rru, sizesrc);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::FpuCSel32 { rd, rn, rm, cond } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32);
+ let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32);
+ let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32);
+ let cond = cond.show_rru(mb_rru);
+ format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond)
+ }
+ &Inst::FpuCSel64 { rd, rn, rm, cond } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
+ let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64);
+ let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64);
+ let cond = cond.show_rru(mb_rru);
+ format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond)
+ }
+ &Inst::FpuRound { op, rd, rn } => {
+ let (inst, size) = match op {
+ FpuRoundMode::Minus32 => ("frintm", ScalarSize::Size32),
+ FpuRoundMode::Minus64 => ("frintm", ScalarSize::Size64),
+ FpuRoundMode::Plus32 => ("frintp", ScalarSize::Size32),
+ FpuRoundMode::Plus64 => ("frintp", ScalarSize::Size64),
+ FpuRoundMode::Zero32 => ("frintz", ScalarSize::Size32),
+ FpuRoundMode::Zero64 => ("frintz", ScalarSize::Size64),
+ FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32),
+ FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64),
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
+ let rn = show_vreg_scalar(rn, mb_rru, size);
+ format!("{} {}, {}", inst, rd, rn)
+ }
+ &Inst::MovToFpu { rd, rn, size } => {
+ let operand_size = size.operand_size();
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, operand_size);
+ format!("fmov {}, {}", rd, rn)
+ }
+ &Inst::MovToVec { rd, rn, idx, size } => {
+ let rd = show_vreg_element(rd.to_reg(), mb_rru, idx, size);
+ let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
+ format!("mov {}, {}", rd, rn)
+ }
+ &Inst::MovFromVec { rd, rn, idx, size } => {
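+ // "mov" (to general register) is the standard alias of "umov" for 32- and 64-bit lanes; narrower lanes have no such alias and use "umov" directly.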
+ let op = match size {
+ VectorSize::Size8x16 => "umov",
+ VectorSize::Size16x8 => "umov",
+ VectorSize::Size32x4 => "mov",
+ VectorSize::Size64x2 => "mov",
+ _ => unimplemented!(),
+ };
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size.operand_size());
+ let rn = show_vreg_element(rn, mb_rru, idx, size);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::MovFromVecSigned {
+ rd,
+ rn,
+ idx,
+ size,
+ scalar_size,
+ } => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, scalar_size);
+ let rn = show_vreg_element(rn, mb_rru, idx, size);
+ format!("smov {}, {}", rd, rn)
+ }
+ &Inst::VecDup { rd, rn, size } => {
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
+ format!("dup {}, {}", rd, rn)
+ }
+ &Inst::VecDupFromFpu { rd, rn, size } => {
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+ let rn = show_vreg_element(rn, mb_rru, 0, size);
+ format!("dup {}, {}", rd, rn)
+ }
+ &Inst::VecDupImm { rd, imm, invert, size } => {
+ let imm = imm.show_rru(mb_rru);
+ let op = if invert {
+ "mvni"
+ } else {
+ "movi"
+ };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+
+ format!("{} {}, {}", op, rd, imm)
+ }
+ &Inst::VecExtend { t, rd, rn, high_half } => {
+ let (op, dest, src) = match (t, high_half) {
+ (VecExtendOp::Sxtl8, false) => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8),
+ (VecExtendOp::Sxtl8, true) => ("sxtl2", VectorSize::Size16x8, VectorSize::Size8x16),
+ (VecExtendOp::Sxtl16, false) => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4),
+ (VecExtendOp::Sxtl16, true) => ("sxtl2", VectorSize::Size32x4, VectorSize::Size16x8),
+ (VecExtendOp::Sxtl32, false) => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2),
+ (VecExtendOp::Sxtl32, true) => ("sxtl2", VectorSize::Size64x2, VectorSize::Size32x4),
+ (VecExtendOp::Uxtl8, false) => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8),
+ (VecExtendOp::Uxtl8, true) => ("uxtl2", VectorSize::Size16x8, VectorSize::Size8x16),
+ (VecExtendOp::Uxtl16, false) => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4),
+ (VecExtendOp::Uxtl16, true) => ("uxtl2", VectorSize::Size32x4, VectorSize::Size16x8),
+ (VecExtendOp::Uxtl32, false) => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2),
+ (VecExtendOp::Uxtl32, true) => ("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4),
+ };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
+ let rn = show_vreg_vector(rn, mb_rru, src);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::VecMovElement {
+ rd,
+ rn,
+ dest_idx,
+ src_idx,
+ size,
+ } => {
+ let rd = show_vreg_element(rd.to_reg(), mb_rru, dest_idx, size);
+ let rn = show_vreg_element(rn, mb_rru, src_idx, size);
+ format!("mov {}, {}", rd, rn)
+ }
+ &Inst::VecMiscNarrow { op, rd, rn, size, high_half } => {
+ let dest_size = if high_half {
+ assert!(size.is_128bits());
+ size
+ } else {
+ size.halve()
+ };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size);
+ let rn = show_vreg_vector(rn, mb_rru, size.widen());
+ let op = match (op, high_half) {
+ (VecMiscNarrowOp::Xtn, false) => "xtn",
+ (VecMiscNarrowOp::Xtn, true) => "xtn2",
+ (VecMiscNarrowOp::Sqxtn, false) => "sqxtn",
+ (VecMiscNarrowOp::Sqxtn, true) => "sqxtn2",
+ (VecMiscNarrowOp::Sqxtun, false) => "sqxtun",
+ (VecMiscNarrowOp::Sqxtun, true) => "sqxtun2",
+ };
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::VecRRR {
+ rd,
+ rn,
+ rm,
+ alu_op,
+ size,
+ } => {
+ let (op, size) = match alu_op {
+ VecALUOp::Sqadd => ("sqadd", size),
+ VecALUOp::Uqadd => ("uqadd", size),
+ VecALUOp::Sqsub => ("sqsub", size),
+ VecALUOp::Uqsub => ("uqsub", size),
+ VecALUOp::Cmeq => ("cmeq", size),
+ VecALUOp::Cmge => ("cmge", size),
+ VecALUOp::Cmgt => ("cmgt", size),
+ VecALUOp::Cmhs => ("cmhs", size),
+ VecALUOp::Cmhi => ("cmhi", size),
+ VecALUOp::Fcmeq => ("fcmeq", size),
+ VecALUOp::Fcmgt => ("fcmgt", size),
+ VecALUOp::Fcmge => ("fcmge", size),
+ VecALUOp::And => ("and", VectorSize::Size8x16),
+ VecALUOp::Bic => ("bic", VectorSize::Size8x16),
+ VecALUOp::Orr => ("orr", VectorSize::Size8x16),
+ VecALUOp::Eor => ("eor", VectorSize::Size8x16),
+ VecALUOp::Bsl => ("bsl", VectorSize::Size8x16),
+ VecALUOp::Umaxp => ("umaxp", size),
+ VecALUOp::Add => ("add", size),
+ VecALUOp::Sub => ("sub", size),
+ VecALUOp::Mul => ("mul", size),
+ VecALUOp::Sshl => ("sshl", size),
+ VecALUOp::Ushl => ("ushl", size),
+ VecALUOp::Umin => ("umin", size),
+ VecALUOp::Smin => ("smin", size),
+ VecALUOp::Umax => ("umax", size),
+ VecALUOp::Smax => ("smax", size),
+ VecALUOp::Urhadd => ("urhadd", size),
+ VecALUOp::Fadd => ("fadd", size),
+ VecALUOp::Fsub => ("fsub", size),
+ VecALUOp::Fdiv => ("fdiv", size),
+ VecALUOp::Fmax => ("fmax", size),
+ VecALUOp::Fmin => ("fmin", size),
+ VecALUOp::Fmul => ("fmul", size),
+ VecALUOp::Addp => ("addp", size),
+ VecALUOp::Umlal => ("umlal", size),
+ VecALUOp::Zip1 => ("zip1", size),
+ VecALUOp::Smull => ("smull", size),
+ VecALUOp::Smull2 => ("smull2", size),
+ };
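+ // Widening ops (umlal, smull, smull2) print a double-width destination; smull additionally shows its sources with the halved (lower-half) arrangement.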
+ let rd_size = match alu_op {
+ VecALUOp::Umlal | VecALUOp::Smull | VecALUOp::Smull2 => size.widen(),
+ _ => size
+ };
+ let rn_size = match alu_op {
+ VecALUOp::Smull => size.halve(),
+ _ => size
+ };
+ let rm_size = rn_size;
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
+ let rn = show_vreg_vector(rn, mb_rru, rn_size);
+ let rm = show_vreg_vector(rm, mb_rru, rm_size);
+ format!("{} {}, {}, {}", op, rd, rn, rm)
+ }
+ &Inst::VecMisc { op, rd, rn, size } => {
+ let is_shll = op == VecMisc2::Shll;
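+ // shll shifts each element left by the element width, so the printed immediate matches the source lane size.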
+ let suffix = match (is_shll, size) {
+ (true, VectorSize::Size8x8) => ", #8",
+ (true, VectorSize::Size16x4) => ", #16",
+ (true, VectorSize::Size32x2) => ", #32",
+ _ => "",
+ };
+
+ let (op, size) = match op {
+ VecMisc2::Not => (
+ "mvn",
+ if size.is_128bits() {
+ VectorSize::Size8x16
+ } else {
+ VectorSize::Size8x8
+ },
+ ),
+ VecMisc2::Neg => ("neg", size),
+ VecMisc2::Abs => ("abs", size),
+ VecMisc2::Fabs => ("fabs", size),
+ VecMisc2::Fneg => ("fneg", size),
+ VecMisc2::Fsqrt => ("fsqrt", size),
+ VecMisc2::Rev64 => ("rev64", size),
+ VecMisc2::Shll => ("shll", size),
+ VecMisc2::Fcvtzs => ("fcvtzs", size),
+ VecMisc2::Fcvtzu => ("fcvtzu", size),
+ VecMisc2::Scvtf => ("scvtf", size),
+ VecMisc2::Ucvtf => ("ucvtf", size),
+ VecMisc2::Frintn => ("frintn", size),
+ VecMisc2::Frintz => ("frintz", size),
+ VecMisc2::Frintm => ("frintm", size),
+ VecMisc2::Frintp => ("frintp", size),
+ };
+
+ let rd_size = if is_shll { size.widen() } else { size };
+
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
+ let rn = show_vreg_vector(rn, mb_rru, size);
+ format!("{} {}, {}{}", op, rd, rn, suffix)
+ }
+ &Inst::VecLanes { op, rd, rn, size } => {
+ let op = match op {
+ VecLanesOp::Uminv => "uminv",
+ VecLanesOp::Addv => "addv",
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
+ let rn = show_vreg_vector(rn, mb_rru, size);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::VecShiftImm { op, rd, rn, size, imm } => {
+ let op = match op {
+ VecShiftImmOp::Shl => "shl",
+ VecShiftImmOp::Ushr => "ushr",
+ VecShiftImmOp::Sshr => "sshr",
+ };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+ let rn = show_vreg_vector(rn, mb_rru, size);
+ format!("{} {}, {}, #{}", op, rd, rn, imm)
+ }
+ &Inst::VecExtract { rd, rn, rm, imm4 } => {
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+ let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+ let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+ format!("ext {}, {}, {}, #{}", rd, rn, rm, imm4)
+ }
+ &Inst::VecTbl {
+ rd,
+ rn,
+ rm,
+ is_extension,
+ } => {
+ let op = if is_extension { "tbx" } else { "tbl" };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+ let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+ let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+ format!("{} {}, {{ {} }}, {}", op, rd, rn, rm)
+ }
+ &Inst::VecTbl2 {
+ rd,
+ rn,
+ rn2,
+ rm,
+ is_extension,
+ } => {
+ let op = if is_extension { "tbx" } else { "tbl" };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+ let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+ let rn2 = show_vreg_vector(rn2, mb_rru, VectorSize::Size8x16);
+ let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+ format!("{} {}, {{ {}, {} }}, {}", op, rd, rn, rn2, rm)
+ }
+ &Inst::VecLoadReplicate { rd, rn, size, .. } => {
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+ let rn = rn.show_rru(mb_rru);
+
+ format!("ld1r {{ {} }}, [{}]", rd, rn)
+ }
+ &Inst::VecCSel { rd, rn, rm, cond } => {
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+ let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+ let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+ let cond = cond.show_rru(mb_rru);
+ format!("vcsel {}, {}, {}, {} (if-then-else diamond)", rd, rn, rm, cond)
+ }
+ &Inst::MovToNZCV { rn } => {
+ let rn = rn.show_rru(mb_rru);
+ format!("msr nzcv, {}", rn)
+ }
+ &Inst::MovFromNZCV { rd } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ format!("mrs {}, nzcv", rd)
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ } if from_bits >= 8 => {
+ // Is the destination a 32-bit register? Corresponds to whether
+ // extend-to width is <= 32 bits, *unless* we have an unsigned
+ // 32-to-64-bit extension, which is implemented with a "mov" to a
+ // 32-bit (W-reg) dest, because this zeroes the top 32 bits.
+ let dest_size = if !signed && from_bits == 32 && to_bits == 64 {
+ OperandSize::Size32
+ } else {
+ OperandSize::from_bits(to_bits)
+ };
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
+ let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_bits(from_bits));
+ let op = match (signed, from_bits, to_bits) {
+ (false, 8, 32) => "uxtb",
+ (true, 8, 32) => "sxtb",
+ (false, 16, 32) => "uxth",
+ (true, 16, 32) => "sxth",
+ (false, 8, 64) => "uxtb",
+ (true, 8, 64) => "sxtb",
+ (false, 16, 64) => "uxth",
+ (true, 16, 64) => "sxth",
+ (false, 32, 64) => "mov", // special case (see above).
+ (true, 32, 64) => "sxtw",
+ _ => panic!("Unsupported Extend case: {:?}", self),
+ };
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ } if from_bits == 1 && signed => {
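+ // Printed as: mask to the low bit, then subtract the result from zero so that a 1 becomes all-ones, i.e. the sign-extended value.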
+ let dest_size = OperandSize::from_bits(to_bits);
+ let zr = if dest_size.is32() { "wzr" } else { "xzr" };
+ let rd32 = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
+ let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
+ format!("and {}, {}, #1 ; sub {}, {}, {}", rd32, rn, rd, zr, rd)
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ ..
+ } if from_bits == 1 && !signed => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
+ let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
+ format!("and {}, {}, #1", rd, rn)
+ }
+ &Inst::Extend { .. } => {
+ panic!("Unsupported Extend case");
+ }
+ &Inst::Call { .. } => format!("bl 0"),
+ &Inst::CallInd { ref info, .. } => {
+ let rn = info.rn.show_rru(mb_rru);
+ format!("blr {}", rn)
+ }
+ &Inst::Ret => "ret".to_string(),
+ &Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(),
+ &Inst::Jump { ref dest } => {
+ let dest = dest.show_rru(mb_rru);
+ format!("b {}", dest)
+ }
+ &Inst::CondBr {
+ ref taken,
+ ref not_taken,
+ ref kind,
+ } => {
+ let taken = taken.show_rru(mb_rru);
+ let not_taken = not_taken.show_rru(mb_rru);
+ match kind {
+ &CondBrKind::Zero(reg) => {
+ let reg = reg.show_rru(mb_rru);
+ format!("cbz {}, {} ; b {}", reg, taken, not_taken)
+ }
+ &CondBrKind::NotZero(reg) => {
+ let reg = reg.show_rru(mb_rru);
+ format!("cbnz {}, {} ; b {}", reg, taken, not_taken)
+ }
+ &CondBrKind::Cond(c) => {
+ let c = c.show_rru(mb_rru);
+ format!("b.{} {} ; b {}", c, taken, not_taken)
+ }
+ }
+ }
+ &Inst::IndirectBr { rn, .. } => {
+ let rn = rn.show_rru(mb_rru);
+ format!("br {}", rn)
+ }
+ &Inst::Brk => "brk #0".to_string(),
+ &Inst::Udf { .. } => "udf".to_string(),
+ &Inst::TrapIf { ref kind, .. } => match kind {
+ &CondBrKind::Zero(reg) => {
+ let reg = reg.show_rru(mb_rru);
+ format!("cbnz {}, 8 ; udf", reg)
+ }
+ &CondBrKind::NotZero(reg) => {
+ let reg = reg.show_rru(mb_rru);
+ format!("cbz {}, 8 ; udf", reg)
+ }
+ &CondBrKind::Cond(c) => {
+ let c = c.invert().show_rru(mb_rru);
+ format!("b.{} 8 ; udf", c)
+ }
+ },
+ &Inst::Adr { rd, off } => {
+ let rd = rd.show_rru(mb_rru);
+ format!("adr {}, pc+{}", rd, off)
+ }
+ &Inst::Word4 { data } => format!("data.i32 {}", data),
+ &Inst::Word8 { data } => format!("data.i64 {}", data),
+ &Inst::JTSequence {
+ ref info,
+ ridx,
+ rtmp1,
+ rtmp2,
+ ..
+ } => {
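+ // Pseudo-sequence: branch to the default target if the index is out of range (b.hs), adr to the inline table, load the signed 32-bit entry selected by ridx, add it to the table address, and branch indirectly through rtmp1.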
+ let ridx = ridx.show_rru(mb_rru);
+ let rtmp1 = rtmp1.show_rru(mb_rru);
+ let rtmp2 = rtmp2.show_rru(mb_rru);
+ let default_target = info.default_target.show_rru(mb_rru);
+ format!(
+ concat!(
+ "b.hs {} ; ",
+ "adr {}, pc+16 ; ",
+ "ldrsw {}, [{}, {}, LSL 2] ; ",
+ "add {}, {}, {} ; ",
+ "br {} ; ",
+ "jt_entries {:?}"
+ ),
+ default_target,
+ rtmp1,
+ rtmp2,
+ rtmp1,
+ ridx,
+ rtmp1,
+ rtmp1,
+ rtmp2,
+ rtmp1,
+ info.targets
+ )
+ }
+ &Inst::LoadExtName {
+ rd,
+ ref name,
+ offset,
+ } => {
+ let rd = rd.show_rru(mb_rru);
+ format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset)
+ }
+ &Inst::LoadAddr { rd, ref mem } => {
+ // TODO: we really should find a better way to avoid duplication of
+ // this logic between `emit()` and `show_rru()` -- a separate 1-to-N
+ // expansion stage (i.e., legalization, but without the slow edit-in-place
+ // of the existing legalization framework).
+ let (mem_insts, mem) = mem_finalize(0, mem, state);
+ let mut ret = String::new();
+ for inst in mem_insts.into_iter() {
+ ret.push_str(&inst.show_rru(mb_rru));
+ }
+ let (reg, offset) = match mem {
+ AMode::Unscaled(r, simm9) => (r, simm9.value()),
+ AMode::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32),
+ _ => panic!("Unsupported case for LoadAddr: {:?}", mem),
+ };
+ let abs_offset = if offset < 0 {
+ -offset as u64
+ } else {
+ offset as u64
+ };
+ let alu_op = if offset < 0 {
+ ALUOp::Sub64
+ } else {
+ ALUOp::Add64
+ };
+
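+ // Mirror of the emit() logic: a zero offset becomes a plain move, a small offset becomes a single add/sub with a 12-bit immediate, and anything larger materializes the constant into the spill temporary and adds it.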
+ if offset == 0 {
+ let mov = Inst::mov(rd, reg);
+ ret.push_str(&mov.show_rru(mb_rru));
+ } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
+ let add = Inst::AluRRImm12 {
+ alu_op,
+ rd,
+ rn: reg,
+ imm12,
+ };
+ ret.push_str(&add.show_rru(mb_rru));
+ } else {
+ let tmp = writable_spilltmp_reg();
+ for inst in Inst::load_constant(tmp, abs_offset).into_iter() {
+ ret.push_str(&inst.show_rru(mb_rru));
+ }
+ let add = Inst::AluRRR {
+ alu_op,
+ rd,
+ rn: reg,
+ rm: tmp.to_reg(),
+ };
+ ret.push_str(&add.show_rru(mb_rru));
+ }
+ ret
+ }
+ &Inst::VirtualSPOffsetAdj { offset } => {
+ state.virtual_sp_offset += offset;
+ format!("virtual_sp_offset_adjust {}", offset)
+ }
+ &Inst::EmitIsland { needed_space } => format!("emit_island {}", needed_space),
+ }
+ }
+}
+
+//=============================================================================
+// Label fixups and jump veneers.
+
+/// Different forms of label references for different instruction formats.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum LabelUse {
+ /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19
+ /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond.
+ Branch19,
+ /// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26
+ /// signed bits, in bits 25:0. Used by b, bl.
+ Branch26,
+ /// 19-bit offset for LDR (load literal). PC-rel, offset is imm << 2. Immediate is 19 signed bits,
+ /// in bits 23:5.
+ Ldr19,
+ /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
+ /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
+ Adr21,
+ /// 32-bit PC relative constant offset (from address of constant itself),
+ /// signed. Used in jump tables.
+ PCRel32,
+}
+
+impl MachInstLabelUse for LabelUse {
+ /// Alignment for veneer code. Every AArch64 instruction must be 4-byte-aligned.
+ const ALIGN: CodeOffset = 4;
+
+ /// Maximum PC-relative range (positive), inclusive.
+ fn max_pos_range(self) -> CodeOffset {
+ match self {
+ // 19-bit immediate, left-shifted by 2, giving 21 bits of total range. Signed, so the
+ // positive limit is 2^20 - 1 from zero. Likewise for the two other shifted cases below.
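+ // Concretely: Branch19, Ldr19 and Adr21 reach roughly +/-1 MiB, while Branch26 reaches roughly +/-128 MiB.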
+ LabelUse::Branch19 => (1 << 20) - 1,
+ LabelUse::Branch26 => (1 << 27) - 1,
+ LabelUse::Ldr19 => (1 << 20) - 1,
+ // Adr does not shift its immediate, so the 21-bit immediate gives 21 bits of total
+ // range.
+ LabelUse::Adr21 => (1 << 20) - 1,
+ LabelUse::PCRel32 => 0x7fffffff,
+ }
+ }
+
+ /// Maximum PC-relative range (negative).
+ fn max_neg_range(self) -> CodeOffset {
+ // All forms are twos-complement signed offsets, so negative limit is one more than
+ // positive limit.
+ self.max_pos_range() + 1
+ }
+
+ /// Size of window into code needed to do the patch.
+ fn patch_size(self) -> CodeOffset {
+ // Patch is on one instruction only for all of these label reference types.
+ 4
+ }
+
+ /// Perform the patch.
+ fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
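+ // Worked example (illustrative): a Branch19 use whose label lands 0x40 bytes
+ // ahead has pc_rel = 0x40; it is shifted right by 2 to 0x10 and placed in bits
+ // 23:5, so 0x10 << 5 = 0x200 is OR-ed into the instruction word after the old
+ // immediate field has been masked out.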
+ let pc_rel = (label_offset as i64) - (use_offset as i64);
+ debug_assert!(pc_rel <= self.max_pos_range() as i64);
+ debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
+ let pc_rel = pc_rel as u32;
+ let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
+ let mask = match self {
+ LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive
+ LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive
+ LabelUse::Ldr19 => 0x00ffffe0, // bits 23..5 inclusive
+ LabelUse::Adr21 => 0x60ffffe0, // bits 30..29, 23..5 inclusive
+ LabelUse::PCRel32 => 0xffffffff,
+ };
+ let pc_rel_shifted = match self {
+ LabelUse::Adr21 | LabelUse::PCRel32 => pc_rel,
+ _ => {
+ debug_assert!(pc_rel & 3 == 0);
+ pc_rel >> 2
+ }
+ };
+ let pc_rel_inserted = match self {
+ LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5,
+ LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff,
+ LabelUse::Adr21 => (pc_rel_shifted & 0x3) << 29 | ((pc_rel_shifted >> 2) & 0x7ffff) << 5, // immlo in bits 30:29, immhi in bits 23:5
+ LabelUse::PCRel32 => pc_rel_shifted,
+ };
+ let is_add = match self {
+ LabelUse::PCRel32 => true,
+ _ => false,
+ };
+ let insn_word = if is_add {
+ insn_word.wrapping_add(pc_rel_inserted)
+ } else {
+ (insn_word & !mask) | pc_rel_inserted
+ };
+ buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
+ }
+
+ /// Is a veneer supported for this label reference type?
+ fn supports_veneer(self) -> bool {
+ match self {
+ LabelUse::Branch19 => true, // veneer is a Branch26
+ _ => false,
+ }
+ }
+
+ /// How large is the veneer, if supported?
+ fn veneer_size(self) -> CodeOffset {
+ 4
+ }
+
+ /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
+ /// an offset and label-use for the veneer's use of the original label.
+ fn generate_veneer(
+ self,
+ buffer: &mut [u8],
+ veneer_offset: CodeOffset,
+ ) -> (CodeOffset, LabelUse) {
+ match self {
+ LabelUse::Branch19 => {
+ // veneer is a Branch26 (unconditional branch). Just encode directly here -- don't
+ // bother with constructing an Inst.
+ let insn_word = 0b000101 << 26;
+ buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
+ (veneer_offset, LabelUse::Branch26)
+ }
+ _ => panic!("Unsupported label-reference type for veneer generation!"),
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/regs.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/regs.rs
new file mode 100644
index 0000000000..0b4babe04a
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/regs.rs
@@ -0,0 +1,351 @@
+//! AArch64 ISA definitions: registers.
+
+use crate::isa::aarch64::inst::OperandSize;
+use crate::isa::aarch64::inst::ScalarSize;
+use crate::isa::aarch64::inst::VectorSize;
+use crate::settings;
+
+use regalloc::{
+ PrettyPrint, RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES,
+};
+
+use std::string::{String, ToString};
+
+//=============================================================================
+// Registers, the Universe thereof, and printing
+
+/// The pinned register on this architecture.
+/// It must be the same as SpiderMonkey's HeapReg, as defined in:
+/// https://searchfox.org/mozilla-central/source/js/src/jit/arm64/Assembler-arm64.h#103
+pub const PINNED_REG: u8 = 21;
+
+#[rustfmt::skip]
+const XREG_INDICES: [u8; 31] = [
+ // X0 - X7
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ // X8 - X15
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ // X16, X17
+ 58, 59,
+ // X18
+ 60,
+ // X19, X20
+ 48, 49,
+ // X21, put aside because it's the pinned register.
+ 57,
+ // X22 - X28
+ 50, 51, 52, 53, 54, 55, 56,
+ // X29 (FP)
+ 61,
+ // X30 (LR)
+ 62,
+];
+
+const ZERO_REG_INDEX: u8 = 63;
+
+const SP_REG_INDEX: u8 = 64;
+
+/// Get a reference to an X-register (integer register).
+pub fn xreg(num: u8) -> Reg {
+ assert!(num < 31);
+ Reg::new_real(
+ RegClass::I64,
+ /* enc = */ num,
+ /* index = */ XREG_INDICES[num as usize],
+ )
+}
+
+/// Get a writable reference to an X-register.
+pub fn writable_xreg(num: u8) -> Writable<Reg> {
+ Writable::from_reg(xreg(num))
+}
+
+/// Get a reference to a V-register (vector/FP register).
+pub fn vreg(num: u8) -> Reg {
+ assert!(num < 32);
+ Reg::new_real(RegClass::V128, /* enc = */ num, /* index = */ num)
+}
+
+/// Get a writable reference to a V-register.
+pub fn writable_vreg(num: u8) -> Writable<Reg> {
+ Writable::from_reg(vreg(num))
+}
+
+/// Get a reference to the zero-register.
+pub fn zero_reg() -> Reg {
+ // This should be the same as what xreg(31) returns, except that
+ // we use the special index into the register index space.
+ Reg::new_real(
+ RegClass::I64,
+ /* enc = */ 31,
+ /* index = */ ZERO_REG_INDEX,
+ )
+}
+
+/// Get a writable reference to the zero-register (this discards a result).
+pub fn writable_zero_reg() -> Writable<Reg> {
+ Writable::from_reg(zero_reg())
+}
+
+/// Get a reference to the stack-pointer register.
+pub fn stack_reg() -> Reg {
+ // XSP (stack) and XZR (zero) are logically different registers which have
+ // the same hardware encoding, and whose meaning, in real aarch64
+ // instructions, is context-dependent. For convenience of
+ // universe-construction and for correct printing, we make them be two
+ // different real registers.
+ Reg::new_real(
+ RegClass::I64,
+ /* enc = */ 31,
+ /* index = */ SP_REG_INDEX,
+ )
+}
+
+/// Get a writable reference to the stack-pointer register.
+pub fn writable_stack_reg() -> Writable<Reg> {
+ Writable::from_reg(stack_reg())
+}
+
+/// Get a reference to the link register (x30).
+pub fn link_reg() -> Reg {
+ xreg(30)
+}
+
+/// Get a writable reference to the link register.
+pub fn writable_link_reg() -> Writable<Reg> {
+ Writable::from_reg(link_reg())
+}
+
+/// Get a reference to the frame pointer (x29).
+pub fn fp_reg() -> Reg {
+ xreg(29)
+}
+
+/// Get a writable reference to the frame pointer.
+pub fn writable_fp_reg() -> Writable<Reg> {
+ Writable::from_reg(fp_reg())
+}
+
+/// Get a reference to the first temporary, sometimes "spill temporary", register. This register is
+/// used to compute the address of a spill slot when a direct offset addressing mode from FP is not
+/// sufficient (+/- 2^11 words). We exclude this register from regalloc and reserve it for this
+/// purpose for simplicity; otherwise we need a multi-stage analysis where we first determine how
+/// many spill slots we have, then perhaps remove the reg from the pool and recompute regalloc.
+///
+/// We use x16 for this (aka IP0 in the AArch64 ABI) because it's a scratch register but is
+/// slightly special (used for linker veneers). We're free to use it as long as we don't expect it
+/// to live through call instructions.
+pub fn spilltmp_reg() -> Reg {
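+ // With 8-byte words, the +/- 2^11-word direct range mentioned above is roughly +/-16 KiB; spill slots beyond that get their address formed in this register first.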
+ xreg(16)
+}
+
+/// Get a writable reference to the spilltmp reg.
+pub fn writable_spilltmp_reg() -> Writable<Reg> {
+ Writable::from_reg(spilltmp_reg())
+}
+
+/// Get a reference to the second temp register. We need this in some edge cases
+/// where we need both the spilltmp and another temporary.
+///
+/// We use x17 (aka IP1), the other "interprocedural"/linker-veneer scratch reg that is
+/// free to use otherwise.
+pub fn tmp2_reg() -> Reg {
+ xreg(17)
+}
+
+/// Get a writable reference to the tmp2 reg.
+pub fn writable_tmp2_reg() -> Writable<Reg> {
+ Writable::from_reg(tmp2_reg())
+}
+
+/// Create the register universe for AArch64.
+pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
+ let mut regs = vec![];
+ let mut allocable_by_class = [None; NUM_REG_CLASSES];
+
+ // Numbering Scheme: we put V-regs first, then X-regs. The allocatable X-regs exclude several
+ // registers: x16/x17 (reserved as temporaries), x18 (globally reserved for platform-specific
+ // purposes), x21 (the pinned register; see below), x29 (frame pointer), x30 (link register),
+ // and x31 (stack pointer or zero register, depending on context).
+
+ let v_reg_base = 0u8; // in contiguous real-register index space
+ let v_reg_count = 32;
+ for i in 0u8..v_reg_count {
+ let reg = Reg::new_real(
+ RegClass::V128,
+ /* enc = */ i,
+ /* index = */ v_reg_base + i,
+ )
+ .to_real_reg();
+ let name = format!("v{}", i);
+ regs.push((reg, name));
+ }
+ let v_reg_last = v_reg_base + v_reg_count - 1;
+
+ // Add the X registers. N.B.: the order here must match the order implied
+ // by XREG_INDICES, ZERO_REG_INDEX, and SP_REG_INDEX above.
+
+ let x_reg_base = 32u8; // in contiguous real-register index space
+ let mut x_reg_count = 0;
+
+ let uses_pinned_reg = flags.enable_pinned_reg();
+
+ for i in 0u8..32u8 {
+ // See above for excluded registers.
+ if i == 16 || i == 17 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
+ continue;
+ }
+ let reg = Reg::new_real(
+ RegClass::I64,
+ /* enc = */ i,
+ /* index = */ x_reg_base + x_reg_count,
+ )
+ .to_real_reg();
+ let name = format!("x{}", i);
+ regs.push((reg, name));
+ x_reg_count += 1;
+ }
+ let x_reg_last = x_reg_base + x_reg_count - 1;
+
+ allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
+ first: x_reg_base as usize,
+ last: x_reg_last as usize,
+ suggested_scratch: Some(XREG_INDICES[19] as usize),
+ });
+ allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
+ first: v_reg_base as usize,
+ last: v_reg_last as usize,
+ suggested_scratch: Some(/* V31: */ 31),
+ });
+
+ // Other regs, not available to the allocator.
+ let allocable = if uses_pinned_reg {
+ // The pinned register is not allocatable in this case, so record the length before adding
+ // it.
+ let len = regs.len();
+ regs.push((xreg(PINNED_REG).to_real_reg(), "x21/pinned_reg".to_string()));
+ len
+ } else {
+ regs.push((xreg(PINNED_REG).to_real_reg(), "x21".to_string()));
+ regs.len()
+ };
+
+ regs.push((xreg(16).to_real_reg(), "x16".to_string()));
+ regs.push((xreg(17).to_real_reg(), "x17".to_string()));
+ regs.push((xreg(18).to_real_reg(), "x18".to_string()));
+ regs.push((fp_reg().to_real_reg(), "fp".to_string()));
+ regs.push((link_reg().to_real_reg(), "lr".to_string()));
+ regs.push((zero_reg().to_real_reg(), "xzr".to_string()));
+ regs.push((stack_reg().to_real_reg(), "sp".to_string()));
+
+ // FIXME JRS 2020Feb06: unfortunately this pushes the number of real regs
+ // to 65, which is potentially inconvenient from a compiler performance
+ // standpoint. We could possibly drop back to 64 by "losing" a vector
+ // register in future.
+
+ // Assert sanity: the indices in the register structs must match their
+ // actual indices in the array.
+ for (i, reg) in regs.iter().enumerate() {
+ assert_eq!(i, reg.0.get_index());
+ }
+
+ RealRegUniverse {
+ regs,
+ allocable,
+ allocable_by_class,
+ }
+}
+
+/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show
+/// its name at the 32-bit size.
+pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: OperandSize) -> String {
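+ // For example, a real "x3" used at 32 bits is shown as "w3"; a virtual I64-class register instead gets a "w" suffix.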
+ let mut s = reg.show_rru(mb_rru);
+ if reg.get_class() != RegClass::I64 || !size.is32() {
+ // We can't do any better.
+ return s;
+ }
+
+ if reg.is_real() {
+ // Change (eg) "x42" into "w42" as appropriate
+ if reg.get_class() == RegClass::I64 && size.is32() && s.starts_with("x") {
+ s = "w".to_string() + &s[1..];
+ }
+ } else {
+ // Add a "w" suffix to RegClass::I64 vregs used in a 32-bit role
+ if reg.get_class() == RegClass::I64 && size.is32() {
+ s.push('w');
+ }
+ }
+ s
+}
+
+/// Show a vector register used in a scalar context.
+pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: ScalarSize) -> String {
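+ // For example, a real "v7" is shown as "b7", "h7", "s7", "d7" or "q7" depending on the scalar size; virtual V128-class registers get a "d" suffix.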
+ let mut s = reg.show_rru(mb_rru);
+ if reg.get_class() != RegClass::V128 {
+ // We can't do any better.
+ return s;
+ }
+
+ if reg.is_real() {
+ // Change (eg) "v0" into "d0".
+ if s.starts_with("v") {
+ let replacement = match size {
+ ScalarSize::Size8 => "b",
+ ScalarSize::Size16 => "h",
+ ScalarSize::Size32 => "s",
+ ScalarSize::Size64 => "d",
+ ScalarSize::Size128 => "q",
+ };
+ s.replace_range(0..1, replacement);
+ }
+ } else {
+ // Add a "d" suffix to RegClass::V128 vregs.
+ if reg.get_class() == RegClass::V128 {
+ s.push('d');
+ }
+ }
+ s
+}
+
+/// Show a vector register.
+pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: VectorSize) -> String {
+ assert_eq!(RegClass::V128, reg.get_class());
+ let mut s = reg.show_rru(mb_rru);
+
+ let suffix = match size {
+ VectorSize::Size8x8 => ".8b",
+ VectorSize::Size8x16 => ".16b",
+ VectorSize::Size16x4 => ".4h",
+ VectorSize::Size16x8 => ".8h",
+ VectorSize::Size32x2 => ".2s",
+ VectorSize::Size32x4 => ".4s",
+ VectorSize::Size64x2 => ".2d",
+ };
+
+ s.push_str(suffix);
+ s
+}
+
+/// Show an indexed vector element.
+pub fn show_vreg_element(
+ reg: Reg,
+ mb_rru: Option<&RealRegUniverse>,
+ idx: u8,
+ size: VectorSize,
+) -> String {
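+ // For example, lane 3 of a real "v2" with 32-bit lanes is shown as "v2.s[3]".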
+ assert_eq!(RegClass::V128, reg.get_class());
+ let mut s = reg.show_rru(mb_rru);
+
+ let suffix = match size {
+ VectorSize::Size8x8 => "b",
+ VectorSize::Size8x16 => "b",
+ VectorSize::Size16x4 => "h",
+ VectorSize::Size16x8 => "h",
+ VectorSize::Size32x2 => "s",
+ VectorSize::Size32x4 => "s",
+ VectorSize::Size64x2 => "d",
+ };
+
+ s.push_str(&format!(".{}[{}]", suffix, idx));
+ s
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind.rs
new file mode 100644
index 0000000000..698e094795
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind.rs
@@ -0,0 +1,201 @@
+use super::*;
+use crate::isa::aarch64::inst::{args::PairAMode, imms::Imm12, regs, ALUOp, Inst};
+use crate::isa::unwind::input::{UnwindCode, UnwindInfo};
+use crate::machinst::UnwindInfoContext;
+use crate::result::CodegenResult;
+use alloc::vec::Vec;
+use regalloc::Reg;
+
+#[cfg(feature = "unwind")]
+pub(crate) mod systemv;
+
+pub struct AArch64UnwindInfo;
+
+impl UnwindInfoGenerator<Inst> for AArch64UnwindInfo {
+ fn create_unwind_info(
+ context: UnwindInfoContext<Inst>,
+ ) -> CodegenResult<Option<UnwindInfo<Reg>>> {
+ let word_size = 8u8;
+ let pair_size = word_size * 2;
+ let mut codes = Vec::new();
+
+ for i in context.prologue.clone() {
+ let i = i as usize;
+ let inst = &context.insts[i];
+ let offset = context.insts_layout[i];
+
+ match inst {
+ Inst::StoreP64 {
+ rt,
+ rt2,
+ mem: PairAMode::PreIndexed(rn, imm7),
+ ..
+ } if *rt == regs::fp_reg()
+ && *rt2 == regs::link_reg()
+ && *rn == regs::writable_stack_reg()
+ && imm7.value == -(pair_size as i16) =>
+ {
+ // stp fp (x29), lr (x30), [sp, #-16]!
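+ // This single instruction both allocates 16 bytes and saves fp at [sp] and lr at [sp, #8], hence one StackAlloc code followed by two SaveRegister codes (cf. the expectations in the test module below).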
+ codes.push((
+ offset,
+ UnwindCode::StackAlloc {
+ size: pair_size as u32,
+ },
+ ));
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *rt,
+ stack_offset: 0,
+ },
+ ));
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *rt2,
+ stack_offset: word_size as u32,
+ },
+ ));
+ }
+ Inst::StoreP64 {
+ rt,
+ rt2,
+ mem: PairAMode::PreIndexed(rn, imm7),
+ ..
+ } if rn.to_reg() == regs::stack_reg() && imm7.value % (pair_size as i16) == 0 => {
+ // stp r1, r2, [sp, #(i * 16)]
+ let stack_offset = imm7.value as u32;
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *rt,
+ stack_offset,
+ },
+ ));
+ if *rt2 != regs::zero_reg() {
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *rt2,
+ stack_offset: stack_offset + word_size as u32,
+ },
+ ));
+ }
+ }
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Add64,
+ rd,
+ rn,
+ imm12:
+ Imm12 {
+ bits: 0,
+ shift12: false,
+ },
+ } if *rd == regs::writable_fp_reg() && *rn == regs::stack_reg() => {
+ // mov fp (x29), sp.
+ codes.push((offset, UnwindCode::SetFramePointer { reg: rd.to_reg() }));
+ }
+ Inst::VirtualSPOffsetAdj { offset: adj } if offset > 0 => {
+ codes.push((offset, UnwindCode::StackAlloc { size: *adj as u32 }));
+ }
+ _ => {}
+ }
+ }
+
+ // TODO epilogues
+
+ let prologue_size = if context.prologue.is_empty() {
+ 0
+ } else {
+ context.insts_layout[context.prologue.end as usize - 1]
+ };
+
+ Ok(Some(UnwindInfo {
+ prologue_size,
+ prologue_unwind_codes: codes,
+ epilogues_unwind_codes: vec![],
+ function_size: context.len,
+ word_size,
+ initial_sp_offset: 0,
+ }))
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::cursor::{Cursor, FuncCursor};
+ use crate::ir::{ExternalName, Function, InstBuilder, Signature, StackSlotData, StackSlotKind};
+ use crate::isa::{lookup, CallConv};
+ use crate::settings::{builder, Flags};
+ use crate::Context;
+ use std::str::FromStr;
+ use target_lexicon::triple;
+
+ #[test]
+ fn test_simple_func() {
+ let isa = lookup(triple!("aarch64"))
+ .expect("expect aarch64 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(
+ CallConv::SystemV,
+ Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
+ ));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let result = context.mach_compile_result.unwrap();
+ let unwind_info = result.unwind_info.unwrap();
+
+ assert_eq!(
+ unwind_info,
+ UnwindInfo {
+ prologue_size: 12,
+ prologue_unwind_codes: vec![
+ (4, UnwindCode::StackAlloc { size: 16 }),
+ (
+ 4,
+ UnwindCode::SaveRegister {
+ reg: regs::fp_reg(),
+ stack_offset: 0
+ }
+ ),
+ (
+ 4,
+ UnwindCode::SaveRegister {
+ reg: regs::link_reg(),
+ stack_offset: 8
+ }
+ ),
+ (
+ 8,
+ UnwindCode::SetFramePointer {
+ reg: regs::fp_reg()
+ }
+ )
+ ],
+ epilogues_unwind_codes: vec![],
+ function_size: 24,
+ word_size: 8,
+ initial_sp_offset: 0,
+ }
+ );
+ }
+
+ fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
+ let mut func =
+ Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
+
+ let block0 = func.dfg.make_block();
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().return_(&[]);
+
+ if let Some(stack_slot) = stack_slot {
+ func.stack_slots.push(stack_slot);
+ }
+
+ func
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind/systemv.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind/systemv.rs
new file mode 100644
index 0000000000..b988314b1b
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind/systemv.rs
@@ -0,0 +1,158 @@
+//! Unwind information for System V ABI (AArch64).
+
+use crate::isa::aarch64::inst::regs;
+use crate::isa::unwind::input;
+use crate::isa::unwind::systemv::{RegisterMappingError, UnwindInfo};
+use crate::result::CodegenResult;
+use gimli::{write::CommonInformationEntry, Encoding, Format, Register};
+use regalloc::{Reg, RegClass};
+
+/// Creates a new aarch64 common information entry (CIE).
+pub fn create_cie() -> CommonInformationEntry {
+ use gimli::write::CallFrameInstruction;
+
+ let mut entry = CommonInformationEntry::new(
+ Encoding {
+ address_size: 8,
+ format: Format::Dwarf32,
+ version: 1,
+ },
+ 4, // Code alignment factor
+ -8, // Data alignment factor
+ Register(regs::link_reg().get_hw_encoding().into()),
+ );
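+ // With these factors, code advances are encoded in units of 4 bytes and register-save offsets in units of -8 bytes; the return-address column is LR (x30).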
+
+ // Every frame will start with the call frame address (CFA) at SP
+ let sp = Register(regs::stack_reg().get_hw_encoding().into());
+ entry.add_instruction(CallFrameInstruction::Cfa(sp, 0));
+
+ entry
+}
+
+/// Map Cranelift registers to their corresponding Gimli registers.
+pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
+ match reg.get_class() {
+ RegClass::I64 => Ok(Register(reg.get_hw_encoding().into())),
+ _ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")),
+ }
+}
+
+pub(crate) fn create_unwind_info(
+ unwind: input::UnwindInfo<Reg>,
+) -> CodegenResult<Option<UnwindInfo>> {
+ struct RegisterMapper;
+ impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
+ fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
+ Ok(map_reg(reg)?.0)
+ }
+ fn sp(&self) -> u16 {
+ regs::stack_reg().get_hw_encoding().into()
+ }
+ }
+ let map = RegisterMapper;
+ Ok(Some(UnwindInfo::build(unwind, &map)?))
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::cursor::{Cursor, FuncCursor};
+ use crate::ir::{
+ types, AbiParam, ExternalName, Function, InstBuilder, Signature, StackSlotData,
+ StackSlotKind,
+ };
+ use crate::isa::{lookup, CallConv};
+ use crate::settings::{builder, Flags};
+ use crate::Context;
+ use gimli::write::Address;
+ use std::str::FromStr;
+ use target_lexicon::triple;
+
+ #[test]
+ fn test_simple_func() {
+ let isa = lookup(triple!("aarch64"))
+ .expect("expect aarch64 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(
+ CallConv::SystemV,
+ Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
+ ));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let fde = match context
+ .create_unwind_info(isa.as_ref())
+ .expect("can create unwind info")
+ {
+ Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
+ info.to_fde(Address::Constant(1234))
+ }
+ _ => panic!("expected unwind information"),
+ };
+
+ assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
+ }
+
+ fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
+ let mut func =
+ Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
+
+ let block0 = func.dfg.make_block();
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().return_(&[]);
+
+ if let Some(stack_slot) = stack_slot {
+ func.stack_slots.push(stack_slot);
+ }
+
+ func
+ }
+
+ #[test]
+ fn test_multi_return_func() {
+ let isa = lookup(triple!("aarch64"))
+ .expect("expect aarch64 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let fde = match context
+ .create_unwind_info(isa.as_ref())
+ .expect("can create unwind info")
+ {
+ Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
+ info.to_fde(Address::Constant(4321))
+ }
+ _ => panic!("expected unwind information"),
+ };
+
+ assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 40, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
+ }
+
+ fn create_multi_return_function(call_conv: CallConv) -> Function {
+ let mut sig = Signature::new(call_conv);
+ sig.params.push(AbiParam::new(types::I32));
+ let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
+
+ let block0 = func.dfg.make_block();
+ let v0 = func.dfg.append_block_param(block0, types::I32);
+ let block1 = func.dfg.make_block();
+ let block2 = func.dfg.make_block();
+
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().brnz(v0, block2, &[]);
+ pos.ins().jump(block1, &[]);
+
+ pos.insert_block(block1);
+ pos.ins().return_(&[]);
+
+ pos.insert_block(block2);
+ pos.ins().return_(&[]);
+
+ func
+ }
+}