author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 19:33:14 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 19:33:14 +0000
commit    36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree      105e8c98ddea1c1e4784a60a5a6410fa416be2de /js/src/jit/arm
parent    Initial commit. (diff)
Adding upstream version 115.7.0esr.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--  js/src/jit/arm/Architecture-arm.cpp  540
-rw-r--r--  js/src/jit/arm/Architecture-arm.h  733
-rw-r--r--  js/src/jit/arm/Assembler-arm.cpp  2832
-rw-r--r--  js/src/jit/arm/Assembler-arm.h  2296
-rw-r--r--  js/src/jit/arm/CodeGenerator-arm.cpp  3154
-rw-r--r--  js/src/jit/arm/CodeGenerator-arm.h  172
-rw-r--r--  js/src/jit/arm/DoubleEntryTable.tbl  257
-rw-r--r--  js/src/jit/arm/LIR-arm.h  511
-rw-r--r--  js/src/jit/arm/Lowering-arm.cpp  1223
-rw-r--r--  js/src/jit/arm/Lowering-arm.h  118
-rw-r--r--  js/src/jit/arm/MacroAssembler-arm-inl.h  2582
-rw-r--r--  js/src/jit/arm/MacroAssembler-arm.cpp  6382
-rw-r--r--  js/src/jit/arm/MacroAssembler-arm.h  1392
-rw-r--r--  js/src/jit/arm/MoveEmitter-arm.cpp  413
-rw-r--r--  js/src/jit/arm/MoveEmitter-arm.h  70
-rw-r--r--  js/src/jit/arm/SharedICHelpers-arm-inl.h  79
-rw-r--r--  js/src/jit/arm/SharedICHelpers-arm.h  80
-rw-r--r--  js/src/jit/arm/SharedICRegisters-arm.h  52
-rw-r--r--  js/src/jit/arm/Simulator-arm.cpp  5472
-rw-r--r--  js/src/jit/arm/Simulator-arm.h  632
-rw-r--r--  js/src/jit/arm/Trampoline-arm.cpp  831
-rw-r--r--  js/src/jit/arm/disasm/Constants-arm.cpp  117
-rw-r--r--  js/src/jit/arm/disasm/Constants-arm.h  684
-rw-r--r--  js/src/jit/arm/disasm/Disasm-arm.cpp  2031
-rw-r--r--  js/src/jit/arm/disasm/Disasm-arm.h  141
-rw-r--r--  js/src/jit/arm/gen-double-encoder-table.py  35
-rw-r--r--  js/src/jit/arm/llvm-compiler-rt/arm/aeabi_idivmod.S  27
-rw-r--r--  js/src/jit/arm/llvm-compiler-rt/arm/aeabi_uidivmod.S  28
-rw-r--r--  js/src/jit/arm/llvm-compiler-rt/assembly.h  67
-rw-r--r--  js/src/jit/arm64/Architecture-arm64.cpp  129
-rw-r--r--  js/src/jit/arm64/Architecture-arm64.h  773
-rw-r--r--  js/src/jit/arm64/Assembler-arm64.cpp  609
-rw-r--r--  js/src/jit/arm64/Assembler-arm64.h  793
-rw-r--r--  js/src/jit/arm64/CodeGenerator-arm64.cpp  4245
-rw-r--r--  js/src/jit/arm64/CodeGenerator-arm64.h  135
-rw-r--r--  js/src/jit/arm64/LIR-arm64.h  373
-rw-r--r--  js/src/jit/arm64/Lowering-arm64.cpp  1438
-rw-r--r--  js/src/jit/arm64/Lowering-arm64.h  135
-rw-r--r--  js/src/jit/arm64/MacroAssembler-arm64-inl.h  4079
-rw-r--r--  js/src/jit/arm64/MacroAssembler-arm64.cpp  3416
-rw-r--r--  js/src/jit/arm64/MacroAssembler-arm64.h  2206
-rw-r--r--  js/src/jit/arm64/MoveEmitter-arm64.cpp  329
-rw-r--r--  js/src/jit/arm64/MoveEmitter-arm64.h  99
-rw-r--r--  js/src/jit/arm64/SharedICHelpers-arm64-inl.h  79
-rw-r--r--  js/src/jit/arm64/SharedICHelpers-arm64.h  82
-rw-r--r--  js/src/jit/arm64/SharedICRegisters-arm64.h  51
-rw-r--r--  js/src/jit/arm64/Trampoline-arm64.cpp  840
-rw-r--r--  js/src/jit/arm64/vixl/.clang-format  4
-rw-r--r--  js/src/jit/arm64/vixl/AUTHORS  8
-rw-r--r--  js/src/jit/arm64/vixl/Assembler-vixl.cpp  5318
-rw-r--r--  js/src/jit/arm64/vixl/Assembler-vixl.h  4974
-rw-r--r--  js/src/jit/arm64/vixl/CompilerIntrinsics-vixl.h  179
-rw-r--r--  js/src/jit/arm64/vixl/Constants-vixl.h  2694
-rw-r--r--  js/src/jit/arm64/vixl/Cpu-Features-vixl.cpp  231
-rw-r--r--  js/src/jit/arm64/vixl/Cpu-Features-vixl.h  397
-rw-r--r--  js/src/jit/arm64/vixl/Cpu-vixl.cpp  256
-rw-r--r--  js/src/jit/arm64/vixl/Cpu-vixl.h  241
-rw-r--r--  js/src/jit/arm64/vixl/Debugger-vixl.cpp  1535
-rw-r--r--  js/src/jit/arm64/vixl/Debugger-vixl.h  117
-rw-r--r--  js/src/jit/arm64/vixl/Decoder-vixl.cpp  899
-rw-r--r--  js/src/jit/arm64/vixl/Decoder-vixl.h  276
-rw-r--r--  js/src/jit/arm64/vixl/Disasm-vixl.cpp  3741
-rw-r--r--  js/src/jit/arm64/vixl/Disasm-vixl.h  181
-rw-r--r--  js/src/jit/arm64/vixl/Globals-vixl.h  272
-rw-r--r--  js/src/jit/arm64/vixl/Instructions-vixl.cpp  627
-rw-r--r--  js/src/jit/arm64/vixl/Instructions-vixl.h  817
-rw-r--r--  js/src/jit/arm64/vixl/Instrument-vixl.cpp  850
-rw-r--r--  js/src/jit/arm64/vixl/Instrument-vixl.h  109
-rw-r--r--  js/src/jit/arm64/vixl/Logic-vixl.cpp  4738
-rw-r--r--  js/src/jit/arm64/vixl/MacroAssembler-vixl.cpp  2027
-rw-r--r--  js/src/jit/arm64/vixl/MacroAssembler-vixl.h  2622
-rw-r--r--  js/src/jit/arm64/vixl/MozAssembler-vixl.cpp  610
-rw-r--r--  js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h  356
-rw-r--r--  js/src/jit/arm64/vixl/MozCachingDecoder.h  179
-rw-r--r--  js/src/jit/arm64/vixl/MozCpu-vixl.cpp  226
-rw-r--r--  js/src/jit/arm64/vixl/MozInstructions-vixl.cpp  211
-rw-r--r--  js/src/jit/arm64/vixl/MozSimulator-vixl.cpp  1258
-rw-r--r--  js/src/jit/arm64/vixl/Platform-vixl.h  39
-rw-r--r--  js/src/jit/arm64/vixl/README.md  7
-rw-r--r--  js/src/jit/arm64/vixl/Simulator-Constants-vixl.h  140
-rw-r--r--  js/src/jit/arm64/vixl/Simulator-vixl.cpp  4371
-rw-r--r--  js/src/jit/arm64/vixl/Simulator-vixl.h  2592
-rw-r--r--  js/src/jit/arm64/vixl/Utils-vixl.cpp  555
-rw-r--r--  js/src/jit/arm64/vixl/Utils-vixl.h  1283
84 files changed, 97702 insertions, 0 deletions
diff --git a/js/src/jit/arm/Architecture-arm.cpp b/js/src/jit/arm/Architecture-arm.cpp
new file mode 100644
index 0000000000..d4c5026705
--- /dev/null
+++ b/js/src/jit/arm/Architecture-arm.cpp
@@ -0,0 +1,540 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm/Architecture-arm.h"
+
+#if !defined(JS_SIMULATOR_ARM) && !defined(__APPLE__)
+# include <elf.h>
+#endif
+
+#include <fcntl.h>
+#ifdef XP_UNIX
+# include <unistd.h>
+#endif
+
+#if defined(XP_IOS)
+# include <libkern/OSCacheControl.h>
+#endif
+
+#include "jit/arm/Assembler-arm.h"
+#include "jit/arm/Simulator-arm.h"
+#include "jit/FlushICache.h" // js::jit::FlushICache
+#include "jit/RegisterSets.h"
+
+#if !defined(__linux__) || defined(ANDROID) || defined(JS_SIMULATOR_ARM)
+// The Android NDK and B2G do not include the hwcap.h kernel header, and it is
+// not defined when building the simulator, so inline the header defines we
+// need.
+# define HWCAP_VFP (1 << 6)
+# define HWCAP_NEON (1 << 12)
+# define HWCAP_VFPv3 (1 << 13)
+# define HWCAP_VFPv3D16 (1 << 14) /* also set for VFPv4-D16 */
+# define HWCAP_VFPv4 (1 << 16)
+# define HWCAP_IDIVA (1 << 17)
+# define HWCAP_IDIVT (1 << 18)
+# define HWCAP_VFPD32 (1 << 19) /* set if VFP has 32 regs (not 16) */
+# define AT_HWCAP 16
+#else
+# include <asm/hwcap.h>
+# if !defined(HWCAP_IDIVA)
+# define HWCAP_IDIVA (1 << 17)
+# endif
+# if !defined(HWCAP_VFPD32)
+# define HWCAP_VFPD32 (1 << 19) /* set if VFP has 32 regs (not 16) */
+# endif
+#endif
+
+namespace js {
+namespace jit {
+
+// Parse the Linux kernel cpuinfo features. This is also used to parse the
+// override features, which have some extensions: 'armv7', 'align' and 'hardfp'.
+static uint32_t ParseARMCpuFeatures(const char* features,
+ bool override = false) {
+ uint32_t flags = 0;
+
+ // For ease of running tests we want it to be the default to fixup faults.
+ bool fixupAlignmentFault = true;
+
+ for (;;) {
+ char ch = *features;
+ if (!ch) {
+ // End of string.
+ break;
+ }
+ if (ch == ' ' || ch == ',') {
+ // Skip separator characters.
+ features++;
+ continue;
+ }
+ // Find the end of the token.
+ const char* end = features + 1;
+ for (;; end++) {
+ ch = *end;
+ if (!ch || ch == ' ' || ch == ',') {
+ break;
+ }
+ }
+ size_t count = end - features;
+ if (count == 3 && strncmp(features, "vfp", 3) == 0) {
+ flags |= HWCAP_VFP;
+ } else if (count == 5 && strncmp(features, "vfpv2", 5) == 0) {
+ flags |= HWCAP_VFP; // vfpv2 is the same as vfp
+ } else if (count == 4 && strncmp(features, "neon", 4) == 0) {
+ flags |= HWCAP_NEON;
+ } else if (count == 5 && strncmp(features, "vfpv3", 5) == 0) {
+ flags |= HWCAP_VFPv3;
+ } else if (count == 8 && strncmp(features, "vfpv3d16", 8) == 0) {
+ flags |= HWCAP_VFPv3D16;
+ } else if (count == 5 && strncmp(features, "vfpv4", 5) == 0) {
+ flags |= HWCAP_VFPv4;
+ } else if (count == 5 && strncmp(features, "idiva", 5) == 0) {
+ flags |= HWCAP_IDIVA;
+ } else if (count == 5 && strncmp(features, "idivt", 5) == 0) {
+ flags |= HWCAP_IDIVT;
+ } else if (count == 6 && strncmp(features, "vfpd32", 6) == 0) {
+ flags |= HWCAP_VFPD32;
+ } else if (count == 5 && strncmp(features, "armv7", 5) == 0) {
+ flags |= HWCAP_ARMv7;
+ } else if (count == 5 && strncmp(features, "align", 5) == 0) {
+ flags |= HWCAP_ALIGNMENT_FAULT | HWCAP_FIXUP_FAULT;
+#if defined(JS_SIMULATOR_ARM)
+ } else if (count == 7 && strncmp(features, "nofixup", 7) == 0) {
+ fixupAlignmentFault = false;
+ } else if (count == 6 && strncmp(features, "hardfp", 6) == 0) {
+ flags |= HWCAP_USE_HARDFP_ABI;
+#endif
+ } else if (override) {
+ fprintf(stderr, "Warning: unexpected ARM feature at: %s\n", features);
+ }
+ features = end;
+ }
+
+ if (!fixupAlignmentFault) {
+ flags &= ~HWCAP_FIXUP_FAULT;
+ }
+
+ return flags;
+}
+
+static uint32_t CanonicalizeARMHwCapFlags(uint32_t flags) {
+ // Canonicalize the flags. These rules are also applied to the features
+ // supplied for simulation.
+
+ // VFPv3 is a subset of VFPv4, force this if the input string omits it.
+ if (flags & HWCAP_VFPv4) {
+ flags |= HWCAP_VFPv3;
+ }
+
+ // The VFPv3 feature is expected when the VFPv3D16 is reported, but add it
+ // just in case of a kernel difference in feature reporting.
+ if (flags & HWCAP_VFPv3D16) {
+ flags |= HWCAP_VFPv3;
+ }
+
+ // VFPv2 is a subset of VFPv3, force this if the input string omits it. VFPv2
+ // is just an alias for VFP.
+ if (flags & HWCAP_VFPv3) {
+ flags |= HWCAP_VFP;
+ }
+
+ // If we have Neon we have floating point.
+ if (flags & HWCAP_NEON) {
+ flags |= HWCAP_VFP;
+ }
+
+ // If VFPv3 or Neon is supported then this must be an ARMv7.
+ if (flags & (HWCAP_VFPv3 | HWCAP_NEON)) {
+ flags |= HWCAP_ARMv7;
+ }
+
+ // Some old kernels report VFP and not VFPv3, but if ARMv7 then it must be
+ // VFPv3.
+ if ((flags & HWCAP_VFP) && (flags & HWCAP_ARMv7)) {
+ flags |= HWCAP_VFPv3;
+ }
+
+ // Older kernels do not implement the HWCAP_VFPD32 flag.
+ if ((flags & HWCAP_VFPv3) && !(flags & HWCAP_VFPv3D16)) {
+ flags |= HWCAP_VFPD32;
+ }
+
+ return flags;
+}
+
+#if !defined(JS_SIMULATOR_ARM) && (defined(__linux__) || defined(ANDROID))
+static bool forceDoubleCacheFlush = false;
+#endif
+
+// The override flags parsed from the ARMHWCAP environment variable or from the
+// --arm-hwcap js shell argument. They are stable after startup: there is no
+// longer a programmatic way of setting these from JS.
+volatile uint32_t armHwCapFlags = HWCAP_UNINITIALIZED;
+
+bool CPUFlagsHaveBeenComputed() { return armHwCapFlags != HWCAP_UNINITIALIZED; }
+
+static const char* gArmHwCapString = nullptr;
+
+void SetARMHwCapFlagsString(const char* armHwCap) {
+ MOZ_ASSERT(!CPUFlagsHaveBeenComputed());
+ gArmHwCapString = armHwCap;
+}
+
+static void ParseARMHwCapFlags(const char* armHwCap) {
+ MOZ_ASSERT(armHwCap);
+
+ if (strstr(armHwCap, "help")) {
+ fflush(NULL);
+ printf(
+ "\n"
+ "usage: ARMHWCAP=option,option,option,... where options can be:\n"
+ "\n"
+ " vfp \n"
+ " neon \n"
+ " vfpv3 \n"
+ " vfpv3d16 \n"
+ " vfpv4 \n"
+ " idiva \n"
+ " idivt \n"
+ " vfpd32 \n"
+ " armv7 \n"
+ " align - unaligned accesses will trap and be emulated\n"
+#ifdef JS_SIMULATOR_ARM
+ " nofixup - disable emulation of unaligned accesses\n"
+ " hardfp \n"
+#endif
+ "\n");
+ exit(0);
+ /*NOTREACHED*/
+ }
+
+ uint32_t flags = ParseARMCpuFeatures(armHwCap, /* override = */ true);
+
+#ifdef JS_CODEGEN_ARM_HARDFP
+ flags |= HWCAP_USE_HARDFP_ABI;
+#endif
+
+ armHwCapFlags = CanonicalizeARMHwCapFlags(flags);
+ JitSpew(JitSpew_Codegen, "ARM HWCAP: 0x%x\n", armHwCapFlags);
+}
+
+void InitARMFlags() {
+ MOZ_RELEASE_ASSERT(armHwCapFlags == HWCAP_UNINITIALIZED);
+
+ if (const char* env = getenv("ARMHWCAP")) {
+ ParseARMHwCapFlags(env);
+ return;
+ }
+
+ if (gArmHwCapString) {
+ ParseARMHwCapFlags(gArmHwCapString);
+ return;
+ }
+
+ uint32_t flags = 0;
+#ifdef JS_SIMULATOR_ARM
+ // HWCAP_FIXUP_FAULT is on by default even if HWCAP_ALIGNMENT_FAULT is
+ // not on by default, because some memory access instructions always fault.
+ // Notably, this is true for floating point accesses.
+ flags = HWCAP_ARMv7 | HWCAP_VFP | HWCAP_VFPv3 | HWCAP_VFPv4 | HWCAP_NEON |
+ HWCAP_IDIVA | HWCAP_FIXUP_FAULT;
+#else
+
+# if defined(__linux__) || defined(ANDROID)
+ // This includes Android and B2G.
+ bool readAuxv = false;
+ int fd = open("/proc/self/auxv", O_RDONLY);
+ if (fd > 0) {
+ struct {
+ uint32_t a_type;
+ uint32_t a_val;
+ } aux;
+ while (read(fd, &aux, sizeof(aux))) {
+ if (aux.a_type == AT_HWCAP) {
+ flags = aux.a_val;
+ readAuxv = true;
+ break;
+ }
+ }
+ close(fd);
+ }
+
+ FILE* fp = fopen("/proc/cpuinfo", "r");
+ if (fp) {
+ char buf[1024] = {};
+ size_t len = fread(buf, sizeof(char), sizeof(buf) - 1, fp);
+ fclose(fp);
+ buf[len] = '\0';
+
+ // Read the cpuinfo Features if the auxv is not available.
+ if (!readAuxv) {
+ char* featureList = strstr(buf, "Features");
+ if (featureList) {
+ if (char* featuresEnd = strstr(featureList, "\n")) {
+ *featuresEnd = '\0';
+ }
+ flags = ParseARMCpuFeatures(featureList + 8);
+ }
+ if (strstr(buf, "ARMv7")) {
+ flags |= HWCAP_ARMv7;
+ }
+ }
+
+ // The exynos7420 cpu (EU galaxy S6 (Note)) has a bug where sometimes
+ // flushing doesn't invalidate the instruction cache. As a result we force
+ // it by calling the cacheFlush twice on different start addresses.
+ char* exynos7420 = strstr(buf, "Exynos7420");
+ if (exynos7420) {
+ forceDoubleCacheFlush = true;
+ }
+ }
+# endif
+
+ // If compiled to use specialized features then these features can be
+ // assumed to be present; otherwise this code itself would fail to run.
+
+# ifdef JS_CODEGEN_ARM_HARDFP
+ // Compiled to use the hardfp ABI.
+ flags |= HWCAP_USE_HARDFP_ABI;
+# endif
+
+# if defined(__VFP_FP__) && !defined(__SOFTFP__)
+ // Compiled to use VFP instructions so assume VFP support.
+ flags |= HWCAP_VFP;
+# endif
+
+# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
+ // Compiled to use ARMv7 instructions so assume the ARMv7 arch.
+ flags |= HWCAP_ARMv7;
+# endif
+
+# if defined(__APPLE__)
+# if defined(__ARM_NEON__)
+ flags |= HWCAP_NEON;
+# endif
+# if defined(__ARMVFPV3__)
+ flags |= HWCAP_VFPv3 | HWCAP_VFPD32;
+# endif
+# endif
+
+#endif // JS_SIMULATOR_ARM
+
+ armHwCapFlags = CanonicalizeARMHwCapFlags(flags);
+
+ JitSpew(JitSpew_Codegen, "ARM HWCAP: 0x%x\n", armHwCapFlags);
+ return;
+}
+
+uint32_t GetARMFlags() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags;
+}
+
+bool HasNEON() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_NEON;
+}
+
+bool HasARMv7() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_ARMv7;
+}
+
+bool HasMOVWT() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_ARMv7;
+}
+
+bool HasLDSTREXBHD() {
+ // These are really available from ARMv6K and later, but why bother?
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_ARMv7;
+}
+
+bool HasDMBDSBISB() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_ARMv7;
+}
+
+bool HasVFPv3() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_VFPv3;
+}
+
+bool HasVFP() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_VFP;
+}
+
+bool Has32DP() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_VFPD32;
+}
+
+bool HasIDIV() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_IDIVA;
+}
+
+// This is defined in the header and inlined when not using the simulator.
+#ifdef JS_SIMULATOR_ARM
+bool UseHardFpABI() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_USE_HARDFP_ABI;
+}
+#endif
+
+Registers::Code Registers::FromName(const char* name) {
+ // Check for some register aliases first.
+ if (strcmp(name, "ip") == 0) {
+ return ip;
+ }
+ if (strcmp(name, "r13") == 0) {
+ return r13;
+ }
+ if (strcmp(name, "lr") == 0) {
+ return lr;
+ }
+ if (strcmp(name, "r15") == 0) {
+ return r15;
+ }
+
+ for (size_t i = 0; i < Total; i++) {
+ if (strcmp(GetName(i), name) == 0) {
+ return Code(i);
+ }
+ }
+
+ return Invalid;
+}
+
+FloatRegisters::Code FloatRegisters::FromName(const char* name) {
+ for (size_t i = 0; i < TotalSingle; ++i) {
+ if (strcmp(GetSingleName(Encoding(i)), name) == 0) {
+ return VFPRegister(i, VFPRegister::Single).code();
+ }
+ }
+ for (size_t i = 0; i < TotalDouble; ++i) {
+ if (strcmp(GetDoubleName(Encoding(i)), name) == 0) {
+ return VFPRegister(i, VFPRegister::Double).code();
+ }
+ }
+
+ return Invalid;
+}
+
+FloatRegisterSet VFPRegister::ReduceSetForPush(const FloatRegisterSet& s) {
+#ifdef ENABLE_WASM_SIMD
+# error "Needs more careful logic if SIMD is enabled"
+#endif
+
+ LiveFloatRegisterSet mod;
+ for (FloatRegisterIterator iter(s); iter.more(); ++iter) {
+ if ((*iter).isSingle()) {
+ // Add in just this float.
+ mod.addUnchecked(*iter);
+ } else if ((*iter).id() < 16) {
+ // A double with an overlay, add in both floats.
+ mod.addUnchecked((*iter).singleOverlay(0));
+ mod.addUnchecked((*iter).singleOverlay(1));
+ } else {
+ // Add in the lone double in the range 16-31.
+ mod.addUnchecked(*iter);
+ }
+ }
+ return mod.set();
+}
+
+uint32_t VFPRegister::GetPushSizeInBytes(const FloatRegisterSet& s) {
+#ifdef ENABLE_WASM_SIMD
+# error "Needs more careful logic if SIMD is enabled"
+#endif
+
+ FloatRegisterSet ss = s.reduceSetForPush();
+ uint64_t bits = ss.bits();
+ uint32_t ret = mozilla::CountPopulation32(bits & 0xffffffff) * sizeof(float);
+ ret += mozilla::CountPopulation32(bits >> 32) * sizeof(double);
+ return ret;
+}
+uint32_t VFPRegister::getRegisterDumpOffsetInBytes() {
+#ifdef ENABLE_WASM_SIMD
+# error "Needs more careful logic if SIMD is enabled"
+#endif
+
+ if (isSingle()) {
+ return id() * sizeof(float);
+ }
+ if (isDouble()) {
+ return id() * sizeof(double);
+ }
+ MOZ_CRASH("not Single or Double");
+}
+
+uint32_t FloatRegisters::ActualTotalPhys() {
+ if (Has32DP()) {
+ return 32;
+ }
+ return 16;
+}
+
+void FlushICache(void* code, size_t size) {
+#if defined(JS_SIMULATOR_ARM)
+ js::jit::SimulatorProcess::FlushICache(code, size);
+
+#elif (defined(__linux__) || defined(ANDROID)) && defined(__GNUC__)
+ void* end = (void*)(reinterpret_cast<char*>(code) + size);
+ asm volatile(
+ "push {r7}\n"
+ "mov r0, %0\n"
+ "mov r1, %1\n"
+ "mov r7, #0xf0000\n"
+ "add r7, r7, #0x2\n"
+ "mov r2, #0x0\n"
+ "svc 0x0\n"
+ "pop {r7}\n"
+ :
+ : "r"(code), "r"(end)
+ : "r0", "r1", "r2");
+
+ if (forceDoubleCacheFlush) {
+ void* start = (void*)((uintptr_t)code + 1);
+ asm volatile(
+ "push {r7}\n"
+ "mov r0, %0\n"
+ "mov r1, %1\n"
+ "mov r7, #0xf0000\n"
+ "add r7, r7, #0x2\n"
+ "mov r2, #0x0\n"
+ "svc 0x0\n"
+ "pop {r7}\n"
+ :
+ : "r"(start), "r"(end)
+ : "r0", "r1", "r2");
+ }
+
+#elif defined(__FreeBSD__) || defined(__NetBSD__)
+ __clear_cache(code, reinterpret_cast<char*>(code) + size);
+
+#elif defined(XP_IOS)
+ sys_icache_invalidate(code, size);
+
+#else
+# error "Unexpected platform"
+#endif
+}
+
+void FlushExecutionContext() {
+#ifndef JS_SIMULATOR_ARM
+ // Ensure that any instructions already in the pipeline are discarded and
+ // reloaded from the icache.
+ asm volatile("isb\n" : : : "memory");
+#else
+ // We assume the icache flushing routines on other platforms take care of this
+#endif
+}
+
+} // namespace jit
+} // namespace js
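
As a quick illustration of how the canonicalization rules above compose, here is a minimal standalone C++ sketch, separate from the patch itself, that mirrors the logic of CanonicalizeARMHwCapFlags() using the same HWCAP bit values defined above: reporting only vfpv4 ends up implying vfpv3, vfp, armv7 and vfpd32.

// Illustrative only: mirrors the CanonicalizeARMHwCapFlags() rules above
// with the same bit values, so the expansion chain can be checked in
// isolation. Not part of the Firefox sources.
#include <cassert>
#include <cstdint>
#include <cstdio>

static const uint32_t VFP = 1 << 6, NEON = 1 << 12, VFPv3 = 1 << 13,
                      VFPv3D16 = 1 << 14, VFPv4 = 1 << 16, VFPD32 = 1 << 19,
                      ARMv7 = 1 << 28;

static uint32_t Canonicalize(uint32_t f) {
  if (f & VFPv4) f |= VFPv3;                  // VFPv3 is a subset of VFPv4.
  if (f & VFPv3D16) f |= VFPv3;
  if (f & VFPv3) f |= VFP;
  if (f & NEON) f |= VFP;                     // Neon implies floating point.
  if (f & (VFPv3 | NEON)) f |= ARMv7;         // VFPv3/Neon imply ARMv7.
  if ((f & VFP) && (f & ARMv7)) f |= VFPv3;
  if ((f & VFPv3) && !(f & VFPv3D16)) f |= VFPD32;
  return f;
}

int main() {
  uint32_t f = Canonicalize(VFPv4);  // e.g. ARMHWCAP=vfpv4
  assert((f & VFPv3) && (f & VFP) && (f & ARMv7) && (f & VFPD32));
  printf("canonicalized flags: 0x%x\n", f);
  return 0;
}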
diff --git a/js/src/jit/arm/Architecture-arm.h b/js/src/jit/arm/Architecture-arm.h
new file mode 100644
index 0000000000..fa2ae8e0ed
--- /dev/null
+++ b/js/src/jit/arm/Architecture-arm.h
@@ -0,0 +1,733 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_Architecture_arm_h
+#define jit_arm_Architecture_arm_h
+
+#include "mozilla/MathAlgorithms.h"
+
+#include <algorithm>
+#include <limits.h>
+#include <stdint.h>
+
+#include "jit/shared/Architecture-shared.h"
+
+#include "js/Utility.h"
+
+// GCC versions 4.6 and above define __ARM_PCS_VFP to denote a hard-float
+// ABI target. The iOS toolchain doesn't define anything specific here,
+// but iOS always supports VFP.
+#if defined(__ARM_PCS_VFP) || defined(XP_IOS)
+# define JS_CODEGEN_ARM_HARDFP
+#endif
+
+namespace js {
+namespace jit {
+
+// These offsets are specific to nunboxing, and capture offsets into the
+// components of a js::Value.
+static const int32_t NUNBOX32_TYPE_OFFSET = 4;
+static const int32_t NUNBOX32_PAYLOAD_OFFSET = 0;
+
+static const uint32_t ShadowStackSpace = 0;
+
+// How far forward/back can a jump go? Provide a generous buffer for thunks.
+static const uint32_t JumpImmediateRange = 20 * 1024 * 1024;
+
+class Registers {
+ public:
+ enum RegisterID {
+ r0 = 0,
+ r1,
+ r2,
+ r3,
+ r4,
+ r5,
+ r6,
+ r7,
+ r8,
+ r9,
+ r10,
+ r11,
+ fp = r11,
+ r12,
+ ip = r12,
+ r13,
+ sp = r13,
+ r14,
+ lr = r14,
+ r15,
+ pc = r15,
+ invalid_reg
+ };
+ typedef uint8_t Code;
+ typedef RegisterID Encoding;
+
+ // Content spilled during bailouts.
+ union RegisterContent {
+ uintptr_t r;
+ };
+
+ static const char* GetName(Code code) {
+ MOZ_ASSERT(code < Total);
+ static const char* const Names[] = {"r0", "r1", "r2", "r3", "r4", "r5",
+ "r6", "r7", "r8", "r9", "r10", "r11",
+ "r12", "sp", "r14", "pc"};
+ return Names[code];
+ }
+ static const char* GetName(Encoding i) { return GetName(Code(i)); }
+
+ static Code FromName(const char* name);
+
+ static const Encoding StackPointer = sp;
+ static const Encoding Invalid = invalid_reg;
+
+ static const uint32_t Total = 16;
+ static const uint32_t Allocatable = 13;
+
+ typedef uint32_t SetType;
+
+ static const SetType AllMask = (1 << Total) - 1;
+ static const SetType ArgRegMask =
+ (1 << r0) | (1 << r1) | (1 << r2) | (1 << r3);
+
+ static const SetType VolatileMask =
+ (1 << r0) | (1 << r1) | (1 << Registers::r2) |
+ (1 << Registers::r3)
+#if defined(XP_IOS)
+ // per
+ // https://developer.apple.com/library/ios/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARMv6FunctionCallingConventions.html#//apple_ref/doc/uid/TP40009021-SW4
+ | (1 << Registers::r9)
+#endif
+ ;
+
+ static const SetType NonVolatileMask =
+ (1 << Registers::r4) | (1 << Registers::r5) | (1 << Registers::r6) |
+ (1 << Registers::r7) | (1 << Registers::r8) |
+#if !defined(XP_IOS)
+ (1 << Registers::r9) |
+#endif
+ (1 << Registers::r10) | (1 << Registers::r11) | (1 << Registers::r12) |
+ (1 << Registers::r14);
+
+ static const SetType WrapperMask = VolatileMask | // = arguments
+ (1 << Registers::r4) | // = outReg
+ (1 << Registers::r5); // = argBase
+
+ static const SetType NonAllocatableMask =
+ (1 << Registers::sp) | (1 << Registers::r12) | // r12 = ip = scratch
+ (1 << Registers::lr) | (1 << Registers::pc) | (1 << Registers::fp);
+
+ // Registers returned from a JS -> JS call.
+ static const SetType JSCallMask = (1 << Registers::r2) | (1 << Registers::r3);
+
+ // Registers returned from a JS -> C call.
+ static const SetType CallMask =
+ (1 << Registers::r0) |
+ (1 << Registers::r1); // Used for double-size returns.
+
+ static const SetType AllocatableMask = AllMask & ~NonAllocatableMask;
+
+ static uint32_t SetSize(SetType x) {
+ static_assert(sizeof(SetType) == 4, "SetType must be 32 bits");
+ return mozilla::CountPopulation32(x);
+ }
+ static uint32_t FirstBit(SetType x) {
+ return mozilla::CountTrailingZeroes32(x);
+ }
+ static uint32_t LastBit(SetType x) {
+ return 31 - mozilla::CountLeadingZeroes32(x);
+ }
+};
+
+// Smallest integer type that can hold a register bitmask.
+typedef uint16_t PackedRegisterMask;
+
+class FloatRegisters {
+ public:
+ enum FPRegisterID {
+ s0,
+ s1,
+ s2,
+ s3,
+ s4,
+ s5,
+ s6,
+ s7,
+ s8,
+ s9,
+ s10,
+ s11,
+ s12,
+ s13,
+ s14,
+ s15,
+ s16,
+ s17,
+ s18,
+ s19,
+ s20,
+ s21,
+ s22,
+ s23,
+ s24,
+ s25,
+ s26,
+ s27,
+ s28,
+ s29,
+ s30,
+ s31,
+ d0,
+ d1,
+ d2,
+ d3,
+ d4,
+ d5,
+ d6,
+ d7,
+ d8,
+ d9,
+ d10,
+ d11,
+ d12,
+ d13,
+ d14,
+ d15,
+ d16,
+ d17,
+ d18,
+ d19,
+ d20,
+ d21,
+ d22,
+ d23,
+ d24,
+ d25,
+ d26,
+ d27,
+ d28,
+ d29,
+ d30,
+ d31,
+ invalid_freg
+ };
+
+ typedef uint32_t Code;
+ typedef FPRegisterID Encoding;
+
+ // Content spilled during bailouts.
+ union RegisterContent {
+ double d;
+ };
+
+ static const char* GetDoubleName(Encoding code) {
+ static const char* const Names[] = {
+ "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+ "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
+ "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+ "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"};
+ return Names[code];
+ }
+ static const char* GetSingleName(Encoding code) {
+ static const char* const Names[] = {
+ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+ "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
+ "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
+ "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31"};
+ return Names[code];
+ }
+
+ static Code FromName(const char* name);
+
+ static const Encoding Invalid = invalid_freg;
+ static const uint32_t Total = 48;
+ static const uint32_t TotalDouble = 16;
+ static const uint32_t TotalSingle = 32;
+ static const uint32_t Allocatable = 45;
+ // There are only 32 places that we can put values.
+ static const uint32_t TotalPhys = 32;
+ static uint32_t ActualTotalPhys();
+
+ /* clang-format off */
+ // ARM float registers overlap in a way that, for each double register in
+ // the range d0-d15, there are 2 single registers in the range s0-s31.
+ // d16-d31 have no single-register aliases. The aliasing rule states that
+ // d{n} aliases s{2n} and s{2n+1}, for n in [0 .. 15].
+ //
+ // The register set is used to represent either allocatable or live
+ // registers. It maps d0-d15 and s0-s31 to a single bit each. The
+ // registers d16-d31 are not used at the moment.
+ //
+ // uuuu uuuu uuuu uuuu dddd dddd dddd dddd ssss ssss ssss ssss ssss ssss ssss ssss
+ // ^ ^ ^ ^
+ // '-- d15 d0 --' '-- s31 s0 --'
+ //
+ // LiveSets are handled by adding the bit of each register without
+ // considering the aliases.
+ //
+ // AllocatableSets are handled by adding and removing the bits of each
+ // aligned-or-dominated-aliased register.
+ //
+ // ...0...00... : s{2n}, s{2n+1} and d{n} are not available
+ // ...1...01... : s{2n} is available (*)
+ // ...0...10... : s{2n+1} is available
+ // ...1...11... : s{2n}, s{2n+1} and d{n} are available
+ //
+ // (*) Note that d{n} bit is set, but is not available because s{2n+1} bit
+ // is not set, which is required as d{n} dominates s{2n+1}. The d{n} bit is
+ // set, because s{2n} is aligned.
+ //
+ // | d{n} |
+ // | s{2n+1} | s{2n} |
+ //
+ /* clang-format on */
+ typedef uint64_t SetType;
+ static const SetType AllSingleMask = (1ull << TotalSingle) - 1;
+ static const SetType AllDoubleMask = ((1ull << TotalDouble) - 1)
+ << TotalSingle;
+ static const SetType AllMask = AllDoubleMask | AllSingleMask;
+
+ // d15 is the ScratchFloatReg.
+ static const SetType NonVolatileDoubleMask =
+ ((1ULL << d8) | (1ULL << d9) | (1ULL << d10) | (1ULL << d11) |
+ (1ULL << d12) | (1ULL << d13) | (1ULL << d14));
+ // s30 and s31 alias d15.
+ static const SetType NonVolatileMask =
+ (NonVolatileDoubleMask |
+ ((1 << s16) | (1 << s17) | (1 << s18) | (1 << s19) | (1 << s20) |
+ (1 << s21) | (1 << s22) | (1 << s23) | (1 << s24) | (1 << s25) |
+ (1 << s26) | (1 << s27) | (1 << s28) | (1 << s29) | (1 << s30)));
+
+ static const SetType VolatileMask = AllMask & ~NonVolatileMask;
+ static const SetType VolatileDoubleMask =
+ AllDoubleMask & ~NonVolatileDoubleMask;
+
+ static const SetType WrapperMask = VolatileMask;
+
+ // d15 is the ARM scratch float register.
+ // s30 and s31 alias d15.
+ static const SetType NonAllocatableMask =
+ ((1ULL << d15)) | (1ULL << s30) | (1ULL << s31);
+
+ static const SetType AllocatableMask = AllMask & ~NonAllocatableMask;
+};
+
+static const uint32_t SpillSlotSize =
+ std::max(sizeof(Registers::RegisterContent),
+ sizeof(FloatRegisters::RegisterContent));
+
+template <typename T>
+class TypedRegisterSet;
+
+class VFPRegister {
+ public:
+ // What type of data is being stored in this register? UInt / Int are
+ // specifically for vcvt, where we need to know how the data is supposed to
+ // be converted.
+ enum RegType : uint8_t { Single = 0x0, Double = 0x1, UInt = 0x2, Int = 0x3 };
+
+ typedef FloatRegisters Codes;
+ typedef Codes::Code Code;
+ typedef Codes::Encoding Encoding;
+
+ // Bitfields below are all uint32_t to make sure MSVC packs them correctly.
+ public:
+ // ARM doesn't have more than 32 registers of each type, so 5 bits should
+ // suffice.
+ uint32_t code_ : 5;
+
+ protected:
+ uint32_t kind : 2;
+ uint32_t _isInvalid : 1;
+ uint32_t _isMissing : 1;
+
+ public:
+ constexpr VFPRegister(uint32_t r, RegType k)
+ : code_(Code(r)), kind(k), _isInvalid(false), _isMissing(false) {}
+ constexpr VFPRegister()
+ : code_(Code(0)), kind(Double), _isInvalid(true), _isMissing(false) {}
+
+ constexpr VFPRegister(RegType k, uint32_t id, bool invalid, bool missing)
+ : code_(Code(id)), kind(k), _isInvalid(invalid), _isMissing(missing) {}
+
+ explicit constexpr VFPRegister(Code id)
+ : code_(id), kind(Double), _isInvalid(false), _isMissing(false) {}
+ bool operator==(const VFPRegister& other) const {
+ return kind == other.kind && code_ == other.code_ &&
+ isInvalid() == other.isInvalid();
+ }
+ bool operator!=(const VFPRegister& other) const { return !operator==(other); }
+
+ bool isSingle() const { return kind == Single; }
+ bool isDouble() const { return kind == Double; }
+ bool isSimd128() const { return false; }
+ bool isFloat() const { return (kind == Double) || (kind == Single); }
+ bool isInt() const { return (kind == UInt) || (kind == Int); }
+ bool isSInt() const { return kind == Int; }
+ bool isUInt() const { return kind == UInt; }
+ bool equiv(const VFPRegister& other) const { return other.kind == kind; }
+ size_t size() const { return (kind == Double) ? 8 : 4; }
+ bool isInvalid() const { return _isInvalid; }
+ bool isMissing() const {
+ MOZ_ASSERT(!_isInvalid);
+ return _isMissing;
+ }
+
+ VFPRegister doubleOverlay(unsigned int which = 0) const;
+ VFPRegister singleOverlay(unsigned int which = 0) const;
+ VFPRegister sintOverlay(unsigned int which = 0) const;
+ VFPRegister uintOverlay(unsigned int which = 0) const;
+
+ VFPRegister asSingle() const { return singleOverlay(); }
+ VFPRegister asDouble() const { return doubleOverlay(); }
+ VFPRegister asSimd128() const { MOZ_CRASH("NYI"); }
+
+ struct VFPRegIndexSplit;
+ VFPRegIndexSplit encode();
+
+ // For serializing values.
+ struct VFPRegIndexSplit {
+ const uint32_t block : 4;
+ const uint32_t bit : 1;
+
+ private:
+ friend VFPRegIndexSplit js::jit::VFPRegister::encode();
+
+ VFPRegIndexSplit(uint32_t block_, uint32_t bit_)
+ : block(block_), bit(bit_) {
+ MOZ_ASSERT(block == block_);
+ MOZ_ASSERT(bit == bit_);
+ }
+ };
+
+ Code code() const {
+ MOZ_ASSERT(!_isInvalid && !_isMissing);
+ // This should only be used in areas where we only have doubles and
+ // singles.
+ MOZ_ASSERT(isFloat());
+ return Code(code_ | (kind << 5));
+ }
+ Encoding encoding() const {
+ MOZ_ASSERT(!_isInvalid && !_isMissing);
+ return Encoding(code_);
+ }
+ uint32_t id() const { return code_; }
+ static VFPRegister FromCode(uint32_t i) {
+ uint32_t code = i & 31;
+ uint32_t kind = i >> 5;
+ return VFPRegister(code, RegType(kind));
+ }
+ bool volatile_() const {
+ if (isDouble()) {
+ return !!((1ULL << (code_ >> 1)) & FloatRegisters::VolatileMask);
+ }
+ return !!((1ULL << code_) & FloatRegisters::VolatileMask);
+ }
+ const char* name() const {
+ if (isDouble()) {
+ return FloatRegisters::GetDoubleName(Encoding(code_));
+ }
+ return FloatRegisters::GetSingleName(Encoding(code_));
+ }
+ bool aliases(const VFPRegister& other) {
+ if (kind == other.kind) {
+ return code_ == other.code_;
+ }
+ return doubleOverlay() == other.doubleOverlay();
+ }
+ static const int NumAliasedDoubles = 16;
+ uint32_t numAliased() const {
+ if (isDouble()) {
+ if (code_ < NumAliasedDoubles) {
+ return 3;
+ }
+ return 1;
+ }
+ return 2;
+ }
+
+ VFPRegister aliased(uint32_t aliasIdx) {
+ if (aliasIdx == 0) {
+ return *this;
+ }
+ if (isDouble()) {
+ MOZ_ASSERT(code_ < NumAliasedDoubles);
+ MOZ_ASSERT(aliasIdx <= 2);
+ return singleOverlay(aliasIdx - 1);
+ }
+ MOZ_ASSERT(aliasIdx == 1);
+ return doubleOverlay(aliasIdx - 1);
+ }
+ uint32_t numAlignedAliased() const {
+ if (isDouble()) {
+ if (code_ < NumAliasedDoubles) {
+ return 2;
+ }
+ return 1;
+ }
+ // s1 has 0 other aligned aliases, 1 total.
+ // s0 has 1 other aligned alias, 2 total.
+ return 2 - (code_ & 1);
+ }
+ // | d0 |
+ // | s0 | s1 |
+ // If we've stored s0 and s1 in memory, we also want to say that d0 is
+ // stored there, but it is only stored at the location where it is aligned
+ // e.g. at s0, not s1.
+ VFPRegister alignedAliased(uint32_t aliasIdx) {
+ if (aliasIdx == 0) {
+ return *this;
+ }
+ MOZ_ASSERT(aliasIdx == 1);
+ if (isDouble()) {
+ MOZ_ASSERT(code_ < NumAliasedDoubles);
+ return singleOverlay(aliasIdx - 1);
+ }
+ MOZ_ASSERT((code_ & 1) == 0);
+ return doubleOverlay(aliasIdx - 1);
+ }
+
+ typedef FloatRegisters::SetType SetType;
+
+ // This function is used to ensure that a register set can hold all Single
+ // registers, even if we are given a mix of double and single
+ // registers.
+ //
+ // s0.alignedOrDominatedAliasedSet() == s0 | d0.
+ // s1.alignedOrDominatedAliasedSet() == s1.
+ // d0.alignedOrDominatedAliasedSet() == s0 | s1 | d0.
+ //
+ // This way the Allocatable register set does not have to do any arithmetic
+ // to know if a register is available or not, as we have the following
+ // relations:
+ //
+ // d0.alignedOrDominatedAliasedSet() ==
+ // s0.alignedOrDominatedAliasedSet() | s1.alignedOrDominatedAliasedSet()
+ //
+ // s0.alignedOrDominatedAliasedSet() & s1.alignedOrDominatedAliasedSet() == 0
+ //
+ SetType alignedOrDominatedAliasedSet() const {
+ if (isSingle()) {
+ if (code_ % 2 != 0) {
+ return SetType(1) << code_;
+ }
+ return (SetType(1) << code_) | (SetType(1) << (32 + code_ / 2));
+ }
+
+ MOZ_ASSERT(isDouble());
+ return (SetType(0b11) << (code_ * 2)) | (SetType(1) << (32 + code_));
+ }
+
+ static constexpr RegTypeName DefaultType = RegTypeName::Float64;
+
+ template <RegTypeName = DefaultType>
+ static SetType LiveAsIndexableSet(SetType s) {
+ return SetType(0);
+ }
+
+ template <RegTypeName Name = DefaultType>
+ static SetType AllocatableAsIndexableSet(SetType s) {
+ static_assert(Name != RegTypeName::Any, "Allocatable set are not iterable");
+ return SetType(0);
+ }
+
+ static uint32_t SetSize(SetType x) {
+ static_assert(sizeof(SetType) == 8, "SetType must be 64 bits");
+ return mozilla::CountPopulation32(x);
+ }
+ static Code FromName(const char* name) {
+ return FloatRegisters::FromName(name);
+ }
+ static TypedRegisterSet<VFPRegister> ReduceSetForPush(
+ const TypedRegisterSet<VFPRegister>& s);
+ static uint32_t GetPushSizeInBytes(const TypedRegisterSet<VFPRegister>& s);
+ uint32_t getRegisterDumpOffsetInBytes();
+ static uint32_t FirstBit(SetType x) {
+ return mozilla::CountTrailingZeroes64(x);
+ }
+ static uint32_t LastBit(SetType x) {
+ return 63 - mozilla::CountLeadingZeroes64(x);
+ }
+};
+
+template <>
+inline VFPRegister::SetType
+VFPRegister::LiveAsIndexableSet<RegTypeName::Float32>(SetType set) {
+ return set & FloatRegisters::AllSingleMask;
+}
+
+template <>
+inline VFPRegister::SetType
+VFPRegister::LiveAsIndexableSet<RegTypeName::Float64>(SetType set) {
+ return set & FloatRegisters::AllDoubleMask;
+}
+
+template <>
+inline VFPRegister::SetType VFPRegister::LiveAsIndexableSet<RegTypeName::Any>(
+ SetType set) {
+ return set;
+}
+
+template <>
+inline VFPRegister::SetType
+VFPRegister::AllocatableAsIndexableSet<RegTypeName::Float32>(SetType set) {
+ // Single registers do not dominate any smaller registers, thus masking
+ // is enough to convert an allocatable set into a set listing all
+ // available single registers.
+ return set & FloatRegisters::AllSingleMask;
+}
+
+template <>
+inline VFPRegister::SetType
+VFPRegister::AllocatableAsIndexableSet<RegTypeName::Float64>(SetType set) {
+ /* clang-format off */
+ // An allocatable float register set is represented as follow:
+ //
+ // uuuu uuuu uuuu uuuu dddd dddd dddd dddd ssss ssss ssss ssss ssss ssss ssss ssss
+ // ^ ^ ^ ^
+ // '-- d15 d0 --' '-- s31 s0 --'
+ //
+ // ...0...00... : s{2n}, s{2n+1} and d{n} are not available
+ // ...1...01... : s{2n} is available
+ // ...0...10... : s{2n+1} is available
+ // ...1...11... : s{2n}, s{2n+1} and d{n} are available
+ //
+ // The goal of this function is to return the set of double registers which
+ // are available as an indexable bit set. A double register's bit is set in
+ // the returned set iff that register is available.
+ //
+ // To do so, this function converts the 32-bit set of single registers
+ // into a 16-bit set of equivalent double registers. Then, we mask out
+ // double registers which do not have all the single registers that compose
+ // them. As the d{n} bit is set when s{2n} is available, we only need to take
+ // s{2n+1} into account.
+ /* clang-format on */
+
+ // Convert s7s6s5s4 s3s2s1s0 into s7s5s3s1, for all s0-s31.
+ SetType s2d = AllocatableAsIndexableSet<RegTypeName::Float32>(set);
+ static_assert(FloatRegisters::TotalSingle == 32, "Wrong mask");
+ s2d = (0xaaaaaaaa & s2d) >> 1; // Filter s{2n+1} registers.
+ // Group adjacent bits as follow:
+ // 0.0.s3.s1 == ((0.s3.0.s1) >> 1 | (0.s3.0.s1)) & 0b0011;
+ s2d = ((s2d >> 1) | s2d) & 0x33333333; // 0a0b --> 00ab
+ s2d = ((s2d >> 2) | s2d) & 0x0f0f0f0f; // 00ab00cd --> 0000abcd
+ s2d = ((s2d >> 4) | s2d) & 0x00ff00ff;
+ s2d = ((s2d >> 8) | s2d) & 0x0000ffff;
+ // Move the s7s5s3s1 to the aliased double positions.
+ s2d = s2d << FloatRegisters::TotalSingle;
+
+ // Note: We currently do not use any representation for d16-d31.
+ static_assert(FloatRegisters::TotalDouble == 16,
+ "d16-d31 do not have a single register mapping");
+
+ // Filter out any double register which are not allocatable due to
+ // non-aligned dominated single registers.
+ return set & s2d;
+}
+
+// The only floating point register set that we work with are the VFP Registers.
+typedef VFPRegister FloatRegister;
+
+uint32_t GetARMFlags();
+bool HasARMv7();
+bool HasMOVWT();
+bool HasLDSTREXBHD(); // {LD,ST}REX{B,H,D}
+bool HasDMBDSBISB(); // DMB, DSB, and ISB
+bool HasVFPv3();
+bool HasVFP();
+bool Has32DP();
+bool HasIDIV();
+bool HasNEON();
+
+extern volatile uint32_t armHwCapFlags;
+
+// Not part of the kernel HWCAP flags, but we need to know these, and these
+// bits are otherwise unused. Define these here so that their use can be
+// inlined by the simulator.
+
+// A bit to flag when signaled alignment faults are to be fixed up.
+#define HWCAP_FIXUP_FAULT (1 << 24)
+
+// A bit to flag when the flags are uninitialized, so they can be atomically
+// set.
+#define HWCAP_UNINITIALIZED (1 << 25)
+
+// A bit to flag when alignment faults are enabled and signal.
+#define HWCAP_ALIGNMENT_FAULT (1 << 26)
+
+// A bit to flag the use of the hardfp ABI.
+#define HWCAP_USE_HARDFP_ABI (1 << 27)
+
+// A bit to flag the use of the ARMv7 arch, otherwise ARMv6.
+#define HWCAP_ARMv7 (1 << 28)
+
+// Top three bits are reserved, do not use them.
+
+// Returns true when cpu alignment faults are enabled and signaled, and thus we
+// should ensure loads and stores are aligned.
+inline bool HasAlignmentFault() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_ALIGNMENT_FAULT;
+}
+
+#ifdef JS_SIMULATOR_ARM
+// Returns true when cpu alignment faults will be fixed up by the
+// "operating system", functionality that we emulate here.
+inline bool FixupFault() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_FIXUP_FAULT;
+}
+#endif
+
+// Arm/D32 has double registers that can NOT be treated as float32 and this
+// requires some dances in lowering.
+inline bool hasUnaliasedDouble() { return Has32DP(); }
+
+// On ARM, Dn aliases both S2n and S2n+1, so if you need to convert a float32 to
+// a double as a temporary, you need a temporary double register.
+inline bool hasMultiAlias() { return true; }
+
+// InitARMFlags is called from the JitContext constructor to read the hardware
+// flags. The call is a no-op after the first call, or if the JS shell has
+// already set the flags (it has a command line switch for this, see
+// ParseARMHwCapFlags).
+//
+// If the environment variable ARMHWCAP is set then the flags are read from it
+// instead; see ParseARMHwCapFlags.
+void InitARMFlags();
+
+// Register a string denoting ARM hardware flags. During engine initialization,
+// these flags will then be used instead of the actual hardware capabilities.
+// This must be called before JS_Init and the passed string's buffer must
+// outlive the JS_Init call.
+void SetARMHwCapFlagsString(const char* armHwCap);
+
+// Retrieve the ARM hardware flags as a bitmask. They must have been set.
+uint32_t GetARMFlags();
+
+// If the simulator is used then the ABI choice is dynamic. Otherwise the ABI is
+// static and useHardFpABI is inlined so that unused branches can be optimized
+// away.
+#ifdef JS_SIMULATOR_ARM
+bool UseHardFpABI();
+#else
+static inline bool UseHardFpABI() {
+# if defined(JS_CODEGEN_ARM_HARDFP)
+ return true;
+# else
+ return false;
+# endif
+}
+#endif
+
+// In order to handle SoftFp ABI calls, we need to be able to express that we
+// have ABIArg which are represented by pair of general purpose registers.
+#define JS_CODEGEN_REGISTER_PAIR 1
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_Architecture_arm_h */
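
To make the float-register bit layout documented in Architecture-arm.h concrete, here is a minimal standalone sketch, separate from the patch, that reproduces the alignedOrDominatedAliasedSet() encoding and checks the stated invariant that the set for d{n} is exactly the union of the sets for s{2n} and s{2n+1}, with the two single-register sets disjoint.

// Illustrative only, not part of the Firefox sources: reproduces the
// alignedOrDominatedAliasedSet() encoding documented above and checks that
// d{n} == s{2n} | s{2n+1} for the aliased range d0-d15.
#include <cassert>
#include <cstdint>

static uint64_t SingleSet(unsigned c) {         // s0..s31
  if (c % 2 != 0) return uint64_t(1) << c;      // s{2n+1}: its own bit only
  return (uint64_t(1) << c) | (uint64_t(1) << (32 + c / 2));  // s{2n} + d{n}
}
static uint64_t DoubleSet(unsigned n) {         // d0..d15
  return (uint64_t(0b11) << (n * 2)) | (uint64_t(1) << (32 + n));
}

int main() {
  for (unsigned n = 0; n < 16; n++) {
    assert(DoubleSet(n) == (SingleSet(2 * n) | SingleSet(2 * n + 1)));
    assert((SingleSet(2 * n) & SingleSet(2 * n + 1)) == 0);
  }
  return 0;
}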
diff --git a/js/src/jit/arm/Assembler-arm.cpp b/js/src/jit/arm/Assembler-arm.cpp
new file mode 100644
index 0000000000..a1213b6f21
--- /dev/null
+++ b/js/src/jit/arm/Assembler-arm.cpp
@@ -0,0 +1,2832 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm/Assembler-arm.h"
+
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Sprintf.h"
+
+#include <type_traits>
+
+#include "gc/Marking.h"
+#include "jit/arm/disasm/Disasm-arm.h"
+#include "jit/arm/MacroAssembler-arm.h"
+#include "jit/AutoWritableJitCode.h"
+#include "jit/ExecutableAllocator.h"
+#include "jit/MacroAssembler.h"
+#include "vm/Realm.h"
+
+using namespace js;
+using namespace js::jit;
+
+using mozilla::CountLeadingZeroes32;
+using mozilla::DebugOnly;
+
+using LabelDoc = DisassemblerSpew::LabelDoc;
+using LiteralDoc = DisassemblerSpew::LiteralDoc;
+
+void dbg_break() {}
+
+// The ABIArgGenerator is used for making system ABI calls and for inter-wasm
+// calls. The system ABI can either be SoftFp or HardFp, and inter-wasm calls
+// are always HardFp calls. The initialization defaults to HardFp, and the ABI
+// choice is made before any system ABI calls with the method "setUseHardFp".
+ABIArgGenerator::ABIArgGenerator()
+ : intRegIndex_(0),
+ floatRegIndex_(0),
+ stackOffset_(0),
+ current_(),
+ useHardFp_(true) {}
+
+// See the "Parameter Passing" section of the "Procedure Call Standard for the
+// ARM Architecture" documentation.
+ABIArg ABIArgGenerator::softNext(MIRType type) {
+ switch (type) {
+ case MIRType::Int32:
+ case MIRType::Pointer:
+ case MIRType::RefOrNull:
+ case MIRType::StackResults:
+ if (intRegIndex_ == NumIntArgRegs) {
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uint32_t);
+ break;
+ }
+ current_ = ABIArg(Register::FromCode(intRegIndex_));
+ intRegIndex_++;
+ break;
+ case MIRType::Int64:
+ // Make sure to use an even register index. Increase to next even number
+ // when odd.
+ intRegIndex_ = (intRegIndex_ + 1) & ~1;
+ if (intRegIndex_ == NumIntArgRegs) {
+ // Align the stack on 8 bytes.
+ static const uint32_t align = sizeof(uint64_t) - 1;
+ stackOffset_ = (stackOffset_ + align) & ~align;
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uint64_t);
+ break;
+ }
+ current_ = ABIArg(Register::FromCode(intRegIndex_),
+ Register::FromCode(intRegIndex_ + 1));
+ intRegIndex_ += 2;
+ break;
+ case MIRType::Float32:
+ if (intRegIndex_ == NumIntArgRegs) {
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uint32_t);
+ break;
+ }
+ current_ = ABIArg(Register::FromCode(intRegIndex_));
+ intRegIndex_++;
+ break;
+ case MIRType::Double:
+ // Make sure to use an even register index. Increase to next even number
+ // when odd.
+ intRegIndex_ = (intRegIndex_ + 1) & ~1;
+ if (intRegIndex_ == NumIntArgRegs) {
+ // Align the stack on 8 bytes.
+ static const uint32_t align = sizeof(double) - 1;
+ stackOffset_ = (stackOffset_ + align) & ~align;
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(double);
+ break;
+ }
+ current_ = ABIArg(Register::FromCode(intRegIndex_),
+ Register::FromCode(intRegIndex_ + 1));
+ intRegIndex_ += 2;
+ break;
+ default:
+ MOZ_CRASH("Unexpected argument type");
+ }
+
+ return current_;
+}
+
+ABIArg ABIArgGenerator::hardNext(MIRType type) {
+ switch (type) {
+ case MIRType::Int32:
+ case MIRType::Pointer:
+ case MIRType::RefOrNull:
+ case MIRType::StackResults:
+ if (intRegIndex_ == NumIntArgRegs) {
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uint32_t);
+ break;
+ }
+ current_ = ABIArg(Register::FromCode(intRegIndex_));
+ intRegIndex_++;
+ break;
+ case MIRType::Int64:
+ // Make sure to use an even register index. Increase to next even number
+ // when odd.
+ intRegIndex_ = (intRegIndex_ + 1) & ~1;
+ if (intRegIndex_ == NumIntArgRegs) {
+ // Align the stack on 8 bytes.
+ static const uint32_t align = sizeof(uint64_t) - 1;
+ stackOffset_ = (stackOffset_ + align) & ~align;
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uint64_t);
+ break;
+ }
+ current_ = ABIArg(Register::FromCode(intRegIndex_),
+ Register::FromCode(intRegIndex_ + 1));
+ intRegIndex_ += 2;
+ break;
+ case MIRType::Float32:
+ if (floatRegIndex_ == NumFloatArgRegs) {
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uint32_t);
+ break;
+ }
+ current_ = ABIArg(VFPRegister(floatRegIndex_, VFPRegister::Single));
+ floatRegIndex_++;
+ break;
+ case MIRType::Double:
+ // Double registers are composed of 2 float registers, thus we have to
+ // skip any float register which cannot be used in a pair of float
+ // registers in which a double value can be stored.
+ floatRegIndex_ = (floatRegIndex_ + 1) & ~1;
+ if (floatRegIndex_ == NumFloatArgRegs) {
+ static const uint32_t align = sizeof(double) - 1;
+ stackOffset_ = (stackOffset_ + align) & ~align;
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uint64_t);
+ break;
+ }
+ current_ = ABIArg(VFPRegister(floatRegIndex_ >> 1, VFPRegister::Double));
+ floatRegIndex_ += 2;
+ break;
+ default:
+ MOZ_CRASH("Unexpected argument type");
+ }
+
+ return current_;
+}
+
+ABIArg ABIArgGenerator::next(MIRType type) {
+ if (useHardFp_) {
+ return hardNext(type);
+ }
+ return softNext(type);
+}
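
As an illustration of the soft-float argument assignment implemented by softNext() above, here is a minimal standalone sketch, separate from the patch, showing that for an (int32, double, int32) signature the double is aligned to the even register pair r2:r3 and the trailing int32 spills to the stack.

// Illustrative only, not part of the Firefox sources: the even-pair and
// stack-alignment rules used by softNext() above, for an (int32, double,
// int32) signature under the soft-float calling convention.
#include <cassert>
#include <cstdio>

struct SoftArgGen {
  unsigned intReg = 0;       // next of r0..r3
  unsigned stackOffset = 0;  // bytes past the register area

  // Returns a register index 0..3, or 4 + offset for a stack slot
  // (a toy encoding, just for the asserts below).
  unsigned next32() {
    if (intReg == 4) { unsigned o = stackOffset; stackOffset += 4; return 4 + o; }
    return intReg++;
  }
  unsigned next64() {
    intReg = (intReg + 1) & ~1u;              // even GPR pair (r0/r1 or r2/r3)
    if (intReg == 4) {
      stackOffset = (stackOffset + 7) & ~7u;  // 8-byte stack alignment
      unsigned o = stackOffset; stackOffset += 8; return 4 + o;
    }
    unsigned r = intReg; intReg += 2; return r;
  }
};

int main() {
  SoftArgGen gen;
  assert(gen.next32() == 0);  // int32  -> r0
  assert(gen.next64() == 2);  // double -> r2:r3 (skips r1 for alignment)
  assert(gen.next32() == 4);  // int32  -> first stack slot
  printf("ok\n");
  return 0;
}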
+
+bool js::jit::IsUnaligned(const wasm::MemoryAccessDesc& access) {
+ if (!access.align()) {
+ return false;
+ }
+
+ if (access.type() == Scalar::Float64 && access.align() >= 4) {
+ return false;
+ }
+
+ return access.align() < access.byteSize();
+}
+
+// Encode a standard register when it is being used as src1, the dest, and an
+// extra register. These should never be called with an InvalidReg.
+uint32_t js::jit::RT(Register r) {
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return r.code() << 12;
+}
+
+uint32_t js::jit::RN(Register r) {
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return r.code() << 16;
+}
+
+uint32_t js::jit::RD(Register r) {
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return r.code() << 12;
+}
+
+uint32_t js::jit::RM(Register r) {
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return r.code() << 8;
+}
+
+// Encode a standard register when it is being used as src1, the dest, and an
+// extra register. For these, an InvalidReg is used to indicate an optional
+// register that has been omitted.
+uint32_t js::jit::maybeRT(Register r) {
+ if (r == InvalidReg) {
+ return 0;
+ }
+
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return r.code() << 12;
+}
+
+uint32_t js::jit::maybeRN(Register r) {
+ if (r == InvalidReg) {
+ return 0;
+ }
+
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return r.code() << 16;
+}
+
+uint32_t js::jit::maybeRD(Register r) {
+ if (r == InvalidReg) {
+ return 0;
+ }
+
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return r.code() << 12;
+}
+
+Register js::jit::toRD(Instruction i) {
+ return Register::FromCode((i.encode() >> 12) & 0xf);
+}
+Register js::jit::toR(Instruction i) {
+ return Register::FromCode(i.encode() & 0xf);
+}
+
+Register js::jit::toRM(Instruction i) {
+ return Register::FromCode((i.encode() >> 8) & 0xf);
+}
+
+Register js::jit::toRN(Instruction i) {
+ return Register::FromCode((i.encode() >> 16) & 0xf);
+}
+
+uint32_t js::jit::VD(VFPRegister vr) {
+ if (vr.isMissing()) {
+ return 0;
+ }
+
+ // Bits 15,14,13,12, 22.
+ VFPRegister::VFPRegIndexSplit s = vr.encode();
+ return s.bit << 22 | s.block << 12;
+}
+uint32_t js::jit::VN(VFPRegister vr) {
+ if (vr.isMissing()) {
+ return 0;
+ }
+
+ // Bits 19,18,17,16, 7.
+ VFPRegister::VFPRegIndexSplit s = vr.encode();
+ return s.bit << 7 | s.block << 16;
+}
+uint32_t js::jit::VM(VFPRegister vr) {
+ if (vr.isMissing()) {
+ return 0;
+ }
+
+ // Bits 5, 3,2,1,0.
+ VFPRegister::VFPRegIndexSplit s = vr.encode();
+ return s.bit << 5 | s.block;
+}
+
+VFPRegister::VFPRegIndexSplit jit::VFPRegister::encode() {
+ MOZ_ASSERT(!_isInvalid);
+
+ switch (kind) {
+ case Double:
+ return VFPRegIndexSplit(code_ & 0xf, code_ >> 4);
+ case Single:
+ return VFPRegIndexSplit(code_ >> 1, code_ & 1);
+ default:
+ // VFP register treated as an integer, NOT a gpr.
+ return VFPRegIndexSplit(code_ >> 1, code_ & 1);
+ }
+}
+
+bool InstDTR::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsDTRMask) == (uint32_t)IsDTR;
+}
+
+InstDTR* InstDTR::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstDTR*)&i;
+ }
+ return nullptr;
+}
+
+bool InstLDR::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsDTRMask) == (uint32_t)IsDTR;
+}
+
+InstLDR* InstLDR::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstLDR*)&i;
+ }
+ return nullptr;
+}
+
+InstNOP* InstNOP::AsTHIS(Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstNOP*)&i;
+ }
+ return nullptr;
+}
+
+bool InstNOP::IsTHIS(const Instruction& i) {
+ return (i.encode() & 0x0fffffff) == NopInst;
+}
+
+bool InstBranchReg::IsTHIS(const Instruction& i) {
+ return InstBXReg::IsTHIS(i) || InstBLXReg::IsTHIS(i);
+}
+
+InstBranchReg* InstBranchReg::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstBranchReg*)&i;
+ }
+ return nullptr;
+}
+void InstBranchReg::extractDest(Register* dest) { *dest = toR(*this); }
+bool InstBranchReg::checkDest(Register dest) { return dest == toR(*this); }
+
+bool InstBranchImm::IsTHIS(const Instruction& i) {
+ return InstBImm::IsTHIS(i) || InstBLImm::IsTHIS(i);
+}
+
+InstBranchImm* InstBranchImm::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstBranchImm*)&i;
+ }
+ return nullptr;
+}
+
+void InstBranchImm::extractImm(BOffImm* dest) { *dest = BOffImm(*this); }
+
+bool InstBXReg::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsBRegMask) == IsBX;
+}
+
+InstBXReg* InstBXReg::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstBXReg*)&i;
+ }
+ return nullptr;
+}
+
+bool InstBLXReg::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsBRegMask) == IsBLX;
+}
+InstBLXReg* InstBLXReg::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstBLXReg*)&i;
+ }
+ return nullptr;
+}
+
+bool InstBImm::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsBImmMask) == IsB;
+}
+InstBImm* InstBImm::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstBImm*)&i;
+ }
+ return nullptr;
+}
+
+bool InstBLImm::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsBImmMask) == IsBL;
+}
+InstBLImm* InstBLImm::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstBLImm*)&i;
+ }
+ return nullptr;
+}
+
+bool InstMovWT::IsTHIS(Instruction& i) {
+ return InstMovW::IsTHIS(i) || InstMovT::IsTHIS(i);
+}
+InstMovWT* InstMovWT::AsTHIS(Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstMovWT*)&i;
+ }
+ return nullptr;
+}
+
+void InstMovWT::extractImm(Imm16* imm) { *imm = Imm16(*this); }
+bool InstMovWT::checkImm(Imm16 imm) {
+ return imm.decode() == Imm16(*this).decode();
+}
+
+void InstMovWT::extractDest(Register* dest) { *dest = toRD(*this); }
+bool InstMovWT::checkDest(Register dest) { return dest == toRD(*this); }
+
+bool InstMovW::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsWTMask) == IsW;
+}
+
+InstMovW* InstMovW::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstMovW*)&i;
+ }
+ return nullptr;
+}
+InstMovT* InstMovT::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstMovT*)&i;
+ }
+ return nullptr;
+}
+
+bool InstMovT::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsWTMask) == IsT;
+}
+
+InstALU* InstALU::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstALU*)&i;
+ }
+ return nullptr;
+}
+bool InstALU::IsTHIS(const Instruction& i) {
+ return (i.encode() & ALUMask) == 0;
+}
+void InstALU::extractOp(ALUOp* ret) { *ret = ALUOp(encode() & (0xf << 21)); }
+bool InstALU::checkOp(ALUOp op) {
+ ALUOp mine;
+ extractOp(&mine);
+ return mine == op;
+}
+void InstALU::extractDest(Register* ret) { *ret = toRD(*this); }
+bool InstALU::checkDest(Register rd) { return rd == toRD(*this); }
+void InstALU::extractOp1(Register* ret) { *ret = toRN(*this); }
+bool InstALU::checkOp1(Register rn) { return rn == toRN(*this); }
+Operand2 InstALU::extractOp2() { return Operand2(encode()); }
+
+InstCMP* InstCMP::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstCMP*)&i;
+ }
+ return nullptr;
+}
+
+bool InstCMP::IsTHIS(const Instruction& i) {
+ return InstALU::IsTHIS(i) && InstALU::AsTHIS(i)->checkDest(r0) &&
+ InstALU::AsTHIS(i)->checkOp(OpCmp);
+}
+
+InstMOV* InstMOV::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstMOV*)&i;
+ }
+ return nullptr;
+}
+
+bool InstMOV::IsTHIS(const Instruction& i) {
+ return InstALU::IsTHIS(i) && InstALU::AsTHIS(i)->checkOp1(r0) &&
+ InstALU::AsTHIS(i)->checkOp(OpMov);
+}
+
+Op2Reg Operand2::toOp2Reg() const { return *(Op2Reg*)this; }
+
+Imm16::Imm16(Instruction& inst)
+ : lower_(inst.encode() & 0xfff),
+ upper_(inst.encode() >> 16),
+ invalid_(0xfff) {}
+
+Imm16::Imm16(uint32_t imm)
+ : lower_(imm & 0xfff), pad_(0), upper_((imm >> 12) & 0xf), invalid_(0) {
+ MOZ_ASSERT(decode() == imm);
+}
+
+Imm16::Imm16() : invalid_(0xfff) {}
+
+void Assembler::finish() {
+ flush();
+ MOZ_ASSERT(!isFinished);
+ isFinished = true;
+}
+
+bool Assembler::appendRawCode(const uint8_t* code, size_t numBytes) {
+ flush();
+ return m_buffer.appendRawCode(code, numBytes);
+}
+
+bool Assembler::reserve(size_t size) {
+ // This buffer uses fixed-size chunks so there's no point in reserving
+ // now vs. on-demand.
+ return !oom();
+}
+
+bool Assembler::swapBuffer(wasm::Bytes& bytes) {
+ // For now, specialize to the one use case. As long as wasm::Bytes is a
+ // Vector, not a linked-list of chunks, there's not much we can do other
+ // than copy.
+ MOZ_ASSERT(bytes.empty());
+ if (!bytes.resize(bytesNeeded())) {
+ return false;
+ }
+ m_buffer.executableCopy(bytes.begin());
+ return true;
+}
+
+void Assembler::executableCopy(uint8_t* buffer) {
+ MOZ_ASSERT(isFinished);
+ m_buffer.executableCopy(buffer);
+}
+
+class RelocationIterator {
+ CompactBufferReader reader_;
+ // Offset in bytes.
+ uint32_t offset_;
+
+ public:
+ explicit RelocationIterator(CompactBufferReader& reader) : reader_(reader) {}
+
+ bool read() {
+ if (!reader_.more()) {
+ return false;
+ }
+ offset_ = reader_.readUnsigned();
+ return true;
+ }
+
+ uint32_t offset() const { return offset_; }
+};
+
+template <class Iter>
+const uint32_t* Assembler::GetCF32Target(Iter* iter) {
+ Instruction* inst1 = iter->cur();
+
+ if (inst1->is<InstBranchImm>()) {
+ // See if we have a simple case, b #offset.
+ BOffImm imm;
+ InstBranchImm* jumpB = inst1->as<InstBranchImm>();
+ jumpB->extractImm(&imm);
+ return imm.getDest(inst1)->raw();
+ }
+
+ if (inst1->is<InstMovW>()) {
+ // See if we have the complex case:
+ // movw r_temp, #imm1
+ // movt r_temp, #imm2
+ // bx r_temp
+ // OR
+ // movw r_temp, #imm1
+ // movt r_temp, #imm2
+ // str pc, [sp]
+ // bx r_temp
+
+ Imm16 targ_bot;
+ Imm16 targ_top;
+ Register temp;
+
+ // Extract both the temp register and the bottom immediate.
+ InstMovW* bottom = inst1->as<InstMovW>();
+ bottom->extractImm(&targ_bot);
+ bottom->extractDest(&temp);
+
+ // Extract the top part of the immediate.
+ Instruction* inst2 = iter->next();
+ MOZ_ASSERT(inst2->is<InstMovT>());
+ InstMovT* top = inst2->as<InstMovT>();
+ top->extractImm(&targ_top);
+
+ // Make sure they are being loaded into the same register.
+ MOZ_ASSERT(top->checkDest(temp));
+
+ // Make sure we're branching to the same register.
+#ifdef DEBUG
+ // A toggled call sometimes has a NOP instead of a branch for the third
+ // instruction. No way to assert that it's valid in that situation.
+ Instruction* inst3 = iter->next();
+ if (!inst3->is<InstNOP>()) {
+ InstBranchReg* realBranch = nullptr;
+ if (inst3->is<InstBranchReg>()) {
+ realBranch = inst3->as<InstBranchReg>();
+ } else {
+ Instruction* inst4 = iter->next();
+ realBranch = inst4->as<InstBranchReg>();
+ }
+ MOZ_ASSERT(realBranch->checkDest(temp));
+ }
+#endif
+
+ uint32_t* dest = (uint32_t*)(targ_bot.decode() | (targ_top.decode() << 16));
+ return dest;
+ }
+
+ if (inst1->is<InstLDR>()) {
+ return *(uint32_t**)inst1->as<InstLDR>()->dest();
+ }
+
+ MOZ_CRASH("unsupported branch relocation");
+}
+
+uintptr_t Assembler::GetPointer(uint8_t* instPtr) {
+ InstructionIterator iter((Instruction*)instPtr);
+ uintptr_t ret = (uintptr_t)GetPtr32Target(iter, nullptr, nullptr);
+ return ret;
+}
+
+const uint32_t* Assembler::GetPtr32Target(InstructionIterator start,
+ Register* dest, RelocStyle* style) {
+ Instruction* load1 = start.cur();
+ Instruction* load2 = start.next();
+
+ if (load1->is<InstMovW>() && load2->is<InstMovT>()) {
+ if (style) {
+ *style = L_MOVWT;
+ }
+
+ // See if we have the complex case:
+ // movw r_temp, #imm1
+ // movt r_temp, #imm2
+
+ Imm16 targ_bot;
+ Imm16 targ_top;
+ Register temp;
+
+ // Extract both the temp register and the bottom immediate.
+ InstMovW* bottom = load1->as<InstMovW>();
+ bottom->extractImm(&targ_bot);
+ bottom->extractDest(&temp);
+
+ // Extract the top part of the immediate.
+ InstMovT* top = load2->as<InstMovT>();
+ top->extractImm(&targ_top);
+
+ // Make sure they are being loaded into the same register.
+ MOZ_ASSERT(top->checkDest(temp));
+
+ if (dest) {
+ *dest = temp;
+ }
+
+ uint32_t* value =
+ (uint32_t*)(targ_bot.decode() | (targ_top.decode() << 16));
+ return value;
+ }
+
+ if (load1->is<InstLDR>()) {
+ if (style) {
+ *style = L_LDR;
+ }
+ if (dest) {
+ *dest = toRD(*load1);
+ }
+ return *(uint32_t**)load1->as<InstLDR>()->dest();
+ }
+
+ MOZ_CRASH("unsupported relocation");
+}
+
+static JitCode* CodeFromJump(InstructionIterator* jump) {
+ uint8_t* target = (uint8_t*)Assembler::GetCF32Target(jump);
+ return JitCode::FromExecutable(target);
+}
+
+void Assembler::TraceJumpRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader) {
+ RelocationIterator iter(reader);
+ while (iter.read()) {
+ InstructionIterator institer((Instruction*)(code->raw() + iter.offset()));
+ JitCode* child = CodeFromJump(&institer);
+ TraceManuallyBarrieredEdge(trc, &child, "rel32");
+ }
+}
+
+static void TraceOneDataRelocation(JSTracer* trc,
+ mozilla::Maybe<AutoWritableJitCode>& awjc,
+ JitCode* code, InstructionIterator iter) {
+ Register dest;
+ Assembler::RelocStyle rs;
+ const void* prior = Assembler::GetPtr32Target(iter, &dest, &rs);
+ void* ptr = const_cast<void*>(prior);
+
+ // No barrier needed since these are constants.
+ TraceManuallyBarrieredGenericPointerEdge(
+ trc, reinterpret_cast<gc::Cell**>(&ptr), "jit-masm-ptr");
+
+ if (ptr != prior) {
+ if (awjc.isNothing()) {
+ awjc.emplace(code);
+ }
+
+ MacroAssemblerARM::ma_mov_patch(Imm32(int32_t(ptr)), dest,
+ Assembler::Always, rs, iter);
+ }
+}
+
+/* static */
+void Assembler::TraceDataRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader) {
+ mozilla::Maybe<AutoWritableJitCode> awjc;
+ while (reader.more()) {
+ size_t offset = reader.readUnsigned();
+ InstructionIterator iter((Instruction*)(code->raw() + offset));
+ TraceOneDataRelocation(trc, awjc, code, iter);
+ }
+}
+
+void Assembler::copyJumpRelocationTable(uint8_t* dest) {
+ if (jumpRelocations_.length()) {
+ memcpy(dest, jumpRelocations_.buffer(), jumpRelocations_.length());
+ }
+}
+
+void Assembler::copyDataRelocationTable(uint8_t* dest) {
+ if (dataRelocations_.length()) {
+ memcpy(dest, dataRelocations_.buffer(), dataRelocations_.length());
+ }
+}
+
+void Assembler::processCodeLabels(uint8_t* rawCode) {
+ for (const CodeLabel& label : codeLabels_) {
+ Bind(rawCode, label);
+ }
+}
+
+void Assembler::writeCodePointer(CodeLabel* label) {
+ m_buffer.assertNoPoolAndNoNops();
+ BufferOffset off = writeInst(-1);
+ label->patchAt()->bind(off.getOffset());
+}
+
+void Assembler::Bind(uint8_t* rawCode, const CodeLabel& label) {
+ size_t offset = label.patchAt().offset();
+ size_t target = label.target().offset();
+ *reinterpret_cast<const void**>(rawCode + offset) = rawCode + target;
+}
+
+Assembler::Condition Assembler::InvertCondition(Condition cond) {
+ const uint32_t ConditionInversionBit = 0x10000000;
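+  // Flipping bit 28 toggles the low bit of the 4-bit condition field, which
+  // pairs each condition with its inverse (EQ<->NE, CS<->CC, MI<->PL, ...).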
+ return Condition(ConditionInversionBit ^ cond);
+}
+
+Assembler::Condition Assembler::UnsignedCondition(Condition cond) {
+ switch (cond) {
+ case Zero:
+ case NonZero:
+ return cond;
+ case LessThan:
+ case Below:
+ return Below;
+ case LessThanOrEqual:
+ case BelowOrEqual:
+ return BelowOrEqual;
+ case GreaterThan:
+ case Above:
+ return Above;
+ case AboveOrEqual:
+ case GreaterThanOrEqual:
+ return AboveOrEqual;
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+}
+
+Assembler::Condition Assembler::ConditionWithoutEqual(Condition cond) {
+ switch (cond) {
+ case LessThan:
+ case LessThanOrEqual:
+ return LessThan;
+ case Below:
+ case BelowOrEqual:
+ return Below;
+ case GreaterThan:
+ case GreaterThanOrEqual:
+ return GreaterThan;
+ case Above:
+ case AboveOrEqual:
+ return Above;
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+}
+
+Assembler::DoubleCondition Assembler::InvertCondition(DoubleCondition cond) {
+ const uint32_t ConditionInversionBit = 0x10000000;
+ return DoubleCondition(ConditionInversionBit ^ cond);
+}
+
+Imm8::TwoImm8mData Imm8::EncodeTwoImms(uint32_t imm) {
+ // In the ideal case, we are looking for a number that (in binary) looks
+ // like:
+ // 0b((00)*)n_1((00)*)n_2((00)*)
+ // left n1 mid n2
+ // where both n_1 and n_2 fit into 8 bits.
+ // Since this is being done with rotates, we also need to handle the case
+ // that one of these numbers is in fact split between the left and right
+ // sides, in which case the constant will look like:
+ // 0bn_1a((00)*)n_2((00)*)n_1b
+ // n1a mid n2 rgh n1b
+ // Also remember, values are rotated by multiples of two, and left, mid or
+ // right can have length zero.
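+  // For example, 0x00ff00ff cannot be encoded as a single imm8m (its set
+  // bits span more than 8 bits even allowing wraparound), but it splits into
+  // 0x00ff0000 (0xff rotated right by 16) and 0x000000ff (0xff, no rotation).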
+ uint32_t imm1, imm2;
+ int left = CountLeadingZeroes32(imm) & 0x1E;
+ uint32_t no_n1 = imm & ~(0xff << (24 - left));
+
+ // Not technically needed: this case only happens if we can encode as a
+ // single imm8m. There is a perfectly reasonable encoding in this case, but
+ // we shouldn't encourage people to do things like this.
+ if (no_n1 == 0) {
+ return TwoImm8mData();
+ }
+
+ int mid = CountLeadingZeroes32(no_n1) & 0x1E;
+ uint32_t no_n2 =
+ no_n1 & ~((0xff << ((24 - mid) & 0x1f)) | 0xff >> ((8 + mid) & 0x1f));
+
+ if (no_n2 == 0) {
+ // We hit the easy case, no wraparound.
+ // Note: a single constant *may* look like this.
+ int imm1shift = left + 8;
+ int imm2shift = mid + 8;
+ imm1 = (imm >> (32 - imm1shift)) & 0xff;
+ if (imm2shift >= 32) {
+ imm2shift = 0;
+      // This assert does not always hold; in fact, it would lead to some
+      // incredibly subtle bugs.
+ // assert((imm & 0xff) == no_n1);
+ imm2 = no_n1;
+ } else {
+ imm2 = ((imm >> (32 - imm2shift)) | (imm << imm2shift)) & 0xff;
+ MOZ_ASSERT(((no_n1 >> (32 - imm2shift)) | (no_n1 << imm2shift)) == imm2);
+ }
+ MOZ_ASSERT((imm1shift & 0x1) == 0);
+ MOZ_ASSERT((imm2shift & 0x1) == 0);
+ return TwoImm8mData(datastore::Imm8mData(imm1, imm1shift >> 1),
+ datastore::Imm8mData(imm2, imm2shift >> 1));
+ }
+
+ // Either it wraps, or it does not fit. If we initially chopped off more
+ // than 8 bits, then it won't fit.
+ if (left >= 8) {
+ return TwoImm8mData();
+ }
+
+ int right = 32 - (CountLeadingZeroes32(no_n2) & 30);
+ // All remaining set bits *must* fit into the lower 8 bits.
+ // The right == 8 case should be handled by the previous case.
+ if (right > 8) {
+ return TwoImm8mData();
+ }
+
+ // Make sure the initial bits that we removed for no_n1 fit into the
+ // 8-(32-right) leftmost bits.
+ if (((imm & (0xff << (24 - left))) << (8 - right)) != 0) {
+    // BUT we may have removed more bits than we needed to for no_n1.
+    // E.g. for 0x04104001 we can encode 0x104 with a single op, then
+    // 0x04000001 with a second; but if we try to encode 0x04100000 first, we
+    // find that we need a second op for 0x4000, and 0x1 cannot be included
+    // in the encoding of 0x04100000.
+ no_n1 = imm & ~((0xff >> (8 - right)) | (0xff << (24 + right)));
+ mid = CountLeadingZeroes32(no_n1) & 30;
+ no_n2 = no_n1 & ~((0xff << ((24 - mid) & 31)) | 0xff >> ((8 + mid) & 31));
+ if (no_n2 != 0) {
+ return TwoImm8mData();
+ }
+ }
+
+  // Now assemble all of this information into two coherent constants. The
+  // first is a rotate right from the lower 8 bits.
+ int imm1shift = 8 - right;
+ imm1 = 0xff & ((imm << imm1shift) | (imm >> (32 - imm1shift)));
+ MOZ_ASSERT((imm1shift & ~0x1e) == 0);
+ // left + 8 + mid is the position of the leftmost bit of n_2.
+ // We needed to rotate 0x000000ab right by 8 in order to get 0xab000000,
+ // then shift again by the leftmost bit in order to get the constant that we
+ // care about.
+ int imm2shift = mid + 8;
+ imm2 = ((imm >> (32 - imm2shift)) | (imm << imm2shift)) & 0xff;
+ MOZ_ASSERT((imm1shift & 0x1) == 0);
+ MOZ_ASSERT((imm2shift & 0x1) == 0);
+ return TwoImm8mData(datastore::Imm8mData(imm1, imm1shift >> 1),
+ datastore::Imm8mData(imm2, imm2shift >> 1));
+}
+
+ALUOp jit::ALUNeg(ALUOp op, Register dest, Register scratch, Imm32* imm,
+ Register* negDest) {
+ // Find an alternate ALUOp to get the job done, and use a different imm.
+ *negDest = dest;
+ switch (op) {
+ case OpMov:
+ *imm = Imm32(~imm->value);
+ return OpMvn;
+ case OpMvn:
+ *imm = Imm32(~imm->value);
+ return OpMov;
+ case OpAnd:
+ *imm = Imm32(~imm->value);
+ return OpBic;
+ case OpBic:
+ *imm = Imm32(~imm->value);
+ return OpAnd;
+ case OpAdd:
+ *imm = Imm32(-imm->value);
+ return OpSub;
+ case OpSub:
+ *imm = Imm32(-imm->value);
+ return OpAdd;
+ case OpCmp:
+ *imm = Imm32(-imm->value);
+ return OpCmn;
+ case OpCmn:
+ *imm = Imm32(-imm->value);
+ return OpCmp;
+ case OpTst:
+ MOZ_ASSERT(dest == InvalidReg);
+ *imm = Imm32(~imm->value);
+ *negDest = scratch;
+ return OpBic;
+ // orr has orn on thumb2 only.
+ default:
+ return OpInvalid;
+ }
+}
+
+bool jit::can_dbl(ALUOp op) {
+ // Some instructions can't be processed as two separate instructions such as
+ // and, and possibly add (when we're setting ccodes). There is also some
+ // hilarity with *reading* condition codes. For example, adc dest, src1,
+ // 0xfff; (add with carry) can be split up into adc dest, src1, 0xf00; add
+ // dest, dest, 0xff, since "reading" the condition code increments the
+ // result by one conditionally, that only needs to be done on one of the two
+ // instructions.
+ switch (op) {
+ case OpBic:
+ case OpAdd:
+ case OpSub:
+ case OpEor:
+ case OpOrr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool jit::condsAreSafe(ALUOp op) {
+ // Even when we are setting condition codes, sometimes we can get away with
+ // splitting an operation into two. For example, if our immediate is
+ // 0x00ff00ff, and the operation is eors we can split this in half, since x
+ // ^ 0x00ff0000 ^ 0x000000ff should set all of its condition codes exactly
+ // the same as x ^ 0x00ff00ff. However, if the operation were adds, we
+ // cannot split this in half. If the source on the add is 0xfff00ff0, the
+  // result should be 0xef10ef, but do we set the overflow bit or not?
+ // Depending on which half is performed first (0x00ff0000 or 0x000000ff) the
+ // V bit will be set differently, and *not* updating the V bit would be
+ // wrong. Theoretically, the following should work:
+ // adds r0, r1, 0x00ff0000;
+ // addsvs r0, r1, 0x000000ff;
+ // addvc r0, r1, 0x000000ff;
+ // But this is 3 instructions, and at that point, we might as well use
+ // something else.
+ switch (op) {
+ case OpBic:
+ case OpOrr:
+ case OpEor:
+ return true;
+ default:
+ return false;
+ }
+}
+
+ALUOp jit::getDestVariant(ALUOp op) {
+ // All of the compare operations are dest-less variants of a standard
+ // operation. Given the dest-less variant, return the dest-ful variant.
+ switch (op) {
+ case OpCmp:
+ return OpSub;
+ case OpCmn:
+ return OpAdd;
+ case OpTst:
+ return OpAnd;
+ case OpTeq:
+ return OpEor;
+ default:
+ return op;
+ }
+}
+
+O2RegImmShift jit::O2Reg(Register r) { return O2RegImmShift(r, LSL, 0); }
+
+O2RegImmShift jit::lsl(Register r, int amt) {
+ MOZ_ASSERT(0 <= amt && amt <= 31);
+ return O2RegImmShift(r, LSL, amt);
+}
+
+O2RegImmShift jit::lsr(Register r, int amt) {
+ MOZ_ASSERT(1 <= amt && amt <= 32);
+ return O2RegImmShift(r, LSR, amt);
+}
+
+O2RegImmShift jit::ror(Register r, int amt) {
+ MOZ_ASSERT(1 <= amt && amt <= 31);
+ return O2RegImmShift(r, ROR, amt);
+}
+O2RegImmShift jit::rol(Register r, int amt) {
+ MOZ_ASSERT(1 <= amt && amt <= 31);
+ return O2RegImmShift(r, ROR, 32 - amt);
+}
+
+O2RegImmShift jit::asr(Register r, int amt) {
+ MOZ_ASSERT(1 <= amt && amt <= 32);
+ return O2RegImmShift(r, ASR, amt);
+}
+
+O2RegRegShift jit::lsl(Register r, Register amt) {
+ return O2RegRegShift(r, LSL, amt);
+}
+
+O2RegRegShift jit::lsr(Register r, Register amt) {
+ return O2RegRegShift(r, LSR, amt);
+}
+
+O2RegRegShift jit::ror(Register r, Register amt) {
+ return O2RegRegShift(r, ROR, amt);
+}
+
+O2RegRegShift jit::asr(Register r, Register amt) {
+ return O2RegRegShift(r, ASR, amt);
+}
+
+static js::jit::DoubleEncoder doubleEncoder;
+
+/* static */
+const js::jit::VFPImm js::jit::VFPImm::One(0x3FF00000);
+
+js::jit::VFPImm::VFPImm(uint32_t top) {
+ data_ = -1;
+ datastore::Imm8VFPImmData tmp;
+ if (doubleEncoder.lookup(top, &tmp)) {
+ data_ = tmp.encode();
+ }
+}
+
+BOffImm::BOffImm(const Instruction& inst) : data_(inst.encode() & 0x00ffffff) {}
+
+Instruction* BOffImm::getDest(Instruction* src) const {
+ // TODO: It is probably worthwhile to verify that src is actually a branch.
+ // NOTE: This does not explicitly shift the offset of the destination left by
+ // 2, since it is indexing into an array of instruction sized objects.
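+  // The "+ 2" is the ARM pipeline offset: at execution time the PC reads as
+  // the address of the branch plus 8 bytes, i.e. two Instruction slots ahead.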
+ return &src[((int32_t(data_) << 8) >> 8) + 2];
+}
+
+const js::jit::DoubleEncoder::DoubleEntry js::jit::DoubleEncoder::table[256] = {
+#include "jit/arm/DoubleEntryTable.tbl"
+};
+
+// VFPRegister implementation
+VFPRegister VFPRegister::doubleOverlay(unsigned int which) const {
+ MOZ_ASSERT(!_isInvalid);
+ MOZ_ASSERT(which == 0);
+ if (kind != Double) {
+ return VFPRegister(code_ >> 1, Double);
+ }
+ return *this;
+}
+VFPRegister VFPRegister::singleOverlay(unsigned int which) const {
+ MOZ_ASSERT(!_isInvalid);
+ if (kind == Double) {
+ // There are no corresponding float registers for d16-d31.
+ MOZ_ASSERT(code_ < 16);
+ MOZ_ASSERT(which < 2);
+ return VFPRegister((code_ << 1) + which, Single);
+ }
+ MOZ_ASSERT(which == 0);
+ return VFPRegister(code_, Single);
+}
+
+static_assert(
+ FloatRegisters::TotalDouble <= 16,
+ "We assume that every Double register also has an Integer personality");
+
+VFPRegister VFPRegister::sintOverlay(unsigned int which) const {
+ MOZ_ASSERT(!_isInvalid);
+ if (kind == Double) {
+ // There are no corresponding float registers for d16-d31.
+ MOZ_ASSERT(code_ < 16);
+ MOZ_ASSERT(which < 2);
+ return VFPRegister((code_ << 1) + which, Int);
+ }
+ MOZ_ASSERT(which == 0);
+ return VFPRegister(code_, Int);
+}
+VFPRegister VFPRegister::uintOverlay(unsigned int which) const {
+ MOZ_ASSERT(!_isInvalid);
+ if (kind == Double) {
+ // There are no corresponding float registers for d16-d31.
+ MOZ_ASSERT(code_ < 16);
+ MOZ_ASSERT(which < 2);
+ return VFPRegister((code_ << 1) + which, UInt);
+ }
+ MOZ_ASSERT(which == 0);
+ return VFPRegister(code_, UInt);
+}
+
+bool Assembler::oom() const {
+ return AssemblerShared::oom() || m_buffer.oom() || jumpRelocations_.oom() ||
+ dataRelocations_.oom();
+}
+
+// Size of the instruction stream, in bytes, including pools. This function
+// expects that all pools that need to be placed have been placed. If they
+// haven't, then we need to go and flush the pools :(
+size_t Assembler::size() const { return m_buffer.size(); }
+// Size of the relocation table, in bytes.
+size_t Assembler::jumpRelocationTableBytes() const {
+ return jumpRelocations_.length();
+}
+size_t Assembler::dataRelocationTableBytes() const {
+ return dataRelocations_.length();
+}
+
+// Size of the data table, in bytes.
+size_t Assembler::bytesNeeded() const {
+ return size() + jumpRelocationTableBytes() + dataRelocationTableBytes();
+}
+
+// Allocate memory for a branch instruction; it will be overwritten
+// subsequently and should not be disassembled.
+
+BufferOffset Assembler::allocBranchInst() {
+ return m_buffer.putInt(Always | InstNOP::NopInst);
+}
+
+void Assembler::WriteInstStatic(uint32_t x, uint32_t* dest) {
+ MOZ_ASSERT(dest != nullptr);
+ *dest = x;
+}
+
+void Assembler::haltingAlign(int alignment) {
+ // HLT with payload 0xBAAD
+ m_buffer.align(alignment, 0xE1000070 | (0xBAA << 8) | 0xD);
+}
+
+void Assembler::nopAlign(int alignment) { m_buffer.align(alignment); }
+
+BufferOffset Assembler::as_nop() { return writeInst(0xe320f000); }
+
+static uint32_t EncodeAlu(Register dest, Register src1, Operand2 op2, ALUOp op,
+ SBit s, Assembler::Condition c) {
+ return (int)op | (int)s | (int)c | op2.encode() |
+ ((dest == InvalidReg) ? 0 : RD(dest)) |
+ ((src1 == InvalidReg) ? 0 : RN(src1));
+}
+
+BufferOffset Assembler::as_alu(Register dest, Register src1, Operand2 op2,
+ ALUOp op, SBit s, Condition c) {
+ return writeInst(EncodeAlu(dest, src1, op2, op, s, c));
+}
+
+BufferOffset Assembler::as_mov(Register dest, Operand2 op2, SBit s,
+ Condition c) {
+ return as_alu(dest, InvalidReg, op2, OpMov, s, c);
+}
+
+/* static */
+void Assembler::as_alu_patch(Register dest, Register src1, Operand2 op2,
+ ALUOp op, SBit s, Condition c, uint32_t* pos) {
+ WriteInstStatic(EncodeAlu(dest, src1, op2, op, s, c), pos);
+}
+
+/* static */
+void Assembler::as_mov_patch(Register dest, Operand2 op2, SBit s, Condition c,
+ uint32_t* pos) {
+ as_alu_patch(dest, InvalidReg, op2, OpMov, s, c, pos);
+}
+
+BufferOffset Assembler::as_mvn(Register dest, Operand2 op2, SBit s,
+ Condition c) {
+ return as_alu(dest, InvalidReg, op2, OpMvn, s, c);
+}
+
+// Logical operations.
+BufferOffset Assembler::as_and(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpAnd, s, c);
+}
+BufferOffset Assembler::as_bic(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpBic, s, c);
+}
+BufferOffset Assembler::as_eor(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpEor, s, c);
+}
+BufferOffset Assembler::as_orr(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpOrr, s, c);
+}
+
+// Reverse byte operations.
+BufferOffset Assembler::as_rev(Register dest, Register src, Condition c) {
+ return writeInst((int)c | 0b0000'0110'1011'1111'0000'1111'0011'0000 |
+ RD(dest) | src.code());
+}
+BufferOffset Assembler::as_rev16(Register dest, Register src, Condition c) {
+ return writeInst((int)c | 0b0000'0110'1011'1111'0000'1111'1011'0000 |
+ RD(dest) | src.code());
+}
+BufferOffset Assembler::as_revsh(Register dest, Register src, Condition c) {
+ return writeInst((int)c | 0b0000'0110'1111'1111'0000'1111'1011'0000 |
+ RD(dest) | src.code());
+}
+
+// Mathematical operations.
+BufferOffset Assembler::as_adc(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpAdc, s, c);
+}
+BufferOffset Assembler::as_add(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpAdd, s, c);
+}
+BufferOffset Assembler::as_sbc(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpSbc, s, c);
+}
+BufferOffset Assembler::as_sub(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpSub, s, c);
+}
+BufferOffset Assembler::as_rsb(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpRsb, s, c);
+}
+BufferOffset Assembler::as_rsc(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpRsc, s, c);
+}
+
+// Test operations.
+BufferOffset Assembler::as_cmn(Register src1, Operand2 op2, Condition c) {
+ return as_alu(InvalidReg, src1, op2, OpCmn, SetCC, c);
+}
+BufferOffset Assembler::as_cmp(Register src1, Operand2 op2, Condition c) {
+ return as_alu(InvalidReg, src1, op2, OpCmp, SetCC, c);
+}
+BufferOffset Assembler::as_teq(Register src1, Operand2 op2, Condition c) {
+ return as_alu(InvalidReg, src1, op2, OpTeq, SetCC, c);
+}
+BufferOffset Assembler::as_tst(Register src1, Operand2 op2, Condition c) {
+ return as_alu(InvalidReg, src1, op2, OpTst, SetCC, c);
+}
+
+static constexpr Register NoAddend{Registers::pc};
+
+static const int SignExtend = 0x06000070;
+
+enum SignExtend {
+ SxSxtb = 10 << 20,
+ SxSxth = 11 << 20,
+ SxUxtb = 14 << 20,
+ SxUxth = 15 << 20
+};
+
+// Sign extension operations.
+BufferOffset Assembler::as_sxtb(Register dest, Register src, int rotate,
+ Condition c) {
+ return writeInst((int)c | SignExtend | SxSxtb | RN(NoAddend) | RD(dest) |
+ ((rotate & 3) << 10) | src.code());
+}
+BufferOffset Assembler::as_sxth(Register dest, Register src, int rotate,
+ Condition c) {
+ return writeInst((int)c | SignExtend | SxSxth | RN(NoAddend) | RD(dest) |
+ ((rotate & 3) << 10) | src.code());
+}
+BufferOffset Assembler::as_uxtb(Register dest, Register src, int rotate,
+ Condition c) {
+ return writeInst((int)c | SignExtend | SxUxtb | RN(NoAddend) | RD(dest) |
+ ((rotate & 3) << 10) | src.code());
+}
+BufferOffset Assembler::as_uxth(Register dest, Register src, int rotate,
+ Condition c) {
+ return writeInst((int)c | SignExtend | SxUxth | RN(NoAddend) | RD(dest) |
+ ((rotate & 3) << 10) | src.code());
+}
+
+static uint32_t EncodeMovW(Register dest, Imm16 imm, Assembler::Condition c) {
+ MOZ_ASSERT(HasMOVWT());
+ return 0x03000000 | c | imm.encode() | RD(dest);
+}
+
+static uint32_t EncodeMovT(Register dest, Imm16 imm, Assembler::Condition c) {
+ MOZ_ASSERT(HasMOVWT());
+ return 0x03400000 | c | imm.encode() | RD(dest);
+}
+
+// Not quite ALU worthy, but these are useful nonetheless. These also have the
+// issue of being formatted completely differently from the standard ALU
+// operations.
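+// As a concrete example, loading the 32-bit constant 0xdeadbeef into r0 is
+// emitted as the pair "movw r0, #0xbeef" followed by "movt r0, #0xdead",
+// which is the same movw/movt pattern that GetPtr32Target() decodes above.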
+BufferOffset Assembler::as_movw(Register dest, Imm16 imm, Condition c) {
+ return writeInst(EncodeMovW(dest, imm, c));
+}
+
+/* static */
+void Assembler::as_movw_patch(Register dest, Imm16 imm, Condition c,
+ Instruction* pos) {
+ WriteInstStatic(EncodeMovW(dest, imm, c), (uint32_t*)pos);
+}
+
+BufferOffset Assembler::as_movt(Register dest, Imm16 imm, Condition c) {
+ return writeInst(EncodeMovT(dest, imm, c));
+}
+
+/* static */
+void Assembler::as_movt_patch(Register dest, Imm16 imm, Condition c,
+ Instruction* pos) {
+ WriteInstStatic(EncodeMovT(dest, imm, c), (uint32_t*)pos);
+}
+
+static const int mull_tag = 0x90;
+
+BufferOffset Assembler::as_genmul(Register dhi, Register dlo, Register rm,
+ Register rn, MULOp op, SBit s, Condition c) {
+ return writeInst(RN(dhi) | maybeRD(dlo) | RM(rm) | rn.code() | op | s | c |
+ mull_tag);
+}
+BufferOffset Assembler::as_mul(Register dest, Register src1, Register src2,
+ SBit s, Condition c) {
+ return as_genmul(dest, InvalidReg, src1, src2, OpmMul, s, c);
+}
+BufferOffset Assembler::as_mla(Register dest, Register acc, Register src1,
+ Register src2, SBit s, Condition c) {
+ return as_genmul(dest, acc, src1, src2, OpmMla, s, c);
+}
+BufferOffset Assembler::as_umaal(Register destHI, Register destLO,
+ Register src1, Register src2, Condition c) {
+ return as_genmul(destHI, destLO, src1, src2, OpmUmaal, LeaveCC, c);
+}
+BufferOffset Assembler::as_mls(Register dest, Register acc, Register src1,
+ Register src2, Condition c) {
+ return as_genmul(dest, acc, src1, src2, OpmMls, LeaveCC, c);
+}
+
+BufferOffset Assembler::as_umull(Register destHI, Register destLO,
+ Register src1, Register src2, SBit s,
+ Condition c) {
+ return as_genmul(destHI, destLO, src1, src2, OpmUmull, s, c);
+}
+
+BufferOffset Assembler::as_umlal(Register destHI, Register destLO,
+ Register src1, Register src2, SBit s,
+ Condition c) {
+ return as_genmul(destHI, destLO, src1, src2, OpmUmlal, s, c);
+}
+
+BufferOffset Assembler::as_smull(Register destHI, Register destLO,
+ Register src1, Register src2, SBit s,
+ Condition c) {
+ return as_genmul(destHI, destLO, src1, src2, OpmSmull, s, c);
+}
+
+BufferOffset Assembler::as_smlal(Register destHI, Register destLO,
+ Register src1, Register src2, SBit s,
+ Condition c) {
+ return as_genmul(destHI, destLO, src1, src2, OpmSmlal, s, c);
+}
+
+BufferOffset Assembler::as_sdiv(Register rd, Register rn, Register rm,
+ Condition c) {
+ return writeInst(0x0710f010 | c | RN(rd) | RM(rm) | rn.code());
+}
+
+BufferOffset Assembler::as_udiv(Register rd, Register rn, Register rm,
+ Condition c) {
+ return writeInst(0x0730f010 | c | RN(rd) | RM(rm) | rn.code());
+}
+
+BufferOffset Assembler::as_clz(Register dest, Register src, Condition c) {
+ MOZ_ASSERT(src != pc && dest != pc);
+ return writeInst(RD(dest) | src.code() | c | 0x016f0f10);
+}
+
+// Data transfer instructions: ldr, str, ldrb, strb. Using an int to
+// differentiate between 8 bits and 32 bits is overkill, but meh.
+
+static uint32_t EncodeDtr(LoadStore ls, int size, Index mode, Register rt,
+ DTRAddr addr, Assembler::Condition c) {
+ MOZ_ASSERT(mode == Offset || (rt != addr.getBase() && pc != addr.getBase()));
+ MOZ_ASSERT(size == 32 || size == 8);
+ return 0x04000000 | ls | (size == 8 ? 0x00400000 : 0) | mode | c | RT(rt) |
+ addr.encode();
+}
+
+BufferOffset Assembler::as_dtr(LoadStore ls, int size, Index mode, Register rt,
+ DTRAddr addr, Condition c) {
+ return writeInst(EncodeDtr(ls, size, mode, rt, addr, c));
+}
+
+/* static */
+void Assembler::as_dtr_patch(LoadStore ls, int size, Index mode, Register rt,
+ DTRAddr addr, Condition c, uint32_t* dest) {
+ WriteInstStatic(EncodeDtr(ls, size, mode, rt, addr, c), dest);
+}
+
+class PoolHintData {
+ public:
+ enum LoadType {
+    // Make 0 the bogus value, since that is the value most likely to be
+    // accidentally left somewhere.
+ PoolBOGUS = 0,
+ PoolDTR = 1,
+ PoolBranch = 2,
+ PoolVDTR = 3
+ };
+
+ private:
+ uint32_t index_ : 16;
+ uint32_t cond_ : 4;
+ uint32_t loadType_ : 2;
+ uint32_t destReg_ : 5;
+ uint32_t destType_ : 1;
+ uint32_t ONES : 4;
+
+ static const uint32_t ExpectedOnes = 0xfu;
+
+ public:
+ void init(uint32_t index, Assembler::Condition cond, LoadType lt,
+ Register destReg) {
+ index_ = index;
+ MOZ_ASSERT(index_ == index);
+ cond_ = cond >> 28;
+ MOZ_ASSERT(cond_ == cond >> 28);
+ loadType_ = lt;
+ ONES = ExpectedOnes;
+ destReg_ = destReg.code();
+ destType_ = 0;
+ }
+ void init(uint32_t index, Assembler::Condition cond, LoadType lt,
+ const VFPRegister& destReg) {
+ MOZ_ASSERT(destReg.isFloat());
+ index_ = index;
+ MOZ_ASSERT(index_ == index);
+ cond_ = cond >> 28;
+ MOZ_ASSERT(cond_ == cond >> 28);
+ loadType_ = lt;
+ ONES = ExpectedOnes;
+ destReg_ = destReg.id();
+ destType_ = destReg.isDouble();
+ }
+ Assembler::Condition getCond() const {
+ return Assembler::Condition(cond_ << 28);
+ }
+
+ Register getReg() const { return Register::FromCode(destReg_); }
+ VFPRegister getVFPReg() const {
+ VFPRegister r = VFPRegister(
+ destReg_, destType_ ? VFPRegister::Double : VFPRegister::Single);
+ return r;
+ }
+
+ int32_t getIndex() const { return index_; }
+ void setIndex(uint32_t index) {
+ MOZ_ASSERT(ONES == ExpectedOnes && loadType_ != PoolBOGUS);
+ index_ = index;
+ MOZ_ASSERT(index_ == index);
+ }
+
+ LoadType getLoadType() const {
+ // If this *was* a PoolBranch, but the branch has already been bound
+ // then this isn't going to look like a real poolhintdata, but we still
+ // want to lie about it so everyone knows it *used* to be a branch.
+ if (ONES != ExpectedOnes) {
+ return PoolHintData::PoolBranch;
+ }
+ return static_cast<LoadType>(loadType_);
+ }
+
+ bool isValidPoolHint() const {
+ // Most instructions cannot have a condition that is 0xf. Notable
+ // exceptions are blx and the entire NEON instruction set. For the
+ // purposes of pool loads, and possibly patched branches, the possible
+ // instructions are ldr and b, neither of which can have a condition
+ // code of 0xf.
+ return ONES == ExpectedOnes;
+ }
+};
+
+union PoolHintPun {
+ PoolHintData phd;
+ uint32_t raw;
+};
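+
+// A PoolHintData value temporarily occupies the 32-bit slot of a pool-load
+// instruction until PatchConstantPoolLoad() rewrites the slot into a real
+// ldr/vldr once the pool's final position (and hence the pc-relative offset)
+// is known.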
+
+// Handles all of the other integral data transferring functions: ldrsb, ldrsh,
+// ldrd, etc. The size is given in bits.
+BufferOffset Assembler::as_extdtr(LoadStore ls, int size, bool IsSigned,
+ Index mode, Register rt, EDtrAddr addr,
+ Condition c) {
+ int extra_bits2 = 0;
+ int extra_bits1 = 0;
+ switch (size) {
+ case 8:
+ MOZ_ASSERT(IsSigned);
+ MOZ_ASSERT(ls != IsStore);
+ extra_bits1 = 0x1;
+ extra_bits2 = 0x2;
+ break;
+ case 16:
+      // 'case 32' doesn't need to be handled; it is handled by the default
+      // ldr/str.
+ extra_bits2 = 0x01;
+ extra_bits1 = (ls == IsStore) ? 0 : 1;
+ if (IsSigned) {
+ MOZ_ASSERT(ls != IsStore);
+ extra_bits2 |= 0x2;
+ }
+ break;
+ case 64:
+ extra_bits2 = (ls == IsStore) ? 0x3 : 0x2;
+ extra_bits1 = 0;
+ break;
+ default:
+ MOZ_CRASH("unexpected size in as_extdtr");
+ }
+ return writeInst(extra_bits2 << 5 | extra_bits1 << 20 | 0x90 | addr.encode() |
+ RT(rt) | mode | c);
+}
+
+BufferOffset Assembler::as_dtm(LoadStore ls, Register rn, uint32_t mask,
+ DTMMode mode, DTMWriteBack wb, Condition c) {
+ return writeInst(0x08000000 | RN(rn) | ls | mode | mask | c | wb);
+}
+
+BufferOffset Assembler::allocLiteralLoadEntry(
+ size_t numInst, unsigned numPoolEntries, PoolHintPun& php, uint8_t* data,
+ const LiteralDoc& doc, ARMBuffer::PoolEntry* pe, bool loadToPC) {
+ uint8_t* inst = (uint8_t*)&php.raw;
+
+ MOZ_ASSERT(inst);
+ MOZ_ASSERT(numInst == 1); // Or fix the disassembly
+
+ BufferOffset offs =
+ m_buffer.allocEntry(numInst, numPoolEntries, inst, data, pe);
+ propagateOOM(offs.assigned());
+#ifdef JS_DISASM_ARM
+ Instruction* instruction = m_buffer.getInstOrNull(offs);
+ if (instruction) {
+ spewLiteralLoad(php, loadToPC, instruction, doc);
+ }
+#endif
+ return offs;
+}
+
+// This is also used for instructions that might be resolved into branches,
+// or might not. If dest==pc then it is effectively a branch.
+
+BufferOffset Assembler::as_Imm32Pool(Register dest, uint32_t value,
+ Condition c) {
+ PoolHintPun php;
+ php.phd.init(0, c, PoolHintData::PoolDTR, dest);
+ BufferOffset offs = allocLiteralLoadEntry(
+ 1, 1, php, (uint8_t*)&value, LiteralDoc(value), nullptr, dest == pc);
+ return offs;
+}
+
+/* static */
+void Assembler::WritePoolEntry(Instruction* addr, Condition c, uint32_t data) {
+ MOZ_ASSERT(addr->is<InstLDR>());
+ *addr->as<InstLDR>()->dest() = data;
+ MOZ_ASSERT(addr->extractCond() == c);
+}
+
+BufferOffset Assembler::as_FImm64Pool(VFPRegister dest, double d, Condition c) {
+ MOZ_ASSERT(dest.isDouble());
+ PoolHintPun php;
+ php.phd.init(0, c, PoolHintData::PoolVDTR, dest);
+ return allocLiteralLoadEntry(1, 2, php, (uint8_t*)&d, LiteralDoc(d));
+}
+
+BufferOffset Assembler::as_FImm32Pool(VFPRegister dest, float f, Condition c) {
+ // Insert floats into the double pool as they have the same limitations on
+  // immediate offset. This wastes 4 bytes of padding per float. An alternative
+ // would be to have a separate pool for floats.
+ MOZ_ASSERT(dest.isSingle());
+ PoolHintPun php;
+ php.phd.init(0, c, PoolHintData::PoolVDTR, dest);
+ return allocLiteralLoadEntry(1, 1, php, (uint8_t*)&f, LiteralDoc(f));
+}
+
+// Pool callbacks stuff:
+void Assembler::InsertIndexIntoTag(uint8_t* load_, uint32_t index) {
+ uint32_t* load = (uint32_t*)load_;
+ PoolHintPun php;
+ php.raw = *load;
+ php.phd.setIndex(index);
+ *load = php.raw;
+}
+
+// PatchConstantPoolLoad takes the address of the instruction that wants to be
+// patched and the address of the start of the constant pool, and figures
+// things out from there.
+void Assembler::PatchConstantPoolLoad(void* loadAddr, void* constPoolAddr) {
+ PoolHintData data = *(PoolHintData*)loadAddr;
+ uint32_t* instAddr = (uint32_t*)loadAddr;
+ int offset = (char*)constPoolAddr - (char*)loadAddr;
+ switch (data.getLoadType()) {
+ case PoolHintData::PoolBOGUS:
+ MOZ_CRASH("bogus load type!");
+ case PoolHintData::PoolDTR:
+ Assembler::as_dtr_patch(
+ IsLoad, 32, Offset, data.getReg(),
+ DTRAddr(pc, DtrOffImm(offset + 4 * data.getIndex() - 8)),
+ data.getCond(), instAddr);
+ break;
+ case PoolHintData::PoolBranch:
+ // Either this used to be a poolBranch, and the label was already bound,
+ // so it was replaced with a real branch, or this may happen in the
+ // future. If this is going to happen in the future, then the actual
+ // bits that are written here don't matter (except the condition code,
+ // since that is always preserved across patchings) but if it does not
+ // get bound later, then we want to make sure this is a load from the
+ // pool entry (and the pool entry should be nullptr so it will crash).
+ if (data.isValidPoolHint()) {
+ Assembler::as_dtr_patch(
+ IsLoad, 32, Offset, pc,
+ DTRAddr(pc, DtrOffImm(offset + 4 * data.getIndex() - 8)),
+ data.getCond(), instAddr);
+ }
+ break;
+ case PoolHintData::PoolVDTR: {
+ VFPRegister dest = data.getVFPReg();
+ int32_t imm = offset + (data.getIndex() * 4) - 8;
+ MOZ_ASSERT(-1024 < imm && imm < 1024);
+ Assembler::as_vdtr_patch(IsLoad, dest, VFPAddr(pc, VFPOffImm(imm)),
+ data.getCond(), instAddr);
+ break;
+ }
+ }
+}
+
+// Atomic instruction stuff:
+
+BufferOffset Assembler::as_ldrexd(Register rt, Register rt2, Register rn,
+ Condition c) {
+ MOZ_ASSERT(!(rt.code() & 1) && rt2.code() == rt.code() + 1);
+ MOZ_ASSERT(rt.code() != 14 && rn.code() != 15);
+ return writeInst(0x01b00f9f | (int)c | RT(rt) | RN(rn));
+}
+
+BufferOffset Assembler::as_ldrex(Register rt, Register rn, Condition c) {
+ MOZ_ASSERT(rt.code() != 15 && rn.code() != 15);
+ return writeInst(0x01900f9f | (int)c | RT(rt) | RN(rn));
+}
+
+BufferOffset Assembler::as_ldrexh(Register rt, Register rn, Condition c) {
+ MOZ_ASSERT(rt.code() != 15 && rn.code() != 15);
+ return writeInst(0x01f00f9f | (int)c | RT(rt) | RN(rn));
+}
+
+BufferOffset Assembler::as_ldrexb(Register rt, Register rn, Condition c) {
+ MOZ_ASSERT(rt.code() != 15 && rn.code() != 15);
+ return writeInst(0x01d00f9f | (int)c | RT(rt) | RN(rn));
+}
+
+BufferOffset Assembler::as_strexd(Register rd, Register rt, Register rt2,
+ Register rn, Condition c) {
+ MOZ_ASSERT(!(rt.code() & 1) && rt2.code() == rt.code() + 1);
+ MOZ_ASSERT(rt.code() != 14 && rn.code() != 15 && rd.code() != 15);
+ MOZ_ASSERT(rd != rn && rd != rt && rd != rt2);
+ return writeInst(0x01a00f90 | (int)c | RD(rd) | RN(rn) | rt.code());
+}
+
+BufferOffset Assembler::as_strex(Register rd, Register rt, Register rn,
+ Condition c) {
+ MOZ_ASSERT(rd != rn && rd != rt); // True restriction on Cortex-A7 (RPi2)
+ return writeInst(0x01800f90 | (int)c | RD(rd) | RN(rn) | rt.code());
+}
+
+BufferOffset Assembler::as_strexh(Register rd, Register rt, Register rn,
+ Condition c) {
+ MOZ_ASSERT(rd != rn && rd != rt); // True restriction on Cortex-A7 (RPi2)
+ return writeInst(0x01e00f90 | (int)c | RD(rd) | RN(rn) | rt.code());
+}
+
+BufferOffset Assembler::as_strexb(Register rd, Register rt, Register rn,
+ Condition c) {
+ MOZ_ASSERT(rd != rn && rd != rt); // True restriction on Cortex-A7 (RPi2)
+ return writeInst(0x01c00f90 | (int)c | RD(rd) | RN(rn) | rt.code());
+}
+
+BufferOffset Assembler::as_clrex() { return writeInst(0xf57ff01f); }
+
+// Memory barrier stuff:
+
+BufferOffset Assembler::as_dmb(BarrierOption option) {
+ return writeInst(0xf57ff050U | (int)option);
+}
+BufferOffset Assembler::as_dsb(BarrierOption option) {
+ return writeInst(0xf57ff040U | (int)option);
+}
+BufferOffset Assembler::as_isb() {
+ return writeInst(0xf57ff06fU); // option == SY
+}
+BufferOffset Assembler::as_dsb_trap() {
+ // DSB is "mcr 15, 0, r0, c7, c10, 4".
+ // See eg https://bugs.kde.org/show_bug.cgi?id=228060.
+ // ARMv7 manual, "VMSA CP15 c7 register summary".
+ // Flagged as "legacy" starting with ARMv8, may be disabled on chip, see
+ // ARMv8 manual E2.7.3 and G3.18.16.
+ return writeInst(0xee070f9a);
+}
+BufferOffset Assembler::as_dmb_trap() {
+ // DMB is "mcr 15, 0, r0, c7, c10, 5".
+ // ARMv7 manual, "VMSA CP15 c7 register summary".
+ // Flagged as "legacy" starting with ARMv8, may be disabled on chip, see
+ // ARMv8 manual E2.7.3 and G3.18.16.
+ return writeInst(0xee070fba);
+}
+BufferOffset Assembler::as_isb_trap() {
+ // ISB is "mcr 15, 0, r0, c7, c5, 4".
+ // ARMv7 manual, "VMSA CP15 c7 register summary".
+ // Flagged as "legacy" starting with ARMv8, may be disabled on chip, see
+ // ARMv8 manual E2.7.3 and G3.18.16.
+ return writeInst(0xee070f94);
+}
+
+BufferOffset Assembler::as_csdb() {
+ // NOP (see as_nop) on architectures where this instruction is not defined.
+ //
+ // https://developer.arm.com/-/media/developer/pdf/Cache_Speculation_Side-channels_22Feb18.pdf
+ // CSDB A32: 1110_0011_0010_0000_1111_0000_0001_0100
+ return writeInst(0xe320f000 | 0x14);
+}
+
+// Control flow stuff:
+
+// bx can *only* branch to a register, never to an immediate.
+BufferOffset Assembler::as_bx(Register r, Condition c) {
+ BufferOffset ret = writeInst(((int)c) | OpBx | r.code());
+ return ret;
+}
+
+void Assembler::WritePoolGuard(BufferOffset branch, Instruction* dest,
+ BufferOffset afterPool) {
+ BOffImm off = afterPool.diffB<BOffImm>(branch);
+ if (off.isInvalid()) {
+ MOZ_CRASH("BOffImm invalid");
+ }
+ *dest = InstBImm(off, Always);
+}
+
+// Branch can branch to an immediate *or* to a register.
+// Branches to immediates are pc-relative; branches to registers are absolute.
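+// The immediate form encodes a signed 24-bit word offset, giving a reach of
+// roughly +/-32 MB from the branch.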
+BufferOffset Assembler::as_b(BOffImm off, Condition c, Label* documentation) {
+ return writeBranchInst(((int)c) | OpB | off.encode(),
+ refLabel(documentation));
+}
+
+BufferOffset Assembler::as_b(Label* l, Condition c) {
+ if (l->bound()) {
+    // Note that only one instruction is emitted here; the NOP is overwritten.
+ BufferOffset ret = allocBranchInst();
+ if (oom()) {
+ return BufferOffset();
+ }
+
+ BOffImm offset = BufferOffset(l).diffB<BOffImm>(ret);
+ MOZ_RELEASE_ASSERT(!offset.isInvalid(),
+ "Buffer size limit should prevent this");
+ as_b(offset, c, ret);
+#ifdef JS_DISASM_ARM
+ spewBranch(m_buffer.getInstOrNull(ret), refLabel(l));
+#endif
+ return ret;
+ }
+
+ if (oom()) {
+ return BufferOffset();
+ }
+
+ BufferOffset ret;
+ if (l->used()) {
+ int32_t old = l->offset();
+ MOZ_RELEASE_ASSERT(BOffImm::IsInRange(old),
+ "Buffer size limit should prevent this");
+ ret = as_b(BOffImm(old), c, l);
+ } else {
+ BOffImm inv;
+ ret = as_b(inv, c, l);
+ }
+
+ if (oom()) {
+ return BufferOffset();
+ }
+
+ l->use(ret.getOffset());
+ return ret;
+}
+
+BufferOffset Assembler::as_b(BOffImm off, Condition c, BufferOffset inst) {
+  // JS_DISASM_ARM NOTE: Can't disassemble here, because numerous callers use
+  // this to patch up old code. Must disassemble in the caller, where it makes
+  // sense. Not many callers.
+ *editSrc(inst) = InstBImm(off, c);
+ return inst;
+}
+
+// blx can go to either an immediate or a register.
+// When blx'ing to a register, we change processor state depending on the low
+// bit of the register; when blx'ing to an immediate, we *always* change
+// processor state.
+
+BufferOffset Assembler::as_blx(Register r, Condition c) {
+ return writeInst(((int)c) | OpBlx | r.code());
+}
+
+// bl can only branch to a pc-relative immediate offset.
+// It cannot change the processor state.
+BufferOffset Assembler::as_bl(BOffImm off, Condition c, Label* documentation) {
+ return writeBranchInst(((int)c) | OpBl | off.encode(),
+ refLabel(documentation));
+}
+
+BufferOffset Assembler::as_bl(Label* l, Condition c) {
+ if (l->bound()) {
+    // Note that only one instruction is emitted here; the NOP is overwritten.
+ BufferOffset ret = allocBranchInst();
+ if (oom()) {
+ return BufferOffset();
+ }
+
+ BOffImm offset = BufferOffset(l).diffB<BOffImm>(ret);
+ MOZ_RELEASE_ASSERT(!offset.isInvalid(),
+ "Buffer size limit should prevent this");
+
+ as_bl(offset, c, ret);
+#ifdef JS_DISASM_ARM
+ spewBranch(m_buffer.getInstOrNull(ret), refLabel(l));
+#endif
+ return ret;
+ }
+
+ if (oom()) {
+ return BufferOffset();
+ }
+
+ BufferOffset ret;
+ // See if the list was empty.
+ if (l->used()) {
+ int32_t old = l->offset();
+ MOZ_RELEASE_ASSERT(BOffImm::IsInRange(old),
+ "Buffer size limit should prevent this");
+ ret = as_bl(BOffImm(old), c, l);
+ } else {
+ BOffImm inv;
+ ret = as_bl(inv, c, l);
+ }
+
+ if (oom()) {
+ return BufferOffset();
+ }
+
+ l->use(ret.getOffset());
+ return ret;
+}
+
+BufferOffset Assembler::as_bl(BOffImm off, Condition c, BufferOffset inst) {
+ *editSrc(inst) = InstBLImm(off, c);
+ return inst;
+}
+
+BufferOffset Assembler::as_mrs(Register r, Condition c) {
+ return writeInst(0x010f0000 | int(c) | RD(r));
+}
+
+BufferOffset Assembler::as_msr(Register r, Condition c) {
+ // Hardcode the 'mask' field to 0b11 for now. It is bits 18 and 19, which
+ // are the two high bits of the 'c' in this constant.
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return writeInst(0x012cf000 | int(c) | r.code());
+}
+
+// VFP instructions!
+enum vfp_tags { VfpTag = 0x0C000A00, VfpArith = 0x02000000 };
+
+BufferOffset Assembler::writeVFPInst(vfp_size sz, uint32_t blob) {
+ MOZ_ASSERT((sz & blob) == 0);
+ MOZ_ASSERT((VfpTag & blob) == 0);
+ return writeInst(VfpTag | std::underlying_type_t<vfp_size>(sz) | blob);
+}
+
+/* static */
+void Assembler::WriteVFPInstStatic(vfp_size sz, uint32_t blob, uint32_t* dest) {
+ MOZ_ASSERT((sz & blob) == 0);
+ MOZ_ASSERT((VfpTag & blob) == 0);
+ WriteInstStatic(VfpTag | std::underlying_type_t<vfp_size>(sz) | blob, dest);
+}
+
+// Unityped variants: all registers hold the same type (ieee754 single/double).
+// Notably not included are vcvt; vmov vd, #imm; vmov rt, vn.
+BufferOffset Assembler::as_vfp_float(VFPRegister vd, VFPRegister vn,
+ VFPRegister vm, VFPOp op, Condition c) {
+ // Make sure we believe that all of our operands are the same kind.
+ MOZ_ASSERT_IF(!vn.isMissing(), vd.equiv(vn));
+ MOZ_ASSERT_IF(!vm.isMissing(), vd.equiv(vm));
+ vfp_size sz = vd.isDouble() ? IsDouble : IsSingle;
+ return writeVFPInst(sz, VD(vd) | VN(vn) | VM(vm) | op | VfpArith | c);
+}
+
+BufferOffset Assembler::as_vadd(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c) {
+ return as_vfp_float(vd, vn, vm, OpvAdd, c);
+}
+
+BufferOffset Assembler::as_vdiv(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c) {
+ return as_vfp_float(vd, vn, vm, OpvDiv, c);
+}
+
+BufferOffset Assembler::as_vmul(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c) {
+ return as_vfp_float(vd, vn, vm, OpvMul, c);
+}
+
+BufferOffset Assembler::as_vnmul(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c) {
+ return as_vfp_float(vd, vn, vm, OpvMul, c);
+}
+
+BufferOffset Assembler::as_vnmla(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c) {
+ MOZ_CRASH("Feature NYI");
+}
+
+BufferOffset Assembler::as_vnmls(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c) {
+ MOZ_CRASH("Feature NYI");
+}
+
+BufferOffset Assembler::as_vneg(VFPRegister vd, VFPRegister vm, Condition c) {
+ return as_vfp_float(vd, NoVFPRegister, vm, OpvNeg, c);
+}
+
+BufferOffset Assembler::as_vsqrt(VFPRegister vd, VFPRegister vm, Condition c) {
+ return as_vfp_float(vd, NoVFPRegister, vm, OpvSqrt, c);
+}
+
+BufferOffset Assembler::as_vabs(VFPRegister vd, VFPRegister vm, Condition c) {
+ return as_vfp_float(vd, NoVFPRegister, vm, OpvAbs, c);
+}
+
+BufferOffset Assembler::as_vsub(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c) {
+ return as_vfp_float(vd, vn, vm, OpvSub, c);
+}
+
+BufferOffset Assembler::as_vcmp(VFPRegister vd, VFPRegister vm, Condition c) {
+ return as_vfp_float(vd, NoVFPRegister, vm, OpvCmp, c);
+}
+
+BufferOffset Assembler::as_vcmpz(VFPRegister vd, Condition c) {
+ return as_vfp_float(vd, NoVFPRegister, NoVFPRegister, OpvCmpz, c);
+}
+
+// Specifically, a move between two same-sized registers.
+BufferOffset Assembler::as_vmov(VFPRegister vd, VFPRegister vsrc, Condition c) {
+ return as_vfp_float(vd, NoVFPRegister, vsrc, OpvMov, c);
+}
+
+// Transfer between Core and VFP.
+
+// Unlike the next function, moving between the core registers and vfp
+// registers can't be properly typed, since I don't want to munge the type
+// VFPRegister to also include core registers. Thus, the core and vfp
+// registers are passed in based on their type, and src/dest is determined by
+// the float2core argument.
+
+BufferOffset Assembler::as_vxfer(Register vt1, Register vt2, VFPRegister vm,
+ FloatToCore_ f2c, Condition c, int idx) {
+ vfp_size sz = IsSingle;
+ if (vm.isDouble()) {
+    // Technically, this can be done with a vmov à la ARM ARM under vmov;
+    // however, that requires at least an extra bit saying if the operation
+    // should be performed on the lower or upper half of the double. Moving
+    // a single to/from 2N/2N+1 isn't equivalent, since there are 32 single
+    // registers and 32 double registers, so there is no way to encode the
+    // last 16 double registers.
+ sz = IsDouble;
+ MOZ_ASSERT(idx == 0 || idx == 1);
+ // If we are transferring a single half of the double then it must be
+ // moving a VFP reg to a core reg.
+ MOZ_ASSERT_IF(vt2 == InvalidReg, f2c == FloatToCore);
+ idx = idx << 21;
+ } else {
+ MOZ_ASSERT(idx == 0);
+ }
+
+ if (vt2 == InvalidReg) {
+ return writeVFPInst(sz, WordTransfer |
+ std::underlying_type_t<FloatToCore_>(f2c) |
+ std::underlying_type_t<Condition>(c) | RT(vt1) |
+ maybeRN(vt2) | VN(vm) | idx);
+ }
+
+ // We are doing a 64 bit transfer.
+ return writeVFPInst(sz, DoubleTransfer |
+ std::underlying_type_t<FloatToCore_>(f2c) |
+ std::underlying_type_t<Condition>(c) | RT(vt1) |
+ maybeRN(vt2) | VM(vm) | idx);
+}
+
+enum vcvt_destFloatness { VcvtToInteger = 1 << 18, VcvtToFloat = 0 << 18 };
+enum vcvt_toZero {
+ VcvtToZero =
+      1 << 7,  // Use the default rounding mode, which truncates toward zero.
+ VcvtToFPSCR = 0 << 7 // Use whatever rounding mode the fpscr specifies.
+};
+enum vcvt_Signedness {
+ VcvtToSigned = 1 << 16,
+ VcvtToUnsigned = 0 << 16,
+ VcvtFromSigned = 1 << 7,
+ VcvtFromUnsigned = 0 << 7
+};
+
+// Our encoding actually allows just the src and the dest (and their types) to
+// uniquely specify the encoding that we are going to use.
+BufferOffset Assembler::as_vcvt(VFPRegister vd, VFPRegister vm, bool useFPSCR,
+ Condition c) {
+ // Unlike other cases, the source and dest types cannot be the same.
+ MOZ_ASSERT(!vd.equiv(vm));
+ vfp_size sz = IsDouble;
+ if (vd.isFloat() && vm.isFloat()) {
+ // Doing a float -> float conversion.
+ if (vm.isSingle()) {
+ sz = IsSingle;
+ }
+ return writeVFPInst(sz, c | 0x02B700C0 | VM(vm) | VD(vd));
+ }
+
+ // At least one of the registers should be a float.
+ vcvt_destFloatness destFloat;
+ vcvt_Signedness opSign;
+ vcvt_toZero doToZero = VcvtToFPSCR;
+ MOZ_ASSERT(vd.isFloat() || vm.isFloat());
+ if (vd.isSingle() || vm.isSingle()) {
+ sz = IsSingle;
+ }
+
+ if (vd.isFloat()) {
+ destFloat = VcvtToFloat;
+ opSign = (vm.isSInt()) ? VcvtFromSigned : VcvtFromUnsigned;
+ } else {
+ destFloat = VcvtToInteger;
+ opSign = (vd.isSInt()) ? VcvtToSigned : VcvtToUnsigned;
+ doToZero = useFPSCR ? VcvtToFPSCR : VcvtToZero;
+ }
+ return writeVFPInst(
+ sz, c | 0x02B80040 | VD(vd) | VM(vm) | destFloat | opSign | doToZero);
+}
+
+BufferOffset Assembler::as_vcvtFixed(VFPRegister vd, bool isSigned,
+ uint32_t fixedPoint, bool toFixed,
+ Condition c) {
+ MOZ_ASSERT(vd.isFloat());
+ uint32_t sx = 0x1;
+ vfp_size sf = vd.isDouble() ? IsDouble : IsSingle;
+ int32_t imm5 = fixedPoint;
+ imm5 = (sx ? 32 : 16) - imm5;
+ MOZ_ASSERT(imm5 >= 0);
+ imm5 = imm5 >> 1 | (imm5 & 1) << 5;
+ return writeVFPInst(sf, 0x02BA0040 | VD(vd) | toFixed << 18 | sx << 7 |
+ (!isSigned) << 16 | imm5 | c);
+}
+
+// Transfer between VFP and memory.
+static uint32_t EncodeVdtr(LoadStore ls, VFPRegister vd, VFPAddr addr,
+ Assembler::Condition c) {
+ return ls | 0x01000000 | addr.encode() | VD(vd) | c;
+}
+
+BufferOffset Assembler::as_vdtr(
+ LoadStore ls, VFPRegister vd, VFPAddr addr,
+ Condition c /* vfp doesn't have a wb option */) {
+ vfp_size sz = vd.isDouble() ? IsDouble : IsSingle;
+ return writeVFPInst(sz, EncodeVdtr(ls, vd, addr, c));
+}
+
+/* static */
+void Assembler::as_vdtr_patch(LoadStore ls, VFPRegister vd, VFPAddr addr,
+ Condition c, uint32_t* dest) {
+ vfp_size sz = vd.isDouble() ? IsDouble : IsSingle;
+ WriteVFPInstStatic(sz, EncodeVdtr(ls, vd, addr, c), dest);
+}
+
+// VFP's ldm/stm work differently from the standard arm ones. You can only
+// transfer a range.
+
+BufferOffset Assembler::as_vdtm(LoadStore st, Register rn, VFPRegister vd,
+ int length,
+ /* also has update conditions */ Condition c) {
+ MOZ_ASSERT(length <= 16 && length >= 0);
+ vfp_size sz = vd.isDouble() ? IsDouble : IsSingle;
+
+ if (vd.isDouble()) {
+ length *= 2;
+ }
+
+ return writeVFPInst(sz, dtmLoadStore | RN(rn) | VD(vd) | length | dtmMode |
+ dtmUpdate | dtmCond);
+}
+
+BufferOffset Assembler::as_vldr_unaligned(VFPRegister vd, Register rn) {
+ MOZ_ASSERT(HasNEON());
+ if (vd.isDouble()) {
+ // vld1 (multiple single elements) with align=0, size=3, numregs=1
+ return writeInst(0xF42007CF | RN(rn) | VD(vd));
+ }
+ // vld1 (single element to single lane) with index=0, size=2
+ MOZ_ASSERT(vd.isFloat());
+ MOZ_ASSERT((vd.code() & 1) == 0);
+ return writeInst(0xF4A0080F | RN(rn) | VD(vd.asDouble()));
+}
+
+BufferOffset Assembler::as_vstr_unaligned(VFPRegister vd, Register rn) {
+ MOZ_ASSERT(HasNEON());
+ if (vd.isDouble()) {
+ // vst1 (multiple single elements) with align=0, size=3, numregs=1
+ return writeInst(0xF40007CF | RN(rn) | VD(vd));
+ }
+ // vst1 (single element from one lane) with index=0, size=2
+ MOZ_ASSERT(vd.isFloat());
+ MOZ_ASSERT((vd.code() & 1) == 0);
+ return writeInst(0xF480080F | RN(rn) | VD(vd.asDouble()));
+}
+
+BufferOffset Assembler::as_vimm(VFPRegister vd, VFPImm imm, Condition c) {
+ MOZ_ASSERT(imm.isValid());
+ vfp_size sz = vd.isDouble() ? IsDouble : IsSingle;
+ return writeVFPInst(sz, c | imm.encode() | VD(vd) | 0x02B00000);
+}
+
+BufferOffset Assembler::as_vmrs(Register r, Condition c) {
+ return writeInst(c | 0x0ef10a10 | RT(r));
+}
+
+BufferOffset Assembler::as_vmsr(Register r, Condition c) {
+ return writeInst(c | 0x0ee10a10 | RT(r));
+}
+
+bool Assembler::nextLink(BufferOffset b, BufferOffset* next) {
+ Instruction branch = *editSrc(b);
+ MOZ_ASSERT(branch.is<InstBranchImm>());
+
+ BOffImm destOff;
+ branch.as<InstBranchImm>()->extractImm(&destOff);
+ if (destOff.isInvalid()) {
+ return false;
+ }
+
+ // Propagate the next link back to the caller, by constructing a new
+ // BufferOffset into the space they provided.
+ new (next) BufferOffset(destOff.decode());
+ return true;
+}
+
+void Assembler::bind(Label* label, BufferOffset boff) {
+#ifdef JS_DISASM_ARM
+ spew_.spewBind(label);
+#endif
+ if (oom()) {
+ // Ensure we always bind the label. This matches what we do on
+ // x86/x64 and silences the assert in ~Label.
+ label->bind(0);
+ return;
+ }
+
+ if (label->used()) {
+ bool more;
+ // If our caller didn't give us an explicit target to bind to then we
+ // want to bind to the location of the next instruction.
+ BufferOffset dest = boff.assigned() ? boff : nextOffset();
+ BufferOffset b(label);
+ do {
+ BufferOffset next;
+ more = nextLink(b, &next);
+ Instruction branch = *editSrc(b);
+ Condition c = branch.extractCond();
+ BOffImm offset = dest.diffB<BOffImm>(b);
+ MOZ_RELEASE_ASSERT(!offset.isInvalid(),
+ "Buffer size limit should prevent this");
+ if (branch.is<InstBImm>()) {
+ as_b(offset, c, b);
+ } else if (branch.is<InstBLImm>()) {
+ as_bl(offset, c, b);
+ } else {
+ MOZ_CRASH("crazy fixup!");
+ }
+ b = next;
+ } while (more);
+ }
+ label->bind(nextOffset().getOffset());
+ MOZ_ASSERT(!oom());
+}
+
+void Assembler::retarget(Label* label, Label* target) {
+#ifdef JS_DISASM_ARM
+ spew_.spewRetarget(label, target);
+#endif
+ if (label->used() && !oom()) {
+ if (target->bound()) {
+ bind(label, BufferOffset(target));
+ } else if (target->used()) {
+ // The target is not bound but used. Prepend label's branch list
+ // onto target's.
+ BufferOffset labelBranchOffset(label);
+ BufferOffset next;
+
+ // Find the head of the use chain for label.
+ while (nextLink(labelBranchOffset, &next)) {
+ labelBranchOffset = next;
+ }
+
+ // Then patch the head of label's use chain to the tail of target's
+ // use chain, prepending the entire use chain of target.
+ Instruction branch = *editSrc(labelBranchOffset);
+ Condition c = branch.extractCond();
+ int32_t prev = target->offset();
+ target->use(label->offset());
+ if (branch.is<InstBImm>()) {
+ as_b(BOffImm(prev), c, labelBranchOffset);
+ } else if (branch.is<InstBLImm>()) {
+ as_bl(BOffImm(prev), c, labelBranchOffset);
+ } else {
+ MOZ_CRASH("crazy fixup!");
+ }
+ } else {
+ // The target is unbound and unused. We can just take the head of
+ // the list hanging off of label, and dump that into target.
+ target->use(label->offset());
+ }
+ }
+ label->reset();
+}
+
+static int stopBKPT = -1;
+void Assembler::as_bkpt() {
+ // This is a count of how many times a breakpoint instruction has been
+ // generated. It is embedded into the instruction for debugging
+  // purposes. Gdb will print "bkpt xxx" when you attempt to disassemble a
+ // breakpoint with the number xxx embedded into it. If this breakpoint is
+ // being hit, then you can run (in gdb):
+ // >b dbg_break
+ // >b main
+ // >commands
+ // >set stopBKPT = xxx
+ // >c
+ // >end
+  // which will set a breakpoint on the function dbg_break above, and set a
+  // scripted breakpoint on main that sets the (otherwise unmodified) value
+  // to the number of the breakpoint, so dbg_break will actually be called.
+  // Finally, when you run the executable, execution will halt when that
+  // breakpoint is generated.
+ static int hit = 0;
+ if (stopBKPT == hit) {
+ dbg_break();
+ }
+ writeInst(0xe1200070 | (hit & 0xf) | ((hit & 0xfff0) << 4));
+ hit++;
+}
+
+BufferOffset Assembler::as_illegal_trap() {
+ // Encoding of the permanently-undefined 'udf' instruction, with the imm16
+ // set to 0.
+ return writeInst(0xe7f000f0);
+}
+
+void Assembler::flushBuffer() { m_buffer.flushPool(); }
+
+void Assembler::enterNoPool(size_t maxInst) { m_buffer.enterNoPool(maxInst); }
+
+void Assembler::leaveNoPool() { m_buffer.leaveNoPool(); }
+
+void Assembler::enterNoNops() { m_buffer.enterNoNops(); }
+
+void Assembler::leaveNoNops() { m_buffer.leaveNoNops(); }
+
+struct PoolHeader : Instruction {
+ struct Header {
+ // The size should take into account the pool header.
+    // The size is in units of Instruction (4 bytes), not bytes.
+ uint32_t size : 15;
+ uint32_t isNatural : 1;
+ uint32_t ONES : 16;
+
+ Header(int size_, bool isNatural_)
+ : size(size_), isNatural(isNatural_), ONES(0xffff) {}
+
+ explicit Header(const Instruction* i) {
+ static_assert(sizeof(Header) == sizeof(uint32_t));
+ memcpy(this, i, sizeof(Header));
+ MOZ_ASSERT(ONES == 0xffff);
+ }
+
+ uint32_t raw() const {
+ static_assert(sizeof(Header) == sizeof(uint32_t));
+ uint32_t dest;
+ memcpy(&dest, this, sizeof(Header));
+ return dest;
+ }
+ };
+
+ PoolHeader(int size_, bool isNatural_)
+ : Instruction(Header(size_, isNatural_).raw(), true) {}
+
+ uint32_t size() const {
+ Header tmp(this);
+ return tmp.size;
+ }
+ uint32_t isNatural() const {
+ Header tmp(this);
+ return tmp.isNatural;
+ }
+
+ static bool IsTHIS(const Instruction& i) {
+ return (*i.raw() & 0xffff0000) == 0xffff0000;
+ }
+ static const PoolHeader* AsTHIS(const Instruction& i) {
+ if (!IsTHIS(i)) {
+ return nullptr;
+ }
+ return static_cast<const PoolHeader*>(&i);
+ }
+};
+
+void Assembler::WritePoolHeader(uint8_t* start, Pool* p, bool isNatural) {
+  static_assert(sizeof(PoolHeader) == 4,
+                "PoolHeader must have the correct size.");
+ uint8_t* pool = start + 4;
+ // Go through the usual rigmarole to get the size of the pool.
+ pool += p->getPoolSize();
+ uint32_t size = pool - start;
+ MOZ_ASSERT((size & 3) == 0);
+ size = size >> 2;
+ MOZ_ASSERT(size < (1 << 15));
+ PoolHeader header(size, isNatural);
+ *(PoolHeader*)start = header;
+}
+
+// The size of an arbitrary 32-bit call in the instruction stream. On ARM this
+// sequence is |pc = ldr pc - 4; imm32| given that we never reach the imm32.
+uint32_t Assembler::PatchWrite_NearCallSize() { return sizeof(uint32_t); }
+
+void Assembler::PatchWrite_NearCall(CodeLocationLabel start,
+ CodeLocationLabel toCall) {
+ Instruction* inst = (Instruction*)start.raw();
+ // Overwrite whatever instruction used to be here with a call. Since the
+  // destination is in the same function, it will be within the range of the
+  // bl instruction's 24-bit immediate offset (which is scaled by << 2).
+ uint8_t* dest = toCall.raw();
+ new (inst) InstBLImm(BOffImm(dest - (uint8_t*)inst), Always);
+}
+
+void Assembler::PatchDataWithValueCheck(CodeLocationLabel label,
+ PatchedImmPtr newValue,
+ PatchedImmPtr expectedValue) {
+ Instruction* ptr = reinterpret_cast<Instruction*>(label.raw());
+
+ Register dest;
+ Assembler::RelocStyle rs;
+
+ {
+ InstructionIterator iter(ptr);
+ DebugOnly<const uint32_t*> val = GetPtr32Target(iter, &dest, &rs);
+ MOZ_ASSERT(uint32_t((const uint32_t*)val) == uint32_t(expectedValue.value));
+ }
+
+ // Patch over actual instructions.
+ {
+ InstructionIterator iter(ptr);
+ MacroAssembler::ma_mov_patch(Imm32(int32_t(newValue.value)), dest, Always,
+ rs, iter);
+ }
+}
+
+void Assembler::PatchDataWithValueCheck(CodeLocationLabel label,
+ ImmPtr newValue, ImmPtr expectedValue) {
+ PatchDataWithValueCheck(label, PatchedImmPtr(newValue.value),
+ PatchedImmPtr(expectedValue.value));
+}
+
+// This just stomps over memory with 32 bits of raw data. Its purpose is to
+// overwrite the call of JITed code with 32 bits worth of an offset. This is
+// only meant to function on code that has been invalidated, so it should be
+// totally safe. Since that instruction will never be executed again, an
+// ICache flush should not be necessary.
+void Assembler::PatchWrite_Imm32(CodeLocationLabel label, Imm32 imm) {
+ // Raw is going to be the return address.
+ uint32_t* raw = (uint32_t*)label.raw();
+ // Overwrite the 4 bytes before the return address, which will end up being
+ // the call instruction.
+ *(raw - 1) = imm.value;
+}
+
+uint8_t* Assembler::NextInstruction(uint8_t* inst_, uint32_t* count) {
+ if (count != nullptr) {
+ *count += sizeof(Instruction);
+ }
+
+ InstructionIterator iter(reinterpret_cast<Instruction*>(inst_));
+ return reinterpret_cast<uint8_t*>(iter.next());
+}
+
+static bool InstIsGuard(Instruction* inst, const PoolHeader** ph) {
+ Assembler::Condition c = inst->extractCond();
+ if (c != Assembler::Always) {
+ return false;
+ }
+ if (!(inst->is<InstBXReg>() || inst->is<InstBImm>())) {
+ return false;
+ }
+ // See if the next instruction is a pool header.
+ *ph = (inst + 1)->as<const PoolHeader>();
+ return *ph != nullptr;
+}
+
+static bool InstIsGuard(BufferInstructionIterator& iter,
+ const PoolHeader** ph) {
+ Instruction* inst = iter.cur();
+ Assembler::Condition c = inst->extractCond();
+ if (c != Assembler::Always) {
+ return false;
+ }
+ if (!(inst->is<InstBXReg>() || inst->is<InstBImm>())) {
+ return false;
+ }
+ // See if the next instruction is a pool header.
+ *ph = iter.peek()->as<const PoolHeader>();
+ return *ph != nullptr;
+}
+
+template <class T>
+static bool InstIsBNop(const T& iter) {
+  // In some special situations, it is necessary to insert a NOP into the
+  // instruction stream that nobody knows about. Since nobody should know
+  // about it, make sure it gets skipped when Instruction::next() is called.
+  // The nop used for this purpose is a very specific one, namely a branch to
+  // the next instruction.
+ const Instruction* cur = iter.cur();
+ Assembler::Condition c = cur->extractCond();
+ if (c != Assembler::Always) {
+ return false;
+ }
+ if (!cur->is<InstBImm>()) {
+ return false;
+ }
+ InstBImm* b = cur->as<InstBImm>();
+ BOffImm offset;
+ b->extractImm(&offset);
+ return offset.decode() == 4;
+}
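+
+// (For reference, that nop is the word 0xeaffffff: an always-taken branch
+// whose decoded offset is 4, i.e. a branch to the very next instruction. It
+// is the same pattern the Assembler constructor in Assembler-arm.h passes to
+// the buffer as its nop fill.)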
+
+Instruction* InstructionIterator::maybeSkipAutomaticInstructions() {
+ // If the current instruction was automatically-inserted, skip past it.
+ const PoolHeader* ph;
+
+ // Loop until an intentionally-placed instruction is found.
+ while (true) {
+ if (InstIsGuard(cur(), &ph)) {
+ // Don't skip a natural guard.
+ if (ph->isNatural()) {
+ return cur();
+ }
+ advanceRaw(1 + ph->size());
+ } else if (InstIsBNop<InstructionIterator>(*this)) {
+ advanceRaw(1);
+ } else {
+ return cur();
+ }
+ }
+}
+
+Instruction* BufferInstructionIterator::maybeSkipAutomaticInstructions() {
+ const PoolHeader* ph;
+ // If this is a guard, and the next instruction is a header, always work
+ // around the pool. If it isn't a guard, then start looking ahead.
+ if (InstIsGuard(*this, &ph)) {
+ // Don't skip a natural guard.
+ if (ph->isNatural()) {
+ return cur();
+ }
+ advance(sizeof(Instruction) * ph->size());
+ return next();
+ }
+ if (InstIsBNop<BufferInstructionIterator>(*this)) {
+ return next();
+ }
+ return cur();
+}
+
+// Cases to be handled:
+// 1) no pools or branches in sight => return this+1
+// 2) branch to next instruction => return this+2, because a nop needed to be
+// inserted into the stream.
+// 3) this+1 is an artificial guard for a pool => return first instruction
+// after the pool
+// 4) this+1 is a natural guard => return the branch
+// 5) this is a branch, right before a pool => return first instruction after
+// the pool
+// in assembly form:
+// 1) add r0, r0, r0 <= this
+// add r1, r1, r1 <= returned value
+// add r2, r2, r2
+//
+// 2) add r0, r0, r0 <= this
+// b foo
+// foo:
+// add r2, r2, r2 <= returned value
+//
+// 3) add r0, r0, r0 <= this
+// b after_pool;
+// .word 0xffff0002 # bit 15 being 0 indicates that the branch was not
+// # requested by the assembler; the 2 indicates that there
+// # is 1 pool entry in addition to the pool header
+// 0xdeadbeef
+// add r4, r4, r4 <= returned value
+// 4) add r0, r0, r0 <= this
+// b after_pool <= returned value
+// .word 0xffff8002 # bit 15 being 1 indicates that the branch was
+// # requested by the assembler
+// 0xdeadbeef
+// add r4, r4, r4
+// 5) b after_pool <= this
+// .word 0xffff8002 # bit 15 has no bearing on the returned value
+// 0xdeadbeef
+// add r4, r4, r4 <= returned value
+
+Instruction* InstructionIterator::next() {
+ const PoolHeader* ph;
+
+ // If the current instruction is followed by a pool header,
+ // move past the current instruction and the pool.
+ if (InstIsGuard(cur(), &ph)) {
+ advanceRaw(1 + ph->size());
+ return maybeSkipAutomaticInstructions();
+ }
+
+ // The next instruction is then known to not be a PoolHeader.
+ advanceRaw(1);
+ return maybeSkipAutomaticInstructions();
+}
+
+void Assembler::ToggleToJmp(CodeLocationLabel inst_) {
+ uint32_t* ptr = (uint32_t*)inst_.raw();
+
+ DebugOnly<Instruction*> inst = (Instruction*)inst_.raw();
+ MOZ_ASSERT(inst->is<InstCMP>());
+
+ // Zero bits 20-27, then set 24-27 to be correct for a branch.
+  // 20-23 will be part of the B's immediate, and should be 0.
+ *ptr = (*ptr & ~(0xff << 20)) | (0xa0 << 20);
+}
+
+void Assembler::ToggleToCmp(CodeLocationLabel inst_) {
+ uint32_t* ptr = (uint32_t*)inst_.raw();
+
+ DebugOnly<Instruction*> inst = (Instruction*)inst_.raw();
+ MOZ_ASSERT(inst->is<InstBImm>());
+
+ // Ensure that this masking operation doesn't affect the offset of the
+ // branch instruction when it gets toggled back.
+ MOZ_ASSERT((*ptr & (0xf << 20)) == 0);
+
+ // Also make sure that the CMP is valid. Part of having a valid CMP is that
+ // all of the bits describing the destination in most ALU instructions are
+ // all unset (looks like it is encoding r0).
+ MOZ_ASSERT(toRD(*inst) == r0);
+
+ // Zero out bits 20-27, then set them to be correct for a compare.
+ *ptr = (*ptr & ~(0xff << 20)) | (0x35 << 20);
+}
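+
+// For reference: in the ARM instruction encoding, 0xa0 in bits 27-20 is the B
+// form (cond:101:0:imm24, with the top four immediate bits zero, as asserted
+// above), while 0x35 is CMP with an immediate operand (I = 1, opcode = 1010,
+// S = 1).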
+
+void Assembler::ToggleCall(CodeLocationLabel inst_, bool enabled) {
+ InstructionIterator iter(reinterpret_cast<Instruction*>(inst_.raw()));
+ MOZ_ASSERT(iter.cur()->is<InstMovW>() || iter.cur()->is<InstLDR>());
+
+ if (iter.cur()->is<InstMovW>()) {
+ // If it looks like the start of a movw/movt sequence, then make sure we
+ // have all of it (and advance the iterator past the full sequence).
+ iter.next();
+ MOZ_ASSERT(iter.cur()->is<InstMovT>());
+ }
+
+ iter.next();
+ MOZ_ASSERT(iter.cur()->is<InstNOP>() || iter.cur()->is<InstBLXReg>());
+
+ if (enabled == iter.cur()->is<InstBLXReg>()) {
+ // Nothing to do.
+ return;
+ }
+
+ Instruction* inst = iter.cur();
+
+ if (enabled) {
+ *inst = InstBLXReg(ScratchRegister, Always);
+ } else {
+ *inst = InstNOP();
+ }
+}
+
+size_t Assembler::ToggledCallSize(uint8_t* code) {
+ InstructionIterator iter(reinterpret_cast<Instruction*>(code));
+ MOZ_ASSERT(iter.cur()->is<InstMovW>() || iter.cur()->is<InstLDR>());
+
+ if (iter.cur()->is<InstMovW>()) {
+ // If it looks like the start of a movw/movt sequence, then make sure we
+ // have all of it (and advance the iterator past the full sequence).
+ iter.next();
+ MOZ_ASSERT(iter.cur()->is<InstMovT>());
+ }
+
+ iter.next();
+ MOZ_ASSERT(iter.cur()->is<InstNOP>() || iter.cur()->is<InstBLXReg>());
+ return uintptr_t(iter.cur()) + 4 - uintptr_t(code);
+}
+
+uint32_t Assembler::NopFill = 0;
+
+uint32_t Assembler::GetNopFill() {
+ static bool isSet = false;
+ if (!isSet) {
+ char* fillStr = getenv("ARM_ASM_NOP_FILL");
+ uint32_t fill;
+ if (fillStr && sscanf(fillStr, "%u", &fill) == 1) {
+ NopFill = fill;
+ }
+ if (NopFill > 8) {
+ MOZ_CRASH("Nop fill > 8 is not supported");
+ }
+ isSet = true;
+ }
+ return NopFill;
+}
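+
+// For example, running with ARM_ASM_NOP_FILL=2 in the environment sets
+// NopFill to 2, which the Assembler constructor (in Assembler-arm.h) passes
+// to the ARMBuffer; values above 8 hit the MOZ_CRASH above.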
+
+uint32_t Assembler::AsmPoolMaxOffset = 1024;
+
+uint32_t Assembler::GetPoolMaxOffset() {
+ static bool isSet = false;
+ if (!isSet) {
+ char* poolMaxOffsetStr = getenv("ASM_POOL_MAX_OFFSET");
+ uint32_t poolMaxOffset;
+ if (poolMaxOffsetStr &&
+ sscanf(poolMaxOffsetStr, "%u", &poolMaxOffset) == 1) {
+ AsmPoolMaxOffset = poolMaxOffset;
+ }
+ isSet = true;
+ }
+ return AsmPoolMaxOffset;
+}
+
+SecondScratchRegisterScope::SecondScratchRegisterScope(MacroAssembler& masm)
+ : AutoRegisterScope(masm, masm.getSecondScratchReg()) {}
+
+#ifdef JS_DISASM_ARM
+
+/* static */
+void Assembler::disassembleInstruction(const Instruction* i,
+ DisasmBuffer& buffer) {
+ disasm::NameConverter converter;
+ disasm::Disassembler dasm(converter);
+ uint8_t* loc = reinterpret_cast<uint8_t*>(const_cast<uint32_t*>(i->raw()));
+ dasm.InstructionDecode(buffer, loc);
+}
+
+void Assembler::initDisassembler() {
+ // The line is normally laid out like this:
+ //
+ // xxxxxxxx ldr r, op ; comment
+ //
+ // where xx...x is the instruction bit pattern.
+ //
+ // Labels are laid out by themselves to line up with the instructions above
+ // and below:
+ //
+ // nnnn:
+ //
+ // Branch targets are normally on the same line as the branch instruction,
+ // but when they cannot be they will be on a line by themselves, indented
+ // significantly:
+ //
+ // -> label
+
+ spew_.setLabelIndent(" "); // 10
+ spew_.setTargetIndent(" "); // 20
+}
+
+void Assembler::finishDisassembler() { spew_.spewOrphans(); }
+
+// Labels are named as they are encountered by adding names to a
+// table, using the Label address as the key. This is made tricky by
+// the (memory for) Label objects being reused, but reused label
+// objects are recognizable from being marked as not used or not
+// bound. See spew_.refLabel().
+//
+// In a number of cases there is no information about the target, and
+// we just end up printing "patchable constant load to PC". This is
+// true especially for jumps to bailout handlers (which have no
+// names). See allocLiteralLoadEntry() and its callers. In some cases
+// (loop back edges) some information about the intended target may be
+// propagated from higher levels, and if so it's printed here.
+
+void Assembler::spew(Instruction* i) {
+ if (spew_.isDisabled() || !i) {
+ return;
+ }
+
+ DisasmBuffer buffer;
+ disassembleInstruction(i, buffer);
+ spew_.spew("%s", buffer.start());
+}
+
+// If a target label is known, always print that and do not attempt to
+// disassemble the branch operands, as they will often be encoding
+// metainformation (pointers for a chain of jump instructions), and
+// not actual branch targets.
+
+void Assembler::spewBranch(Instruction* i, const LabelDoc& target) {
+ if (spew_.isDisabled() || !i) {
+ return;
+ }
+
+ DisasmBuffer buffer;
+ disassembleInstruction(i, buffer);
+
+ char labelBuf[128];
+ labelBuf[0] = 0;
+
+ bool haveTarget = target.valid;
+ if (!haveTarget) {
+ SprintfLiteral(labelBuf, " -> (link-time target)");
+ }
+
+ if (InstBranchImm::IsTHIS(*i)) {
+ InstBranchImm* bimm = InstBranchImm::AsTHIS(*i);
+ BOffImm destOff;
+ bimm->extractImm(&destOff);
+ if (destOff.isInvalid() || haveTarget) {
+ // The target information in the instruction is likely garbage, so remove
+ // it. The target label will in any case be printed if we have it.
+ //
+ // The format of the instruction disassembly is [0-9a-f]{8}\s+\S+\s+.*,
+ // where the \S+ string is the opcode. Strip everything after the opcode,
+ // and attach the label if we have it.
+ int i;
+ for (i = 8; i < buffer.length() && buffer[i] == ' '; i++) {
+ }
+ for (; i < buffer.length() && buffer[i] != ' '; i++) {
+ }
+ buffer[i] = 0;
+ if (haveTarget) {
+ SprintfLiteral(labelBuf, " -> %d%s", target.doc,
+ !target.bound ? "f" : "");
+ haveTarget = false;
+ }
+ }
+ }
+ spew_.spew("%s%s", buffer.start(), labelBuf);
+
+ if (haveTarget) {
+ spew_.spewRef(target);
+ }
+}
+
+void Assembler::spewLiteralLoad(PoolHintPun& php, bool loadToPC,
+ const Instruction* i, const LiteralDoc& doc) {
+ if (spew_.isDisabled()) {
+ return;
+ }
+
+ char litbuf[2048];
+ spew_.formatLiteral(doc, litbuf, sizeof(litbuf));
+
+ // See patchConstantPoolLoad, above. We assemble the instruction into a
+ // buffer with a zero offset, as documentation, but the offset will be
+ // patched later.
+
+ uint32_t inst;
+ PoolHintData& data = php.phd;
+ switch (php.phd.getLoadType()) {
+ case PoolHintData::PoolDTR:
+ Assembler::as_dtr_patch(IsLoad, 32, Offset, data.getReg(),
+ DTRAddr(pc, DtrOffImm(0)), data.getCond(), &inst);
+ break;
+ case PoolHintData::PoolBranch:
+ if (data.isValidPoolHint()) {
+ Assembler::as_dtr_patch(IsLoad, 32, Offset, pc,
+ DTRAddr(pc, DtrOffImm(0)), data.getCond(),
+ &inst);
+ }
+ break;
+ case PoolHintData::PoolVDTR:
+ Assembler::as_vdtr_patch(IsLoad, data.getVFPReg(),
+ VFPAddr(pc, VFPOffImm(0)), data.getCond(),
+ &inst);
+ break;
+
+ default:
+ MOZ_CRASH();
+ }
+
+ DisasmBuffer buffer;
+ disasm::NameConverter converter;
+ disasm::Disassembler dasm(converter);
+ dasm.InstructionDecode(buffer, reinterpret_cast<uint8_t*>(&inst));
+ spew_.spew("%s ; .const %s", buffer.start(), litbuf);
+}
+
+#endif // JS_DISASM_ARM
diff --git a/js/src/jit/arm/Assembler-arm.h b/js/src/jit/arm/Assembler-arm.h
new file mode 100644
index 0000000000..fdbac15a80
--- /dev/null
+++ b/js/src/jit/arm/Assembler-arm.h
@@ -0,0 +1,2296 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_Assembler_arm_h
+#define jit_arm_Assembler_arm_h
+
+#include "mozilla/Attributes.h"
+#include "mozilla/MathAlgorithms.h"
+
+#include <algorithm>
+#include <iterator>
+#include <type_traits>
+
+#include "jit/arm/Architecture-arm.h"
+#include "jit/arm/disasm/Disasm-arm.h"
+#include "jit/CompactBuffer.h"
+#include "jit/JitCode.h"
+#include "jit/shared/Assembler-shared.h"
+#include "jit/shared/Disassembler-shared.h"
+#include "jit/shared/IonAssemblerBufferWithConstantPools.h"
+#include "wasm/WasmTypeDecls.h"
+
+union PoolHintPun;
+
+namespace js {
+namespace jit {
+
+using LiteralDoc = DisassemblerSpew::LiteralDoc;
+using LabelDoc = DisassemblerSpew::LabelDoc;
+
+// NOTE: there are duplicates in this list! Sometimes we want to specifically
+// refer to the link register as a link register (bl lr is much clearer than bl
+// r14). HOWEVER, this register can easily be a gpr when it is not busy holding
+// the return address.
+static constexpr Register r0{Registers::r0};
+static constexpr Register r1{Registers::r1};
+static constexpr Register r2{Registers::r2};
+static constexpr Register r3{Registers::r3};
+static constexpr Register r4{Registers::r4};
+static constexpr Register r5{Registers::r5};
+static constexpr Register r6{Registers::r6};
+static constexpr Register r7{Registers::r7};
+static constexpr Register r8{Registers::r8};
+static constexpr Register r9{Registers::r9};
+static constexpr Register r10{Registers::r10};
+static constexpr Register r11{Registers::r11};
+static constexpr Register r12{Registers::ip};
+static constexpr Register ip{Registers::ip};
+static constexpr Register sp{Registers::sp};
+static constexpr Register r14{Registers::lr};
+static constexpr Register lr{Registers::lr};
+static constexpr Register pc{Registers::pc};
+
+static constexpr Register ScratchRegister{Registers::ip};
+
+// Helper class for ScratchRegister usage. Asserts that only one piece
+// of code thinks it has exclusive ownership of the scratch register.
+struct ScratchRegisterScope : public AutoRegisterScope {
+ explicit ScratchRegisterScope(MacroAssembler& masm)
+ : AutoRegisterScope(masm, ScratchRegister) {}
+};
+
+struct SecondScratchRegisterScope : public AutoRegisterScope {
+ explicit SecondScratchRegisterScope(MacroAssembler& masm);
+};
+
+static constexpr Register OsrFrameReg = r3;
+static constexpr Register CallTempReg0 = r5;
+static constexpr Register CallTempReg1 = r6;
+static constexpr Register CallTempReg2 = r7;
+static constexpr Register CallTempReg3 = r8;
+static constexpr Register CallTempReg4 = r0;
+static constexpr Register CallTempReg5 = r1;
+
+static constexpr Register IntArgReg0 = r0;
+static constexpr Register IntArgReg1 = r1;
+static constexpr Register IntArgReg2 = r2;
+static constexpr Register IntArgReg3 = r3;
+static constexpr Register HeapReg = r10;
+static constexpr Register CallTempNonArgRegs[] = {r5, r6, r7, r8};
+static const uint32_t NumCallTempNonArgRegs = std::size(CallTempNonArgRegs);
+
+// These register assignments for the 64-bit atomic ops are frequently too
+// constraining, but we have no way of expressing looser constraints to the
+// register allocator.
+
+// CompareExchange: Any two odd/even pairs would do for `new` and `out`, and any
+// pair would do for `old`, so long as none of them overlap.
+
+static constexpr Register CmpXchgOldLo = r4;
+static constexpr Register CmpXchgOldHi = r5;
+static constexpr Register64 CmpXchgOld64 =
+ Register64(CmpXchgOldHi, CmpXchgOldLo);
+static constexpr Register CmpXchgNewLo = IntArgReg2;
+static constexpr Register CmpXchgNewHi = IntArgReg3;
+static constexpr Register64 CmpXchgNew64 =
+ Register64(CmpXchgNewHi, CmpXchgNewLo);
+static constexpr Register CmpXchgOutLo = IntArgReg0;
+static constexpr Register CmpXchgOutHi = IntArgReg1;
+static constexpr Register64 CmpXchgOut64 =
+ Register64(CmpXchgOutHi, CmpXchgOutLo);
+
+// Exchange: Any two non-equal odd/even pairs would do for `new` and `out`.
+
+static constexpr Register XchgNewLo = IntArgReg2;
+static constexpr Register XchgNewHi = IntArgReg3;
+static constexpr Register64 XchgNew64 = Register64(XchgNewHi, XchgNewLo);
+static constexpr Register XchgOutLo = IntArgReg0;
+static constexpr Register XchgOutHi = IntArgReg1;
+
+// Atomic rmw operations: Any two odd/even pairs would do for `tmp` and `out`,
+// and any pair would do for `val`, so long as none of them overlap.
+
+static constexpr Register FetchOpValLo = r4;
+static constexpr Register FetchOpValHi = r5;
+static constexpr Register64 FetchOpVal64 =
+ Register64(FetchOpValHi, FetchOpValLo);
+static constexpr Register FetchOpTmpLo = IntArgReg2;
+static constexpr Register FetchOpTmpHi = IntArgReg3;
+static constexpr Register64 FetchOpTmp64 =
+ Register64(FetchOpTmpHi, FetchOpTmpLo);
+static constexpr Register FetchOpOutLo = IntArgReg0;
+static constexpr Register FetchOpOutHi = IntArgReg1;
+static constexpr Register64 FetchOpOut64 =
+ Register64(FetchOpOutHi, FetchOpOutLo);
+
+class ABIArgGenerator {
+ unsigned intRegIndex_;
+ unsigned floatRegIndex_;
+ uint32_t stackOffset_;
+ ABIArg current_;
+
+  // ARM can use either the HardFp ABI (float registers for float arguments)
+  // or the SoftFp ABI (general registers for float arguments). We keep this
+  // as a runtime switch because wasm always uses the HardFp back-end, while
+  // calls to native functions have to use the ABI provided by the system.
+ bool useHardFp_;
+
+ ABIArg softNext(MIRType argType);
+ ABIArg hardNext(MIRType argType);
+
+ public:
+ ABIArgGenerator();
+
+ void setUseHardFp(bool useHardFp) {
+ MOZ_ASSERT(intRegIndex_ == 0 && floatRegIndex_ == 0);
+ useHardFp_ = useHardFp;
+ }
+ ABIArg next(MIRType argType);
+ ABIArg& current() { return current_; }
+ uint32_t stackBytesConsumedSoFar() const { return stackOffset_; }
+ void increaseStackOffset(uint32_t bytes) { stackOffset_ += bytes; }
+};
+
+bool IsUnaligned(const wasm::MemoryAccessDesc& access);
+
+// These registers may be volatile or nonvolatile.
+static constexpr Register ABINonArgReg0 = r4;
+static constexpr Register ABINonArgReg1 = r5;
+static constexpr Register ABINonArgReg2 = r6;
+static constexpr Register ABINonArgReg3 = r7;
+
+// This register may be volatile or nonvolatile. Avoid d15 which is the
+// ScratchDoubleReg_.
+static constexpr FloatRegister ABINonArgDoubleReg{FloatRegisters::d8,
+ VFPRegister::Double};
+
+// These registers may be volatile or nonvolatile.
+// Note: these three registers are all guaranteed to be different.
+static constexpr Register ABINonArgReturnReg0 = r4;
+static constexpr Register ABINonArgReturnReg1 = r5;
+static constexpr Register ABINonVolatileReg = r6;
+
+// This register is guaranteed to be clobberable during the prologue and
+// epilogue of an ABI call which must preserve ABI argument, return, and
+// non-volatile registers.
+static constexpr Register ABINonArgReturnVolatileReg = lr;
+
+// Instance pointer argument register for WebAssembly functions. This must not
+// alias any other register used for passing function arguments or return
+// values. Preserved by WebAssembly functions.
+static constexpr Register InstanceReg = r9;
+
+// Registers used for wasm table calls. These registers must be disjoint
+// from the ABI argument registers, InstanceReg and each other.
+static constexpr Register WasmTableCallScratchReg0 = ABINonArgReg0;
+static constexpr Register WasmTableCallScratchReg1 = ABINonArgReg1;
+static constexpr Register WasmTableCallSigReg = ABINonArgReg2;
+static constexpr Register WasmTableCallIndexReg = ABINonArgReg3;
+
+// Registers used for ref calls.
+static constexpr Register WasmCallRefCallScratchReg0 = ABINonArgReg0;
+static constexpr Register WasmCallRefCallScratchReg1 = ABINonArgReg1;
+static constexpr Register WasmCallRefReg = ABINonArgReg3;
+
+// Register used as a scratch along the return path in the fast js -> wasm stub
+// code. This must not overlap ReturnReg, JSReturnOperand, or InstanceReg.
+// It must be a volatile register.
+static constexpr Register WasmJitEntryReturnScratch = r5;
+
+static constexpr Register PreBarrierReg = r1;
+
+static constexpr Register InterpreterPCReg = r9;
+
+static constexpr Register InvalidReg{Registers::invalid_reg};
+static constexpr FloatRegister InvalidFloatReg;
+
+static constexpr Register JSReturnReg_Type = r3;
+static constexpr Register JSReturnReg_Data = r2;
+static constexpr Register StackPointer = sp;
+static constexpr Register FramePointer = r11;
+static constexpr Register ReturnReg = r0;
+static constexpr Register64 ReturnReg64(r1, r0);
+
+// The attribute '__value_in_regs' alters the calling convention of a function
+// so that a structure of up to four elements can be returned via the argument
+// registers rather than being written to memory.
+static constexpr Register ReturnRegVal0 = IntArgReg0;
+static constexpr Register ReturnRegVal1 = IntArgReg1;
+static constexpr Register ReturnRegVal2 = IntArgReg2;
+static constexpr Register ReturnRegVal3 = IntArgReg3;
+
+static constexpr FloatRegister ReturnFloat32Reg = {FloatRegisters::d0,
+ VFPRegister::Single};
+static constexpr FloatRegister ReturnDoubleReg = {FloatRegisters::d0,
+ VFPRegister::Double};
+static constexpr FloatRegister ReturnSimd128Reg = InvalidFloatReg;
+static constexpr FloatRegister ScratchFloat32Reg_ = {FloatRegisters::s30,
+ VFPRegister::Single};
+static constexpr FloatRegister ScratchDoubleReg_ = {FloatRegisters::d15,
+ VFPRegister::Double};
+static constexpr FloatRegister ScratchSimd128Reg = InvalidFloatReg;
+static constexpr FloatRegister ScratchUIntReg = {FloatRegisters::d15,
+ VFPRegister::UInt};
+static constexpr FloatRegister ScratchIntReg = {FloatRegisters::d15,
+ VFPRegister::Int};
+
+// Do not reference ScratchFloat32Reg_ directly, use ScratchFloat32Scope
+// instead.
+struct ScratchFloat32Scope : public AutoFloatRegisterScope {
+ explicit ScratchFloat32Scope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchFloat32Reg_) {}
+};
+
+// Do not reference ScratchDoubleReg_ directly, use ScratchDoubleScope instead.
+struct ScratchDoubleScope : public AutoFloatRegisterScope {
+ explicit ScratchDoubleScope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {}
+};
+
+// Registers used by RegExpMatcher and RegExpExecMatch stubs (do not use
+// JSReturnOperand).
+static constexpr Register RegExpMatcherRegExpReg = CallTempReg0;
+static constexpr Register RegExpMatcherStringReg = CallTempReg1;
+static constexpr Register RegExpMatcherLastIndexReg = CallTempReg2;
+
+// Registers used by RegExpExecTest stub (do not use ReturnReg).
+static constexpr Register RegExpExecTestRegExpReg = CallTempReg0;
+static constexpr Register RegExpExecTestStringReg = CallTempReg1;
+
+// Registers used by RegExpSearcher stub (do not use ReturnReg).
+static constexpr Register RegExpSearcherRegExpReg = CallTempReg0;
+static constexpr Register RegExpSearcherStringReg = CallTempReg1;
+static constexpr Register RegExpSearcherLastIndexReg = CallTempReg2;
+
+static constexpr FloatRegister d0 = {FloatRegisters::d0, VFPRegister::Double};
+static constexpr FloatRegister d1 = {FloatRegisters::d1, VFPRegister::Double};
+static constexpr FloatRegister d2 = {FloatRegisters::d2, VFPRegister::Double};
+static constexpr FloatRegister d3 = {FloatRegisters::d3, VFPRegister::Double};
+static constexpr FloatRegister d4 = {FloatRegisters::d4, VFPRegister::Double};
+static constexpr FloatRegister d5 = {FloatRegisters::d5, VFPRegister::Double};
+static constexpr FloatRegister d6 = {FloatRegisters::d6, VFPRegister::Double};
+static constexpr FloatRegister d7 = {FloatRegisters::d7, VFPRegister::Double};
+static constexpr FloatRegister d8 = {FloatRegisters::d8, VFPRegister::Double};
+static constexpr FloatRegister d9 = {FloatRegisters::d9, VFPRegister::Double};
+static constexpr FloatRegister d10 = {FloatRegisters::d10, VFPRegister::Double};
+static constexpr FloatRegister d11 = {FloatRegisters::d11, VFPRegister::Double};
+static constexpr FloatRegister d12 = {FloatRegisters::d12, VFPRegister::Double};
+static constexpr FloatRegister d13 = {FloatRegisters::d13, VFPRegister::Double};
+static constexpr FloatRegister d14 = {FloatRegisters::d14, VFPRegister::Double};
+static constexpr FloatRegister d15 = {FloatRegisters::d15, VFPRegister::Double};
+
+// For maximal awesomeness, 8 should be sufficient. ldrd/strd (dual-register
+// load/store) operate in a single cycle when the address they are dealing with
+// is 8 byte aligned. Also, the ARM ABI wants the stack to be 8 byte aligned at
+// function boundaries. I'm trying to make sure this is always true.
+static constexpr uint32_t ABIStackAlignment = 8;
+static constexpr uint32_t CodeAlignment = 8;
+static constexpr uint32_t JitStackAlignment = 8;
+
+static constexpr uint32_t JitStackValueAlignment =
+ JitStackAlignment / sizeof(Value);
+static_assert(JitStackAlignment % sizeof(Value) == 0 &&
+ JitStackValueAlignment >= 1,
+ "Stack alignment should be a non-zero multiple of sizeof(Value)");
+
+static constexpr uint32_t SimdMemoryAlignment = 8;
+
+static_assert(CodeAlignment % SimdMemoryAlignment == 0,
+ "Code alignment should be larger than any of the alignments "
+ "which are used for "
+ "the constant sections of the code buffer. Thus it should be "
+ "larger than the "
+ "alignment for SIMD constants.");
+
+static_assert(JitStackAlignment % SimdMemoryAlignment == 0,
+ "Stack alignment should be larger than any of the alignments "
+ "which are used for "
+ "spilled values. Thus it should be larger than the alignment "
+ "for SIMD accesses.");
+
+static const uint32_t WasmStackAlignment = SimdMemoryAlignment;
+static const uint32_t WasmTrapInstructionLength = 4;
+
+// See comments in wasm::GenerateFunctionPrologue. The difference between these
+// is the size of the largest callable prologue on the platform.
+static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;
+
+static const Scale ScalePointer = TimesFour;
+
+class Instruction;
+class InstBranchImm;
+uint32_t RM(Register r);
+uint32_t RS(Register r);
+uint32_t RD(Register r);
+uint32_t RT(Register r);
+uint32_t RN(Register r);
+
+uint32_t maybeRD(Register r);
+uint32_t maybeRT(Register r);
+uint32_t maybeRN(Register r);
+
+Register toRN(Instruction i);
+Register toRM(Instruction i);
+Register toRD(Instruction i);
+Register toR(Instruction i);
+
+class VFPRegister;
+uint32_t VD(VFPRegister vr);
+uint32_t VN(VFPRegister vr);
+uint32_t VM(VFPRegister vr);
+
+// For being passed into the generic vfp instruction generator when there is an
+// instruction that only takes two registers.
+static constexpr VFPRegister NoVFPRegister(VFPRegister::Double, 0, false, true);
+
+struct ImmTag : public Imm32 {
+ explicit ImmTag(JSValueTag mask) : Imm32(int32_t(mask)) {}
+};
+
+struct ImmType : public ImmTag {
+ explicit ImmType(JSValueType type) : ImmTag(JSVAL_TYPE_TO_TAG(type)) {}
+};
+
+enum Index {
+ Offset = 0 << 21 | 1 << 24,
+ PreIndex = 1 << 21 | 1 << 24,
+ PostIndex = 0 << 21 | 0 << 24
+ // The docs were rather unclear on this. It sounds like
+ // 1 << 21 | 0 << 24 encodes dtrt.
+};
+
+enum IsImmOp2_ { IsImmOp2 = 1 << 25, IsNotImmOp2 = 0 << 25 };
+enum IsImmDTR_ { IsImmDTR = 0 << 25, IsNotImmDTR = 1 << 25 };
+// For the extra memory operations, ldrd, ldrsb, ldrh.
+enum IsImmEDTR_ { IsImmEDTR = 1 << 22, IsNotImmEDTR = 0 << 22 };
+
+enum ShiftType {
+ LSL = 0, // << 5
+ LSR = 1, // << 5
+ ASR = 2, // << 5
+ ROR = 3, // << 5
+ RRX = ROR // RRX is encoded as ROR with a 0 offset.
+};
+
+// Modes for STM/LDM. Names are the suffixes applied to the instruction.
+enum DTMMode {
+ A = 0 << 24, // empty / after
+ B = 1 << 24, // full / before
+ D = 0 << 23, // decrement
+ I = 1 << 23, // increment
+ DA = D | A,
+ DB = D | B,
+ IA = I | A,
+ IB = I | B
+};
+
+enum DTMWriteBack { WriteBack = 1 << 21, NoWriteBack = 0 << 21 };
+
+// Condition code updating mode.
+enum SBit {
+ SetCC = 1 << 20, // Set condition code.
+ LeaveCC = 0 << 20 // Leave condition code unchanged.
+};
+
+enum LoadStore { IsLoad = 1 << 20, IsStore = 0 << 20 };
+
+// You almost never want to use this directly. Instead, you want to pass in a
+// signed constant, and let this bit be implicitly set for you. This is,
+// however, necessary if we want a negative index.
+enum IsUp_ { IsUp = 1 << 23, IsDown = 0 << 23 };
+enum ALUOp {
+ OpMov = 0xd << 21,
+ OpMvn = 0xf << 21,
+ OpAnd = 0x0 << 21,
+ OpBic = 0xe << 21,
+ OpEor = 0x1 << 21,
+ OpOrr = 0xc << 21,
+ OpAdc = 0x5 << 21,
+ OpAdd = 0x4 << 21,
+ OpSbc = 0x6 << 21,
+ OpSub = 0x2 << 21,
+ OpRsb = 0x3 << 21,
+ OpRsc = 0x7 << 21,
+ OpCmn = 0xb << 21,
+ OpCmp = 0xa << 21,
+ OpTeq = 0x9 << 21,
+ OpTst = 0x8 << 21,
+ OpInvalid = -1
+};
+
+enum MULOp {
+ OpmMul = 0 << 21,
+ OpmMla = 1 << 21,
+ OpmUmaal = 2 << 21,
+ OpmMls = 3 << 21,
+ OpmUmull = 4 << 21,
+ OpmUmlal = 5 << 21,
+ OpmSmull = 6 << 21,
+ OpmSmlal = 7 << 21
+};
+enum BranchTag {
+ OpB = 0x0a000000,
+ OpBMask = 0x0f000000,
+ OpBDestMask = 0x00ffffff,
+ OpBl = 0x0b000000,
+ OpBlx = 0x012fff30,
+ OpBx = 0x012fff10
+};
+
+// Just like ALUOp, but for the vfp instruction set.
+enum VFPOp {
+ OpvMul = 0x2 << 20,
+ OpvAdd = 0x3 << 20,
+ OpvSub = 0x3 << 20 | 0x1 << 6,
+ OpvDiv = 0x8 << 20,
+ OpvMov = 0xB << 20 | 0x1 << 6,
+ OpvAbs = 0xB << 20 | 0x3 << 6,
+ OpvNeg = 0xB << 20 | 0x1 << 6 | 0x1 << 16,
+ OpvSqrt = 0xB << 20 | 0x3 << 6 | 0x1 << 16,
+ OpvCmp = 0xB << 20 | 0x1 << 6 | 0x4 << 16,
+ OpvCmpz = 0xB << 20 | 0x1 << 6 | 0x5 << 16
+};
+
+// Negate the operation, AND negate the immediate that we were passed in.
+ALUOp ALUNeg(ALUOp op, Register dest, Register scratch, Imm32* imm,
+ Register* negDest);
+bool can_dbl(ALUOp op);
+bool condsAreSafe(ALUOp op);
+
+// If there is a variant of op that has a dest (think cmp/sub) return that
+// variant of it.
+ALUOp getDestVariant(ALUOp op);
+
+static constexpr ValueOperand JSReturnOperand{JSReturnReg_Type,
+ JSReturnReg_Data};
+static const ValueOperand softfpReturnOperand = ValueOperand(r1, r0);
+
+// All of these classes exist solely to shuffle data into the various operands.
+// For example Operand2 can be an imm8, a register-shifted-by-a-constant or a
+// register-shifted-by-a-register. We represent this in C++ by having a base
+// class Operand2, which just stores the 32 bits of data as they will be encoded
+// in the instruction. You cannot directly create an Operand2 since it is
+// tricky, and not entirely sane to do so. Instead, you create one of its child
+// classes, e.g. Imm8. Imm8's constructor takes a single integer argument. Imm8
+// will verify that its argument can be encoded as an ARM 12 bit imm8, encode it
+// using an Imm8data, and finally call its parent's (Operand2) constructor with
+// the Imm8data. The Operand2 constructor will then call the Imm8data's encode()
+// function to extract the raw bits from it.
+//
+// In the future, we should be able to extract data from the Operand2 by asking
+// it for its component Imm8data structures. The reason this is so horribly
+// round-about is we wanted to have Imm8 and RegisterShiftedRegister inherit
+// directly from Operand2 but have all of them take up only a single word of
+// storage. We also wanted to avoid passing around raw integers at all since
+// they are error prone.
+class Op2Reg;
+class O2RegImmShift;
+class O2RegRegShift;
+
+namespace datastore {
+
+class Reg {
+ // The "second register".
+ uint32_t rm_ : 4;
+ // Do we get another register for shifting.
+ uint32_t rrs_ : 1;
+ uint32_t type_ : 2;
+ // We'd like this to be a more sensible encoding, but that would need to be
+ // a struct and that would not pack :(
+ uint32_t shiftAmount_ : 5;
+
+ protected:
+ // Mark as a protected field to avoid unused private field warnings.
+ uint32_t pad_ : 20;
+
+ public:
+ Reg(uint32_t rm, ShiftType type, uint32_t rsr, uint32_t shiftAmount)
+ : rm_(rm), rrs_(rsr), type_(type), shiftAmount_(shiftAmount), pad_(0) {}
+ explicit Reg(const Op2Reg& op) { memcpy(this, &op, sizeof(*this)); }
+
+ uint32_t shiftAmount() const { return shiftAmount_; }
+
+ uint32_t encode() const {
+ return rm_ | (rrs_ << 4) | (type_ << 5) | (shiftAmount_ << 7);
+ }
+};
+
+// Op2 has a mode labelled "<imm8m>", which is ARM's magical immediate encoding.
+// Some instructions actually get 8 bits of data, which is called Imm8Data
+// below. These should have edit distance > 1, but this is how it is for now.
+class Imm8mData {
+ uint32_t data_ : 8;
+ uint32_t rot_ : 4;
+
+ protected:
+ // Mark as a protected field to avoid unused private field warnings.
+ uint32_t buff_ : 19;
+
+ private:
+ // Throw in an extra bit that will be 1 if we can't encode this properly.
+  // If we can encode it properly, a simple "|" will still suffice to meld it
+ // into the instruction.
+ uint32_t invalid_ : 1;
+
+ public:
+ // Default constructor makes an invalid immediate.
+ Imm8mData() : data_(0xff), rot_(0xf), buff_(0), invalid_(true) {}
+
+ Imm8mData(uint32_t data, uint32_t rot)
+ : data_(data), rot_(rot), buff_(0), invalid_(false) {
+ MOZ_ASSERT(data == data_);
+ MOZ_ASSERT(rot == rot_);
+ }
+
+ bool invalid() const { return invalid_; }
+
+ uint32_t encode() const {
+ MOZ_ASSERT(!invalid_);
+ return data_ | (rot_ << 8);
+ };
+};
+
+class Imm8Data {
+ uint32_t imm4L_ : 4;
+
+ protected:
+ // Mark as a protected field to avoid unused private field warnings.
+ uint32_t pad_ : 4;
+
+ private:
+ uint32_t imm4H_ : 4;
+
+ public:
+ explicit Imm8Data(uint32_t imm) : imm4L_(imm & 0xf), imm4H_(imm >> 4) {
+ MOZ_ASSERT(imm <= 0xff);
+ }
+
+ uint32_t encode() const { return imm4L_ | (imm4H_ << 8); };
+};
+
+// VLDR/VSTR take an 8 bit offset, which is implicitly left shifted by 2.
+class Imm8VFPOffData {
+ uint32_t data_;
+
+ public:
+ explicit Imm8VFPOffData(uint32_t imm) : data_(imm) {
+ MOZ_ASSERT((imm & ~(0xff)) == 0);
+ }
+ uint32_t encode() const { return data_; };
+};
+
+// ARM can magically encode 256 very special immediates to be moved into a
+// register.
+struct Imm8VFPImmData {
+ // This structure's members are public and it has no constructor to
+ // initialize them, for a very special reason. Were this structure to
+ // have a constructor, the initialization for DoubleEncoder's internal
+ // table (see below) would require a rather large static constructor on
+ // some of our supported compilers. The known solution to this is to mark
+ // the constructor constexpr, but, again, some of our supported
+ // compilers don't support constexpr! So we are reduced to public
+ // members and eschewing a constructor in hopes that the initialization
+ // of DoubleEncoder's table is correct.
+ uint32_t imm4L : 4;
+ uint32_t imm4H : 4;
+ int32_t isInvalid : 24;
+
+ uint32_t encode() const {
+    // This assert is an attempt at ensuring that we don't create random
+    // instances of this structure and then ask to encode() them.
+ MOZ_ASSERT(isInvalid == 0);
+ return imm4L | (imm4H << 16);
+ };
+};
+
+class Imm12Data {
+ uint32_t data_ : 12;
+
+ public:
+ explicit Imm12Data(uint32_t imm) : data_(imm) { MOZ_ASSERT(data_ == imm); }
+
+ uint32_t encode() const { return data_; }
+};
+
+class RIS {
+ uint32_t shiftAmount_ : 5;
+
+ public:
+ explicit RIS(uint32_t imm) : shiftAmount_(imm) {
+ MOZ_ASSERT(shiftAmount_ == imm);
+ }
+
+ explicit RIS(Reg r) : shiftAmount_(r.shiftAmount()) {}
+
+ uint32_t encode() const { return shiftAmount_; }
+};
+
+class RRS {
+ protected:
+ // Mark as a protected field to avoid unused private field warnings.
+ uint32_t mustZero_ : 1;
+
+ private:
+ // The register that holds the shift amount.
+ uint32_t rs_ : 4;
+
+ public:
+ explicit RRS(uint32_t rs) : rs_(rs) { MOZ_ASSERT(rs_ == rs); }
+
+ uint32_t encode() const { return rs_ << 1; }
+};
+
+} // namespace datastore
+
+class MacroAssemblerARM;
+class Operand;
+
+class Operand2 {
+ friend class Operand;
+ friend class MacroAssemblerARM;
+ friend class InstALU;
+
+ uint32_t oper_ : 31;
+ uint32_t invalid_ : 1;
+
+ protected:
+ explicit Operand2(datastore::Imm8mData base)
+ : oper_(base.invalid() ? -1 : (base.encode() | uint32_t(IsImmOp2))),
+ invalid_(base.invalid()) {}
+
+ explicit Operand2(datastore::Reg base)
+ : oper_(base.encode() | uint32_t(IsNotImmOp2)), invalid_(false) {}
+
+ private:
+ explicit Operand2(uint32_t blob) : oper_(blob), invalid_(false) {}
+
+ public:
+ bool isO2Reg() const { return !(oper_ & IsImmOp2); }
+
+ Op2Reg toOp2Reg() const;
+
+ bool isImm8() const { return oper_ & IsImmOp2; }
+
+ bool invalid() const { return invalid_; }
+
+ uint32_t encode() const { return oper_; }
+};
+
+class Imm8 : public Operand2 {
+ public:
+ explicit Imm8(uint32_t imm) : Operand2(EncodeImm(imm)) {}
+
+ static datastore::Imm8mData EncodeImm(uint32_t imm) {
+ // RotateLeft below may not be called with a shift of zero.
+ if (imm <= 0xFF) {
+ return datastore::Imm8mData(imm, 0);
+ }
+
+ // An encodable integer has a maximum of 8 contiguous set bits,
+ // with an optional wrapped left rotation to even bit positions.
+ for (int rot = 1; rot < 16; rot++) {
+ uint32_t rotimm = mozilla::RotateLeft(imm, rot * 2);
+ if (rotimm <= 0xFF) {
+ return datastore::Imm8mData(rotimm, rot);
+ }
+ }
+ return datastore::Imm8mData();
+ }
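+
+  // Worked example: 0x0000ab00 is 0xab rotated right by 24 bits, so
+  // EncodeImm returns (data = 0xab, rot = 12) and the hardware recovers the
+  // value as data ROR (2 * rot). A value such as 0x101, whose set bits do
+  // not fit in any even-rotated 8-bit window, yields the invalid Imm8mData.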
+
+ // Pair template?
+ struct TwoImm8mData {
+ datastore::Imm8mData fst_, snd_;
+
+ TwoImm8mData() = default;
+
+ TwoImm8mData(datastore::Imm8mData fst, datastore::Imm8mData snd)
+ : fst_(fst), snd_(snd) {}
+
+ datastore::Imm8mData fst() const { return fst_; }
+ datastore::Imm8mData snd() const { return snd_; }
+ };
+
+ static TwoImm8mData EncodeTwoImms(uint32_t);
+};
+
+class Op2Reg : public Operand2 {
+ public:
+ explicit Op2Reg(Register rm, ShiftType type, datastore::RIS shiftImm)
+ : Operand2(datastore::Reg(rm.code(), type, 0, shiftImm.encode())) {}
+
+ explicit Op2Reg(Register rm, ShiftType type, datastore::RRS shiftReg)
+ : Operand2(datastore::Reg(rm.code(), type, 1, shiftReg.encode())) {}
+};
+
+static_assert(sizeof(Op2Reg) == sizeof(datastore::Reg),
+ "datastore::Reg(const Op2Reg&) constructor relies on Reg/Op2Reg "
+ "having same size");
+
+class O2RegImmShift : public Op2Reg {
+ public:
+ explicit O2RegImmShift(Register rn, ShiftType type, uint32_t shift)
+ : Op2Reg(rn, type, datastore::RIS(shift)) {}
+};
+
+class O2RegRegShift : public Op2Reg {
+ public:
+ explicit O2RegRegShift(Register rn, ShiftType type, Register rs)
+ : Op2Reg(rn, type, datastore::RRS(rs.code())) {}
+};
+
+O2RegImmShift O2Reg(Register r);
+O2RegImmShift lsl(Register r, int amt);
+O2RegImmShift lsr(Register r, int amt);
+O2RegImmShift asr(Register r, int amt);
+O2RegImmShift rol(Register r, int amt);
+O2RegImmShift ror(Register r, int amt);
+
+O2RegRegShift lsl(Register r, Register amt);
+O2RegRegShift lsr(Register r, Register amt);
+O2RegRegShift asr(Register r, Register amt);
+O2RegRegShift ror(Register r, Register amt);
+
+// An offset from a register to be used for ldr/str. This should include the
+// sign bit, since ARM has "signed-magnitude" offsets. That is, it encodes an
+// unsigned offset, then the instruction specifies if the offset is positive or
+// negative. The +/- bit is necessary if the instruction set wants to be able to
+// have a negative register offset e.g. ldr pc, [r1,-r2];
+class DtrOff {
+ uint32_t data_;
+
+ protected:
+ explicit DtrOff(datastore::Imm12Data immdata, IsUp_ iu)
+ : data_(immdata.encode() | uint32_t(IsImmDTR) | uint32_t(iu)) {}
+
+ explicit DtrOff(datastore::Reg reg, IsUp_ iu = IsUp)
+ : data_(reg.encode() | uint32_t(IsNotImmDTR) | iu) {}
+
+ public:
+ uint32_t encode() const { return data_; }
+};
+
+class DtrOffImm : public DtrOff {
+ public:
+ explicit DtrOffImm(int32_t imm)
+ : DtrOff(datastore::Imm12Data(mozilla::Abs(imm)),
+ imm >= 0 ? IsUp : IsDown) {
+ MOZ_ASSERT(mozilla::Abs(imm) < 4096);
+ }
+};
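+
+// For example, DtrOffImm(-8) stores a magnitude of 8 together with IsDown, in
+// the signed-magnitude form described above; the magnitude must stay below
+// 4096.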
+
+class DtrOffReg : public DtrOff {
+ // These are designed to be called by a constructor of a subclass.
+ // Constructing the necessary RIS/RRS structures is annoying.
+
+ protected:
+ explicit DtrOffReg(Register rn, ShiftType type, datastore::RIS shiftImm,
+ IsUp_ iu = IsUp)
+ : DtrOff(datastore::Reg(rn.code(), type, 0, shiftImm.encode()), iu) {}
+
+ explicit DtrOffReg(Register rn, ShiftType type, datastore::RRS shiftReg,
+ IsUp_ iu = IsUp)
+ : DtrOff(datastore::Reg(rn.code(), type, 1, shiftReg.encode()), iu) {}
+};
+
+class DtrRegImmShift : public DtrOffReg {
+ public:
+ explicit DtrRegImmShift(Register rn, ShiftType type, uint32_t shift,
+ IsUp_ iu = IsUp)
+ : DtrOffReg(rn, type, datastore::RIS(shift), iu) {}
+};
+
+class DtrRegRegShift : public DtrOffReg {
+ public:
+ explicit DtrRegRegShift(Register rn, ShiftType type, Register rs,
+ IsUp_ iu = IsUp)
+ : DtrOffReg(rn, type, datastore::RRS(rs.code()), iu) {}
+};
+
+// We will frequently want to bundle a register with its offset so that we have
+// an "operand" to a load instruction.
+class DTRAddr {
+ friend class Operand;
+
+ uint32_t data_;
+
+ public:
+ explicit DTRAddr(Register reg, DtrOff dtr)
+ : data_(dtr.encode() | (reg.code() << 16)) {}
+
+ uint32_t encode() const { return data_; }
+
+ Register getBase() const { return Register::FromCode((data_ >> 16) & 0xf); }
+};
+
+// Offsets for the extended data transfer instructions:
+// ldrsh, ldrd, ldrsb, etc.
+class EDtrOff {
+ uint32_t data_;
+
+ protected:
+ explicit EDtrOff(datastore::Imm8Data imm8, IsUp_ iu = IsUp)
+ : data_(imm8.encode() | IsImmEDTR | uint32_t(iu)) {}
+
+ explicit EDtrOff(Register rm, IsUp_ iu = IsUp)
+ : data_(rm.code() | IsNotImmEDTR | iu) {}
+
+ public:
+ uint32_t encode() const { return data_; }
+};
+
+class EDtrOffImm : public EDtrOff {
+ public:
+ explicit EDtrOffImm(int32_t imm)
+ : EDtrOff(datastore::Imm8Data(mozilla::Abs(imm)),
+ (imm >= 0) ? IsUp : IsDown) {
+ MOZ_ASSERT(mozilla::Abs(imm) < 256);
+ }
+};
+
+// This is the most-derived class, since the extended data transfer instructions
+// don't support any sort of modifying the "index" operand.
+class EDtrOffReg : public EDtrOff {
+ public:
+ explicit EDtrOffReg(Register rm) : EDtrOff(rm) {}
+};
+
+class EDtrAddr {
+ uint32_t data_;
+
+ public:
+ explicit EDtrAddr(Register r, EDtrOff off) : data_(RN(r) | off.encode()) {}
+
+ uint32_t encode() const { return data_; }
+#ifdef DEBUG
+ Register maybeOffsetRegister() const {
+ if (data_ & IsImmEDTR) {
+ return InvalidReg;
+ }
+ return Register::FromCode(data_ & 0xf);
+ }
+#endif
+};
+
+class VFPOff {
+ uint32_t data_;
+
+ protected:
+ explicit VFPOff(datastore::Imm8VFPOffData imm, IsUp_ isup)
+ : data_(imm.encode() | uint32_t(isup)) {}
+
+ public:
+ uint32_t encode() const { return data_; }
+};
+
+class VFPOffImm : public VFPOff {
+ public:
+ explicit VFPOffImm(int32_t imm)
+ : VFPOff(datastore::Imm8VFPOffData(mozilla::Abs(imm) / 4),
+ imm < 0 ? IsDown : IsUp) {
+ MOZ_ASSERT(mozilla::Abs(imm) <= 255 * 4);
+ }
+};
+
+class VFPAddr {
+ friend class Operand;
+
+ uint32_t data_;
+
+ public:
+ explicit VFPAddr(Register base, VFPOff off)
+ : data_(RN(base) | off.encode()) {}
+
+ uint32_t encode() const { return data_; }
+};
+
+class VFPImm {
+ uint32_t data_;
+
+ public:
+ explicit VFPImm(uint32_t topWordOfDouble);
+
+ static const VFPImm One;
+
+ uint32_t encode() const { return data_; }
+ bool isValid() const { return data_ != (~0U); }
+};
+
+// A BOffImm is an immediate that is used for branches. Namely, it is the offset
+// that will be encoded in the branch instruction. This is the only sane way of
+// constructing a branch.
+class BOffImm {
+ friend class InstBranchImm;
+
+ uint32_t data_;
+
+ public:
+ explicit BOffImm(int offset) : data_((offset - 8) >> 2 & 0x00ffffff) {
+ MOZ_ASSERT((offset & 0x3) == 0);
+ if (!IsInRange(offset)) {
+ MOZ_CRASH("BOffImm offset out of range");
+ }
+ }
+
+ explicit BOffImm() : data_(INVALID) {}
+
+ private:
+ explicit BOffImm(const Instruction& inst);
+
+ public:
+ static const uint32_t INVALID = 0x00800000;
+
+ uint32_t encode() const { return data_; }
+ int32_t decode() const { return ((int32_t(data_) << 8) >> 6) + 8; }
+
+ static bool IsInRange(int offset) {
+ if ((offset - 8) < -33554432) {
+ return false;
+ }
+ if ((offset - 8) > 33554428) {
+ return false;
+ }
+ return true;
+ }
+
+ bool isInvalid() const { return data_ == INVALID; }
+ Instruction* getDest(Instruction* src) const;
+};
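+
+// Worked example: BOffImm(0x1000) stores (0x1000 - 8) >> 2 = 0x3fe in its low
+// 24 bits, and decode() shifts that back up (with sign extension) and re-adds
+// the 8-byte pipeline bias to recover 0x1000.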
+
+class Imm16 {
+ uint32_t lower_ : 12;
+
+ protected:
+ // Mark as a protected field to avoid unused private field warnings.
+ uint32_t pad_ : 4;
+
+ private:
+ uint32_t upper_ : 4;
+ uint32_t invalid_ : 12;
+
+ public:
+ explicit Imm16();
+ explicit Imm16(uint32_t imm);
+ explicit Imm16(Instruction& inst);
+
+ uint32_t encode() const { return lower_ | (upper_ << 16); }
+ uint32_t decode() const { return lower_ | (upper_ << 12); }
+
+ bool isInvalid() const { return invalid_; }
+};
+
+// I would prefer that these did not exist, since there are essentially no
+// instructions that would ever take more than one of these; however, the MIR
+// wants to have only one type of argument to functions, so bugger.
+class Operand {
+ // The encoding of registers is the same for OP2, DTR and EDTR yet the type
+ // system doesn't let us express this, so choices must be made.
+ public:
+ enum class Tag : uint8_t { OP2, MEM, FOP };
+
+ private:
+ uint32_t tag_ : 8;
+ uint32_t reg_ : 5;
+ int32_t offset_;
+
+ protected:
+ Operand(Tag tag, uint32_t regCode, int32_t offset)
+ : tag_(static_cast<uint32_t>(tag)), reg_(regCode), offset_(offset) {}
+
+ public:
+ explicit Operand(Register reg) : Operand(Tag::OP2, reg.code(), 0) {}
+
+ explicit Operand(FloatRegister freg) : Operand(Tag::FOP, freg.code(), 0) {}
+
+ explicit Operand(Register base, Imm32 off)
+ : Operand(Tag::MEM, base.code(), off.value) {}
+
+ explicit Operand(Register base, int32_t off)
+ : Operand(Tag::MEM, base.code(), off) {}
+
+ explicit Operand(const Address& addr)
+ : Operand(Tag::MEM, addr.base.code(), addr.offset) {}
+
+ public:
+ Tag tag() const { return static_cast<Tag>(tag_); }
+
+ Operand2 toOp2() const {
+ MOZ_ASSERT(tag() == Tag::OP2);
+ return O2Reg(Register::FromCode(reg_));
+ }
+
+ Register toReg() const {
+ MOZ_ASSERT(tag() == Tag::OP2);
+ return Register::FromCode(reg_);
+ }
+
+ Address toAddress() const {
+ MOZ_ASSERT(tag() == Tag::MEM);
+ return Address(Register::FromCode(reg_), offset_);
+ }
+ int32_t disp() const {
+ MOZ_ASSERT(tag() == Tag::MEM);
+ return offset_;
+ }
+
+ int32_t base() const {
+ MOZ_ASSERT(tag() == Tag::MEM);
+ return reg_;
+ }
+ Register baseReg() const {
+ MOZ_ASSERT(tag() == Tag::MEM);
+ return Register::FromCode(reg_);
+ }
+ DTRAddr toDTRAddr() const {
+ MOZ_ASSERT(tag() == Tag::MEM);
+ return DTRAddr(baseReg(), DtrOffImm(offset_));
+ }
+ VFPAddr toVFPAddr() const {
+ MOZ_ASSERT(tag() == Tag::MEM);
+ return VFPAddr(baseReg(), VFPOffImm(offset_));
+ }
+};
+
+inline Imm32 Imm64::firstHalf() const { return low(); }
+
+inline Imm32 Imm64::secondHalf() const { return hi(); }
+
+class InstructionIterator {
+ private:
+ Instruction* inst_;
+
+ public:
+ explicit InstructionIterator(Instruction* inst) : inst_(inst) {
+ maybeSkipAutomaticInstructions();
+ }
+
+ // Advances to the next intentionally-inserted instruction.
+ Instruction* next();
+
+ // Advances past any automatically-inserted instructions.
+ Instruction* maybeSkipAutomaticInstructions();
+
+ Instruction* cur() const { return inst_; }
+
+ protected:
+ // Advances past the given number of instruction-length bytes.
+ inline void advanceRaw(ptrdiff_t instructions = 1);
+};
+
+class Assembler;
+typedef js::jit::AssemblerBufferWithConstantPools<1024, 4, Instruction,
+ Assembler>
+ ARMBuffer;
+
+class Assembler : public AssemblerShared {
+ public:
+ // ARM conditional constants:
+ enum ARMCondition : uint32_t {
+ EQ = 0x00000000, // Zero
+ NE = 0x10000000, // Non-zero
+ CS = 0x20000000,
+ CC = 0x30000000,
+ MI = 0x40000000,
+ PL = 0x50000000,
+ VS = 0x60000000,
+ VC = 0x70000000,
+ HI = 0x80000000,
+ LS = 0x90000000,
+ GE = 0xa0000000,
+ LT = 0xb0000000,
+ GT = 0xc0000000,
+ LE = 0xd0000000,
+ AL = 0xe0000000
+ };
+
+ enum Condition : uint32_t {
+ Equal = EQ,
+ NotEqual = NE,
+ Above = HI,
+ AboveOrEqual = CS,
+ Below = CC,
+ BelowOrEqual = LS,
+ GreaterThan = GT,
+ GreaterThanOrEqual = GE,
+ LessThan = LT,
+ LessThanOrEqual = LE,
+ Overflow = VS,
+ CarrySet = CS,
+ CarryClear = CC,
+ Signed = MI,
+ NotSigned = PL,
+ Zero = EQ,
+ NonZero = NE,
+ Always = AL,
+
+ VFP_NotEqualOrUnordered = NE,
+ VFP_Equal = EQ,
+ VFP_Unordered = VS,
+ VFP_NotUnordered = VC,
+ VFP_GreaterThanOrEqualOrUnordered = CS,
+ VFP_GreaterThanOrEqual = GE,
+ VFP_GreaterThanOrUnordered = HI,
+ VFP_GreaterThan = GT,
+ VFP_LessThanOrEqualOrUnordered = LE,
+ VFP_LessThanOrEqual = LS,
+ VFP_LessThanOrUnordered = LT,
+ VFP_LessThan = CC // MI is valid too.
+ };
+
+ // Bit set when a DoubleCondition does not map to a single ARM condition.
+ // The macro assembler has to special-case these conditions, or else
+ // ConditionFromDoubleCondition will complain.
+ static const int DoubleConditionBitSpecial = 0x1;
+
+ enum DoubleCondition : uint32_t {
+ // These conditions will only evaluate to true if the comparison is
+ // ordered - i.e. neither operand is NaN.
+ DoubleOrdered = VFP_NotUnordered,
+ DoubleEqual = VFP_Equal,
+ DoubleNotEqual = VFP_NotEqualOrUnordered | DoubleConditionBitSpecial,
+ DoubleGreaterThan = VFP_GreaterThan,
+ DoubleGreaterThanOrEqual = VFP_GreaterThanOrEqual,
+ DoubleLessThan = VFP_LessThan,
+ DoubleLessThanOrEqual = VFP_LessThanOrEqual,
+ // If either operand is NaN, these conditions always evaluate to true.
+ DoubleUnordered = VFP_Unordered,
+ DoubleEqualOrUnordered = VFP_Equal | DoubleConditionBitSpecial,
+ DoubleNotEqualOrUnordered = VFP_NotEqualOrUnordered,
+ DoubleGreaterThanOrUnordered = VFP_GreaterThanOrUnordered,
+ DoubleGreaterThanOrEqualOrUnordered = VFP_GreaterThanOrEqualOrUnordered,
+ DoubleLessThanOrUnordered = VFP_LessThanOrUnordered,
+ DoubleLessThanOrEqualOrUnordered = VFP_LessThanOrEqualOrUnordered
+ };
+
+ Condition getCondition(uint32_t inst) {
+ return (Condition)(0xf0000000 & inst);
+ }
+ static inline Condition ConditionFromDoubleCondition(DoubleCondition cond) {
+ MOZ_ASSERT(!(cond & DoubleConditionBitSpecial));
+ return static_cast<Condition>(cond);
+ }
+
+ enum BarrierOption {
+ BarrierSY = 15, // Full system barrier
+ BarrierST = 14 // StoreStore barrier
+ };
+
+ // This should be protected, but since CodeGenerator wants to use it, it
+ // needs to go out here :(
+
+ BufferOffset nextOffset() { return m_buffer.nextOffset(); }
+
+ protected:
+ // Shim around AssemblerBufferWithConstantPools::allocEntry.
+ BufferOffset allocLiteralLoadEntry(size_t numInst, unsigned numPoolEntries,
+ PoolHintPun& php, uint8_t* data,
+ const LiteralDoc& doc = LiteralDoc(),
+ ARMBuffer::PoolEntry* pe = nullptr,
+ bool loadToPC = false);
+
+ Instruction* editSrc(BufferOffset bo) { return m_buffer.getInst(bo); }
+
+#ifdef JS_DISASM_ARM
+ typedef disasm::EmbeddedVector<char, disasm::ReasonableBufferSize>
+ DisasmBuffer;
+
+ static void disassembleInstruction(const Instruction* i,
+ DisasmBuffer& buffer);
+
+ void initDisassembler();
+ void finishDisassembler();
+ void spew(Instruction* i);
+ void spewBranch(Instruction* i, const LabelDoc& target);
+ void spewLiteralLoad(PoolHintPun& php, bool loadToPC, const Instruction* offs,
+ const LiteralDoc& doc);
+#endif
+
+ public:
+ void resetCounter();
+ static uint32_t NopFill;
+ static uint32_t GetNopFill();
+ static uint32_t AsmPoolMaxOffset;
+ static uint32_t GetPoolMaxOffset();
+
+ protected:
+  // Structure for fixing up pc-relative loads/jumps when the machine code
+ // gets moved (executable copy, gc, etc.).
+ class RelativePatch {
+ void* target_;
+ RelocationKind kind_;
+
+ public:
+ RelativePatch(void* target, RelocationKind kind)
+ : target_(target), kind_(kind) {}
+ void* target() const { return target_; }
+ RelocationKind kind() const { return kind_; }
+ };
+
+ // TODO: this should actually be a pool-like object. It is currently a big
+ // hack, and probably shouldn't exist.
+ js::Vector<RelativePatch, 8, SystemAllocPolicy> jumps_;
+
+ CompactBufferWriter jumpRelocations_;
+ CompactBufferWriter dataRelocations_;
+
+ ARMBuffer m_buffer;
+
+#ifdef JS_DISASM_ARM
+ DisassemblerSpew spew_;
+#endif
+
+ public:
+ // For the alignment fill use NOP: 0x0320f000 or (Always | InstNOP::NopInst).
+ // For the nopFill use a branch to the next instruction: 0xeaffffff.
+ Assembler()
+ : m_buffer(1, 1, 8, GetPoolMaxOffset(), 8, 0xe320f000, 0xeaffffff,
+ GetNopFill()),
+ isFinished(false),
+ dtmActive(false),
+ dtmCond(Always) {
+#ifdef JS_DISASM_ARM
+ initDisassembler();
+#endif
+ }
+
+ ~Assembler() {
+#ifdef JS_DISASM_ARM
+ finishDisassembler();
+#endif
+ }
+
+ void setUnlimitedBuffer() { m_buffer.setUnlimited(); }
+
+ static Condition InvertCondition(Condition cond);
+ static Condition UnsignedCondition(Condition cond);
+ static Condition ConditionWithoutEqual(Condition cond);
+
+ static DoubleCondition InvertCondition(DoubleCondition cond);
+
+ void writeDataRelocation(BufferOffset offset, ImmGCPtr ptr) {
+ // Raw GC pointer relocations and Value relocations both end up in
+ // Assembler::TraceDataRelocations.
+ if (ptr.value) {
+ if (gc::IsInsideNursery(ptr.value)) {
+ embedsNurseryPointers_ = true;
+ }
+ dataRelocations_.writeUnsigned(offset.getOffset());
+ }
+ }
+
+ enum RelocBranchStyle { B_MOVWT, B_LDR_BX, B_LDR, B_MOVW_ADD };
+
+ enum RelocStyle { L_MOVWT, L_LDR };
+
+ public:
+ // Given the start of a Control Flow sequence, grab the value that is
+ // finally branched to given the start of a function that loads an address
+ // into a register get the address that ends up in the register.
+ template <class Iter>
+ static const uint32_t* GetCF32Target(Iter* iter);
+
+ static uintptr_t GetPointer(uint8_t*);
+ static const uint32_t* GetPtr32Target(InstructionIterator iter,
+ Register* dest = nullptr,
+ RelocStyle* rs = nullptr);
+
+ bool oom() const;
+
+ void setPrinter(Sprinter* sp) {
+#ifdef JS_DISASM_ARM
+ spew_.setPrinter(sp);
+#endif
+ }
+
+ Register getStackPointer() const { return StackPointer; }
+
+ private:
+ bool isFinished;
+
+ protected:
+ LabelDoc refLabel(const Label* label) {
+#ifdef JS_DISASM_ARM
+ return spew_.refLabel(label);
+#else
+ return LabelDoc();
+#endif
+ }
+
+ public:
+ void finish();
+ bool appendRawCode(const uint8_t* code, size_t numBytes);
+ bool reserve(size_t size);
+ bool swapBuffer(wasm::Bytes& bytes);
+ void copyJumpRelocationTable(uint8_t* dest);
+ void copyDataRelocationTable(uint8_t* dest);
+
+ // Size of the instruction stream, in bytes, after pools are flushed.
+ size_t size() const;
+ // Size of the jump relocation table, in bytes.
+ size_t jumpRelocationTableBytes() const;
+ size_t dataRelocationTableBytes() const;
+
+ // Size of the data table, in bytes.
+ size_t bytesNeeded() const;
+
+  // Write a single instruction into the instruction stream. Very hot,
+  // inlined for performance.
+ MOZ_ALWAYS_INLINE BufferOffset writeInst(uint32_t x) {
+ MOZ_ASSERT(hasCreator());
+ BufferOffset offs = m_buffer.putInt(x);
+#ifdef JS_DISASM_ARM
+ spew(m_buffer.getInstOrNull(offs));
+#endif
+ return offs;
+ }
+
+  // As above, but also mark the instruction as a branch. Very hot, inlined
+  // for performance.
+ MOZ_ALWAYS_INLINE BufferOffset
+ writeBranchInst(uint32_t x, const LabelDoc& documentation) {
+ BufferOffset offs = m_buffer.putInt(x);
+#ifdef JS_DISASM_ARM
+ spewBranch(m_buffer.getInstOrNull(offs), documentation);
+#endif
+ return offs;
+ }
+
+  // Write a placeholder NOP for a branch into the instruction stream (in
+  // order to adjust assembler addresses and mark it as a branch); it will be
+  // overwritten subsequently.
+ BufferOffset allocBranchInst();
+
+ // A static variant for the cases where we don't want to have an assembler
+ // object.
+ static void WriteInstStatic(uint32_t x, uint32_t* dest);
+
+ public:
+ void writeCodePointer(CodeLabel* label);
+
+ void haltingAlign(int alignment);
+ void nopAlign(int alignment);
+ BufferOffset as_nop();
+ BufferOffset as_alu(Register dest, Register src1, Operand2 op2, ALUOp op,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_mov(Register dest, Operand2 op2, SBit s = LeaveCC,
+ Condition c = Always);
+ BufferOffset as_mvn(Register dest, Operand2 op2, SBit s = LeaveCC,
+ Condition c = Always);
+
+ static void as_alu_patch(Register dest, Register src1, Operand2 op2, ALUOp op,
+ SBit s, Condition c, uint32_t* pos);
+ static void as_mov_patch(Register dest, Operand2 op2, SBit s, Condition c,
+ uint32_t* pos);
+
+ // Logical operations:
+ BufferOffset as_and(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_bic(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_eor(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_orr(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ // Reverse byte operations:
+ BufferOffset as_rev(Register dest, Register src, Condition c = Always);
+ BufferOffset as_rev16(Register dest, Register src, Condition c = Always);
+ BufferOffset as_revsh(Register dest, Register src, Condition c = Always);
+ // Mathematical operations:
+ BufferOffset as_adc(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_add(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_sbc(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_sub(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_rsb(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_rsc(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ // Test operations:
+ BufferOffset as_cmn(Register src1, Operand2 op2, Condition c = Always);
+ BufferOffset as_cmp(Register src1, Operand2 op2, Condition c = Always);
+ BufferOffset as_teq(Register src1, Operand2 op2, Condition c = Always);
+ BufferOffset as_tst(Register src1, Operand2 op2, Condition c = Always);
+
+ // Sign extension operations:
+ BufferOffset as_sxtb(Register dest, Register src, int rotate,
+ Condition c = Always);
+ BufferOffset as_sxth(Register dest, Register src, int rotate,
+ Condition c = Always);
+ BufferOffset as_uxtb(Register dest, Register src, int rotate,
+ Condition c = Always);
+ BufferOffset as_uxth(Register dest, Register src, int rotate,
+ Condition c = Always);
+
+  // Not quite ALU-worthy, but useful nonetheless. These also have the issue
+  // of being formatted completely differently from the standard ALU
+  // operations.
+ BufferOffset as_movw(Register dest, Imm16 imm, Condition c = Always);
+ BufferOffset as_movt(Register dest, Imm16 imm, Condition c = Always);
+
+ static void as_movw_patch(Register dest, Imm16 imm, Condition c,
+ Instruction* pos);
+ static void as_movt_patch(Register dest, Imm16 imm, Condition c,
+ Instruction* pos);
+
+ BufferOffset as_genmul(Register d1, Register d2, Register rm, Register rn,
+ MULOp op, SBit s, Condition c = Always);
+ BufferOffset as_mul(Register dest, Register src1, Register src2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_mla(Register dest, Register acc, Register src1, Register src2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_umaal(Register dest1, Register dest2, Register src1,
+ Register src2, Condition c = Always);
+ BufferOffset as_mls(Register dest, Register acc, Register src1, Register src2,
+ Condition c = Always);
+ BufferOffset as_umull(Register dest1, Register dest2, Register src1,
+ Register src2, SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_umlal(Register dest1, Register dest2, Register src1,
+ Register src2, SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_smull(Register dest1, Register dest2, Register src1,
+ Register src2, SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_smlal(Register dest1, Register dest2, Register src1,
+ Register src2, SBit s = LeaveCC, Condition c = Always);
+
+ BufferOffset as_sdiv(Register dest, Register num, Register div,
+ Condition c = Always);
+ BufferOffset as_udiv(Register dest, Register num, Register div,
+ Condition c = Always);
+ BufferOffset as_clz(Register dest, Register src, Condition c = Always);
+
+ // Data transfer instructions: ldr, str, ldrb, strb.
+ // Using an int to differentiate between 8 bits and 32 bits is overkill.
+ BufferOffset as_dtr(LoadStore ls, int size, Index mode, Register rt,
+ DTRAddr addr, Condition c = Always);
+
+ static void as_dtr_patch(LoadStore ls, int size, Index mode, Register rt,
+ DTRAddr addr, Condition c, uint32_t* dest);
+
+ // Handles all of the other integral data transferring functions:
+ // ldrsb, ldrsh, ldrd, etc. The size is given in bits.
+ BufferOffset as_extdtr(LoadStore ls, int size, bool IsSigned, Index mode,
+ Register rt, EDtrAddr addr, Condition c = Always);
+
+ BufferOffset as_dtm(LoadStore ls, Register rn, uint32_t mask, DTMMode mode,
+ DTMWriteBack wb, Condition c = Always);
+
+ // Overwrite a pool entry with new data.
+ static void WritePoolEntry(Instruction* addr, Condition c, uint32_t data);
+
+ // Load a 32 bit immediate from a pool into a register.
+ BufferOffset as_Imm32Pool(Register dest, uint32_t value,
+ Condition c = Always);
+
+ // Load a 64 bit floating point immediate from a pool into a register.
+ BufferOffset as_FImm64Pool(VFPRegister dest, double value,
+ Condition c = Always);
+ // Load a 32 bit floating point immediate from a pool into a register.
+ BufferOffset as_FImm32Pool(VFPRegister dest, float value,
+ Condition c = Always);
+
+ // Atomic instructions: ldrexd, ldrex, ldrexh, ldrexb, strexd, strex, strexh,
+ // strexb.
+ //
+ // The doubleword, halfword, and byte versions are available from ARMv6K
+ // forward.
+ //
+ // The word versions are available from ARMv6 forward and can be used to
+ // implement the halfword and byte versions on older systems.
+
+ // LDREXD rt, rt2, [rn]. Constraint: rt even register, rt2=rt+1.
+ BufferOffset as_ldrexd(Register rt, Register rt2, Register rn,
+ Condition c = Always);
+
+ // LDREX rt, [rn]
+ BufferOffset as_ldrex(Register rt, Register rn, Condition c = Always);
+ BufferOffset as_ldrexh(Register rt, Register rn, Condition c = Always);
+ BufferOffset as_ldrexb(Register rt, Register rn, Condition c = Always);
+
+ // STREXD rd, rt, rt2, [rn]. Constraint: rt even register, rt2=rt+1.
+ BufferOffset as_strexd(Register rd, Register rt, Register rt2, Register rn,
+ Condition c = Always);
+
+ // STREX rd, rt, [rn]. Constraint: rd != rn, rd != rt.
+ BufferOffset as_strex(Register rd, Register rt, Register rn,
+ Condition c = Always);
+ BufferOffset as_strexh(Register rd, Register rt, Register rn,
+ Condition c = Always);
+ BufferOffset as_strexb(Register rd, Register rt, Register rn,
+ Condition c = Always);
+
+ // CLREX
+ BufferOffset as_clrex();
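+
+  // Illustrative use (a sketch only; real callers go through the
+  // MacroAssembler, and output/ptr/scratch/flag/value stand in for suitable
+  // registers): a typical load-linked/store-conditional retry loop for an
+  // atomic read-modify-write could be emitted as
+  //
+  //   Label again;
+  //   bind(&again);
+  //   as_ldrex(output, ptr);           // load-linked
+  //   as_add(scratch, output, O2Reg(value));
+  //   as_strex(flag, scratch, ptr);    // flag is 0 on success, 1 on failure
+  //   as_cmp(flag, Imm8(1));
+  //   as_b(&again, Equal);             // retry if the exclusive store failed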
+
+ // Memory synchronization.
+ // These are available from ARMv7 forward.
+ BufferOffset as_dmb(BarrierOption option = BarrierSY);
+ BufferOffset as_dsb(BarrierOption option = BarrierSY);
+ BufferOffset as_isb();
+
+ // Memory synchronization for architectures before ARMv7.
+ BufferOffset as_dsb_trap();
+ BufferOffset as_dmb_trap();
+ BufferOffset as_isb_trap();
+
+ // Speculation barrier
+ BufferOffset as_csdb();
+
+ // Control flow stuff:
+
+ // bx can *only* branch to a register never to an immediate.
+ BufferOffset as_bx(Register r, Condition c = Always);
+
+  // Branch can branch to an immediate *or* to a register. Branches to
+  // immediates are pc-relative; branches to registers are absolute.
+ BufferOffset as_b(BOffImm off, Condition c, Label* documentation = nullptr);
+
+ BufferOffset as_b(Label* l, Condition c = Always);
+ BufferOffset as_b(BOffImm off, Condition c, BufferOffset inst);
+
+  // blx can go to either an immediate or a register. When blx'ing to a
+  // register, we change processor mode depending on the low bit of the
+  // register; when blx'ing to an immediate, we *always* change processor
+  // state.
+ BufferOffset as_blx(Label* l);
+
+ BufferOffset as_blx(Register r, Condition c = Always);
+ BufferOffset as_bl(BOffImm off, Condition c, Label* documentation = nullptr);
+  // bl can only branch+link to an immediate, never to a register; it never
+  // changes processor state.
+ BufferOffset as_bl();
+ // bl #imm can have a condition code, blx #imm cannot.
+ // blx reg can be conditional.
+ BufferOffset as_bl(Label* l, Condition c);
+ BufferOffset as_bl(BOffImm off, Condition c, BufferOffset inst);
+
+ BufferOffset as_mrs(Register r, Condition c = Always);
+ BufferOffset as_msr(Register r, Condition c = Always);
+
+ // VFP instructions!
+ private:
+ enum vfp_size { IsDouble = 1 << 8, IsSingle = 0 << 8 };
+
+ BufferOffset writeVFPInst(vfp_size sz, uint32_t blob);
+
+ static void WriteVFPInstStatic(vfp_size sz, uint32_t blob, uint32_t* dest);
+
+  // Unityped variants: all registers hold the same type (IEEE 754
+  // single/double). Notably not included are vcvt; vmov vd, #imm;
+  // vmov rt, vn.
+ BufferOffset as_vfp_float(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ VFPOp op, Condition c = Always);
+
+ public:
+ BufferOffset as_vadd(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c = Always);
+ BufferOffset as_vdiv(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c = Always);
+ BufferOffset as_vmul(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c = Always);
+ BufferOffset as_vnmul(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c = Always);
+ BufferOffset as_vnmla(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c = Always);
+ BufferOffset as_vnmls(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c = Always);
+ BufferOffset as_vneg(VFPRegister vd, VFPRegister vm, Condition c = Always);
+ BufferOffset as_vsqrt(VFPRegister vd, VFPRegister vm, Condition c = Always);
+ BufferOffset as_vabs(VFPRegister vd, VFPRegister vm, Condition c = Always);
+ BufferOffset as_vsub(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c = Always);
+ BufferOffset as_vcmp(VFPRegister vd, VFPRegister vm, Condition c = Always);
+ BufferOffset as_vcmpz(VFPRegister vd, Condition c = Always);
+
+  // Specifically, a move between two same-sized registers.
+ BufferOffset as_vmov(VFPRegister vd, VFPRegister vsrc, Condition c = Always);
+
+ // Transfer between Core and VFP.
+ enum FloatToCore_ { FloatToCore = 1 << 20, CoreToFloat = 0 << 20 };
+
+ private:
+ enum VFPXferSize { WordTransfer = 0x02000010, DoubleTransfer = 0x00400010 };
+
+ public:
+  // Unlike the next function, moves between the core registers and VFP
+  // registers can't be typed quite as precisely, since I don't want to munge
+  // the type VFPRegister to also include core registers. Thus, the core and
+  // VFP registers are passed in based on their type, and src/dest is
+  // determined by the float2core (FloatToCore_) argument.
+
+ BufferOffset as_vxfer(Register vt1, Register vt2, VFPRegister vm,
+ FloatToCore_ f2c, Condition c = Always, int idx = 0);
+
+ // Our encoding actually allows just the src and the dest (and their types)
+ // to uniquely specify the encoding that we are going to use.
+ BufferOffset as_vcvt(VFPRegister vd, VFPRegister vm, bool useFPSCR = false,
+ Condition c = Always);
+
+ // Hard coded to a 32 bit fixed width result for now.
+ BufferOffset as_vcvtFixed(VFPRegister vd, bool isSigned, uint32_t fixedPoint,
+ bool toFixed, Condition c = Always);
+
+ // Transfer between VFP and memory.
+ BufferOffset as_vdtr(LoadStore ls, VFPRegister vd, VFPAddr addr,
+ Condition c = Always /* vfp doesn't have a wb option*/);
+
+ static void as_vdtr_patch(LoadStore ls, VFPRegister vd, VFPAddr addr,
+ Condition c /* vfp doesn't have a wb option */,
+ uint32_t* dest);
+
+ // VFP's ldm/stm work differently from the standard arm ones. You can only
+ // transfer a range.
+
+ BufferOffset as_vdtm(LoadStore st, Register rn, VFPRegister vd, int length,
+ /* also has update conditions */ Condition c = Always);
+
+ // vldr/vstr variants that handle unaligned accesses. These encode as NEON
+ // single-element instructions and can only be used if NEON is available.
+ // Here, vd must be tagged as a float or double register.
+ BufferOffset as_vldr_unaligned(VFPRegister vd, Register rn);
+ BufferOffset as_vstr_unaligned(VFPRegister vd, Register rn);
+
+ BufferOffset as_vimm(VFPRegister vd, VFPImm imm, Condition c = Always);
+
+ BufferOffset as_vmrs(Register r, Condition c = Always);
+ BufferOffset as_vmsr(Register r, Condition c = Always);
+
+ // Label operations.
+ bool nextLink(BufferOffset b, BufferOffset* next);
+ void bind(Label* label, BufferOffset boff = BufferOffset());
+ uint32_t currentOffset() { return nextOffset().getOffset(); }
+ void retarget(Label* label, Label* target);
+ // I'm going to pretend this doesn't exist for now.
+ void retarget(Label* label, void* target, RelocationKind reloc);
+
+ static void Bind(uint8_t* rawCode, const CodeLabel& label);
+
+ void as_bkpt();
+ BufferOffset as_illegal_trap();
+
+ public:
+ static void TraceJumpRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader);
+ static void TraceDataRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader);
+
+ void assertNoGCThings() const {
+#ifdef DEBUG
+ MOZ_ASSERT(dataRelocations_.length() == 0);
+ for (auto& j : jumps_) {
+ MOZ_ASSERT(j.kind() == RelocationKind::HARDCODED);
+ }
+#endif
+ }
+
+ static bool SupportsFloatingPoint() { return HasVFP(); }
+ static bool SupportsUnalignedAccesses() { return HasARMv7(); }
+  // Note: returning false here is technically wrong, but one has to go via
+  // the as_vldr_unaligned and as_vstr_unaligned instructions to get proper
+  // behavior, and those are NEON-specific and have to be asked for
+  // specifically.
+ static bool SupportsFastUnalignedFPAccesses() { return false; }
+
+ static bool HasRoundInstruction(RoundingMode mode) { return false; }
+
+ protected:
+ void addPendingJump(BufferOffset src, ImmPtr target, RelocationKind kind) {
+ enoughMemory_ &= jumps_.append(RelativePatch(target.value, kind));
+ if (kind == RelocationKind::JITCODE) {
+ jumpRelocations_.writeUnsigned(src.getOffset());
+ }
+ }
+
+ public:
+  // The buffer is about to be linked; make sure any constant pools or excess
+  // bookkeeping has been flushed to the instruction stream.
+ void flush() {
+ MOZ_ASSERT(!isFinished);
+ m_buffer.flushPool();
+ return;
+ }
+
+ void comment(const char* msg) {
+#ifdef JS_DISASM_ARM
+ spew_.spew("; %s", msg);
+#endif
+ }
+
+ // Copy the assembly code to the given buffer, and perform any pending
+ // relocations relying on the target address.
+ void executableCopy(uint8_t* buffer);
+
+ // Actual assembly emitting functions.
+
+ // Since I can't think of a reasonable default for the mode, I'm going to
+ // leave it as a required argument.
+ void startDataTransferM(LoadStore ls, Register rm, DTMMode mode,
+ DTMWriteBack update = NoWriteBack,
+ Condition c = Always) {
+ MOZ_ASSERT(!dtmActive);
+ dtmUpdate = update;
+ dtmBase = rm;
+ dtmLoadStore = ls;
+ dtmLastReg = -1;
+ dtmRegBitField = 0;
+ dtmActive = 1;
+ dtmCond = c;
+ dtmMode = mode;
+ }
+
+ void transferReg(Register rn) {
+ MOZ_ASSERT(dtmActive);
+ MOZ_ASSERT(rn.code() > dtmLastReg);
+ dtmRegBitField |= 1 << rn.code();
+ if (dtmLoadStore == IsLoad && rn.code() == 13 && dtmBase.code() == 13) {
+ MOZ_CRASH("ARM Spec says this is invalid");
+ }
+ }
+ void finishDataTransfer() {
+ dtmActive = false;
+ as_dtm(dtmLoadStore, dtmBase, dtmRegBitField, dtmMode, dtmUpdate, dtmCond);
+ }
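+
+  // Illustrative use (a sketch; the register and mode names are the ones
+  // defined elsewhere in this file): emitting the equivalent of
+  // "stmdb sp!, {r4, r5, lr}" looks roughly like
+  //
+  //   startDataTransferM(IsStore, sp, DB, WriteBack);
+  //   transferReg(r4);
+  //   transferReg(r5);
+  //   transferReg(lr);
+  //   finishDataTransfer();
+  //
+  // Registers must be supplied in ascending order, as transferReg asserts.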
+
+ void startFloatTransferM(LoadStore ls, Register rm, DTMMode mode,
+ DTMWriteBack update = NoWriteBack,
+ Condition c = Always) {
+ MOZ_ASSERT(!dtmActive);
+ dtmActive = true;
+ dtmUpdate = update;
+ dtmLoadStore = ls;
+ dtmBase = rm;
+ dtmCond = c;
+ dtmLastReg = -1;
+ dtmMode = mode;
+ dtmDelta = 0;
+ }
+ void transferFloatReg(VFPRegister rn) {
+ if (dtmLastReg == -1) {
+ vdtmFirstReg = rn.code();
+ } else {
+ if (dtmDelta == 0) {
+ dtmDelta = rn.code() - dtmLastReg;
+ MOZ_ASSERT(dtmDelta == 1 || dtmDelta == -1);
+ }
+ MOZ_ASSERT(dtmLastReg >= 0);
+ MOZ_ASSERT(rn.code() == unsigned(dtmLastReg) + dtmDelta);
+ }
+
+ dtmLastReg = rn.code();
+ }
+ void finishFloatTransfer() {
+ MOZ_ASSERT(dtmActive);
+ dtmActive = false;
+ MOZ_ASSERT(dtmLastReg != -1);
+ dtmDelta = dtmDelta ? dtmDelta : 1;
+ // The operand for the vstr/vldr instruction is the lowest register in the
+ // range.
+ int low = std::min(dtmLastReg, vdtmFirstReg);
+ int high = std::max(dtmLastReg, vdtmFirstReg);
+ // Fencepost problem.
+ int len = high - low + 1;
+    // vdtm can only transfer 16 registers at once. If we need to transfer
+    // more, we either have to jump through hoops or rely on write-back to
+    // update the base register.
+ MOZ_ASSERT_IF(len > 16, dtmUpdate == WriteBack);
+
+ int adjustLow = dtmLoadStore == IsStore ? 0 : 1;
+ int adjustHigh = dtmLoadStore == IsStore ? -1 : 0;
+ while (len > 0) {
+ // Limit the instruction to 16 registers.
+ int curLen = std::min(len, 16);
+ // If it is a store, we want to start at the high end and move down
+ // (e.g. vpush d16-d31; vpush d0-d15).
+ int curStart = (dtmLoadStore == IsStore) ? high - curLen + 1 : low;
+ as_vdtm(dtmLoadStore, dtmBase,
+ VFPRegister(FloatRegister::FromCode(curStart)), curLen, dtmCond);
+ // Update the bounds.
+ low += adjustLow * curLen;
+ high += adjustHigh * curLen;
+ // Update the length parameter.
+ len -= curLen;
+ }
+ }
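+
+  // Illustrative use (a sketch, assuming the usual d8..d15 register names):
+  //
+  //   startFloatTransferM(IsStore, sp, DB, WriteBack);
+  //   transferFloatReg(d8);
+  //   ...
+  //   transferFloatReg(d15);
+  //   finishFloatTransfer();
+  //
+  // emits the equivalent of "vpush {d8-d15}"; the registers passed in must
+  // form a contiguous ascending or descending sequence.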
+
+ private:
+ int dtmRegBitField;
+ int vdtmFirstReg;
+ int dtmLastReg;
+ int dtmDelta;
+ Register dtmBase;
+ DTMWriteBack dtmUpdate;
+ DTMMode dtmMode;
+ LoadStore dtmLoadStore;
+ bool dtmActive;
+ Condition dtmCond;
+
+ public:
+ enum {
+ PadForAlign8 = (int)0x00,
+ PadForAlign16 = (int)0x0000,
+ PadForAlign32 = (int)0xe12fff7f // 'bkpt 0xffff'
+ };
+
+  // API for speaking with the IonAssemblerBufferWithConstantPools: generate
+  // an initial placeholder instruction that we want to fix up later.
+
+ // Take the stub value that was written in before, and write in an actual
+ // load using the index we'd computed previously as well as the address of
+ // the pool start.
+ static void PatchConstantPoolLoad(void* loadAddr, void* constPoolAddr);
+
+ // We're not tracking short-range branches for ARM for now.
+ static void PatchShortRangeBranchToVeneer(ARMBuffer*, unsigned rangeIdx,
+ BufferOffset deadline,
+ BufferOffset veneer) {
+ MOZ_CRASH();
+ }
+ // END API
+
+  // Move our entire pool into the instruction stream. This is to force an
+  // opportunistic dump of the pool, preferably when it is more convenient to
+  // do a dump.
+ void flushBuffer();
+ void enterNoPool(size_t maxInst);
+ void leaveNoPool();
+ void enterNoNops();
+ void leaveNoNops();
+
+ static void WritePoolHeader(uint8_t* start, Pool* p, bool isNatural);
+ static void WritePoolGuard(BufferOffset branch, Instruction* inst,
+ BufferOffset dest);
+
+ static uint32_t PatchWrite_NearCallSize();
+ static uint32_t NopSize() { return 4; }
+ static void PatchWrite_NearCall(CodeLocationLabel start,
+ CodeLocationLabel toCall);
+ static void PatchDataWithValueCheck(CodeLocationLabel label,
+ PatchedImmPtr newValue,
+ PatchedImmPtr expectedValue);
+ static void PatchDataWithValueCheck(CodeLocationLabel label, ImmPtr newValue,
+ ImmPtr expectedValue);
+ static void PatchWrite_Imm32(CodeLocationLabel label, Imm32 imm);
+
+ static uint32_t AlignDoubleArg(uint32_t offset) { return (offset + 1) & ~1; }
+ static uint8_t* NextInstruction(uint8_t* instruction,
+ uint32_t* count = nullptr);
+
+ // Toggle a jmp or cmp emitted by toggledJump().
+ static void ToggleToJmp(CodeLocationLabel inst_);
+ static void ToggleToCmp(CodeLocationLabel inst_);
+
+ static size_t ToggledCallSize(uint8_t* code);
+ static void ToggleCall(CodeLocationLabel inst_, bool enabled);
+
+ void processCodeLabels(uint8_t* rawCode);
+
+ void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end,
+ const Disassembler::HeapAccess& heapAccess) {
+ // Implement this if we implement a disassembler.
+ }
+}; // Assembler
+
+// An Instruction is a structure for both encoding and decoding any and all ARM
+// instructions. Many classes have not been implemented thus far.
+class Instruction {
+ uint32_t data;
+
+ protected:
+  // This is not for defaulting to always; this is for instructions that
+  // cannot be made conditional, and have the usually invalid 4b1111 cond
+  // field.
+ explicit Instruction(uint32_t data_, bool fake = false)
+ : data(data_ | 0xf0000000) {
+ MOZ_ASSERT(fake || ((data_ & 0xf0000000) == 0));
+ }
+ // Standard constructor.
+ Instruction(uint32_t data_, Assembler::Condition c)
+ : data(data_ | (uint32_t)c) {
+ MOZ_ASSERT((data_ & 0xf0000000) == 0);
+ }
+ // You should never create an instruction directly. You should create a more
+ // specific instruction which will eventually call one of these constructors
+ // for you.
+ public:
+ uint32_t encode() const { return data; }
+ // Check if this instruction is really a particular case.
+ template <class C>
+ bool is() const {
+ return C::IsTHIS(*this);
+ }
+
+ // Safely get a more specific variant of this pointer.
+ template <class C>
+ C* as() const {
+ return C::AsTHIS(*this);
+ }
+
+ const Instruction& operator=(Instruction src) {
+ data = src.data;
+ return *this;
+ }
+ // Since almost all instructions have condition codes, the condition code
+ // extractor resides in the base class.
+ Assembler::Condition extractCond() const {
+ MOZ_ASSERT(data >> 28 != 0xf,
+ "The instruction does not have condition code");
+ return (Assembler::Condition)(data & 0xf0000000);
+ }
+
+ // Sometimes, an api wants a uint32_t (or a pointer to it) rather than an
+ // instruction. raw() just coerces this into a pointer to a uint32_t.
+ const uint32_t* raw() const { return &data; }
+ uint32_t size() const { return 4; }
+}; // Instruction
+
+// Make sure that it is the right size.
+static_assert(sizeof(Instruction) == 4);
+
+inline void InstructionIterator::advanceRaw(ptrdiff_t instructions) {
+ inst_ = inst_ + instructions;
+}
+
+// Data Transfer Instructions.
+class InstDTR : public Instruction {
+ public:
+ enum IsByte_ { IsByte = 0x00400000, IsWord = 0x00000000 };
+ static const int IsDTR = 0x04000000;
+ static const int IsDTRMask = 0x0c000000;
+
+ // TODO: Replace the initialization with something that is safer.
+ InstDTR(LoadStore ls, IsByte_ ib, Index mode, Register rt, DTRAddr addr,
+ Assembler::Condition c)
+ : Instruction(std::underlying_type_t<LoadStore>(ls) |
+ std::underlying_type_t<IsByte_>(ib) |
+ std::underlying_type_t<Index>(mode) | RT(rt) |
+ addr.encode() | IsDTR,
+ c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstDTR* AsTHIS(const Instruction& i);
+};
+static_assert(sizeof(InstDTR) == sizeof(Instruction));
+
+class InstLDR : public InstDTR {
+ public:
+ InstLDR(Index mode, Register rt, DTRAddr addr, Assembler::Condition c)
+ : InstDTR(IsLoad, IsWord, mode, rt, addr, c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstLDR* AsTHIS(const Instruction& i);
+
+ int32_t signedOffset() const {
+ int32_t offset = encode() & 0xfff;
+ if (IsUp_(encode() & IsUp) != IsUp) {
+ return -offset;
+ }
+ return offset;
+ }
+ uint32_t* dest() const {
+ int32_t offset = signedOffset();
+    // When patching the load in PatchConstantPoolLoad, we ensure that the
+    // offset is a multiple of 4, offset by 8 bytes from the actual
+    // location. Indeed, when the base register is PC, ARM's three-stage
+    // pipeline design means that PC is 8 bytes (= 2 * sizeof(uint32_t*))
+    // ahead of the instruction actually being executed.
+ MOZ_ASSERT(offset % 4 == 0);
+ offset >>= 2;
+ return (uint32_t*)raw() + offset + 2;
+ }
+};
+static_assert(sizeof(InstDTR) == sizeof(InstLDR));
+
+class InstNOP : public Instruction {
+ public:
+ static const uint32_t NopInst = 0x0320f000;
+
+ InstNOP() : Instruction(NopInst, Assembler::Always) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstNOP* AsTHIS(Instruction& i);
+};
+
+// Branching to a register, or calling a register
+class InstBranchReg : public Instruction {
+ protected:
+ // Don't use BranchTag yourself, use a derived instruction.
+ enum BranchTag { IsBX = 0x012fff10, IsBLX = 0x012fff30 };
+
+ static const uint32_t IsBRegMask = 0x0ffffff0;
+
+ InstBranchReg(BranchTag tag, Register rm, Assembler::Condition c)
+ : Instruction(tag | rm.code(), c) {}
+
+ public:
+ static bool IsTHIS(const Instruction& i);
+ static InstBranchReg* AsTHIS(const Instruction& i);
+
+ // Get the register that is being branched to
+ void extractDest(Register* dest);
+ // Make sure we are branching to a pre-known register
+ bool checkDest(Register dest);
+};
+static_assert(sizeof(InstBranchReg) == sizeof(Instruction));
+
+// Branching to an immediate offset, or calling an immediate offset
+class InstBranchImm : public Instruction {
+ protected:
+ enum BranchTag { IsB = 0x0a000000, IsBL = 0x0b000000 };
+
+ static const uint32_t IsBImmMask = 0x0f000000;
+
+ InstBranchImm(BranchTag tag, BOffImm off, Assembler::Condition c)
+ : Instruction(tag | off.encode(), c) {}
+
+ public:
+ static bool IsTHIS(const Instruction& i);
+ static InstBranchImm* AsTHIS(const Instruction& i);
+
+ void extractImm(BOffImm* dest);
+};
+static_assert(sizeof(InstBranchImm) == sizeof(Instruction));
+
+// Very specific branching instructions.
+class InstBXReg : public InstBranchReg {
+ public:
+ static bool IsTHIS(const Instruction& i);
+ static InstBXReg* AsTHIS(const Instruction& i);
+};
+
+class InstBLXReg : public InstBranchReg {
+ public:
+ InstBLXReg(Register reg, Assembler::Condition c)
+ : InstBranchReg(IsBLX, reg, c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstBLXReg* AsTHIS(const Instruction& i);
+};
+
+class InstBImm : public InstBranchImm {
+ public:
+ InstBImm(BOffImm off, Assembler::Condition c) : InstBranchImm(IsB, off, c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstBImm* AsTHIS(const Instruction& i);
+};
+
+class InstBLImm : public InstBranchImm {
+ public:
+ InstBLImm(BOffImm off, Assembler::Condition c)
+ : InstBranchImm(IsBL, off, c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstBLImm* AsTHIS(const Instruction& i);
+};
+
+// Both movw and movt. The layout of both the immediate and the destination
+// register is the same, so the code is being shared.
+class InstMovWT : public Instruction {
+ protected:
+ enum WT { IsW = 0x03000000, IsT = 0x03400000 };
+ static const uint32_t IsWTMask = 0x0ff00000;
+
+ InstMovWT(Register rd, Imm16 imm, WT wt, Assembler::Condition c)
+ : Instruction(RD(rd) | imm.encode() | wt, c) {}
+
+ public:
+ void extractImm(Imm16* dest);
+ void extractDest(Register* dest);
+ bool checkImm(Imm16 dest);
+ bool checkDest(Register dest);
+
+ static bool IsTHIS(Instruction& i);
+ static InstMovWT* AsTHIS(Instruction& i);
+};
+static_assert(sizeof(InstMovWT) == sizeof(Instruction));
+
+class InstMovW : public InstMovWT {
+ public:
+ InstMovW(Register rd, Imm16 imm, Assembler::Condition c)
+ : InstMovWT(rd, imm, IsW, c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstMovW* AsTHIS(const Instruction& i);
+};
+
+class InstMovT : public InstMovWT {
+ public:
+ InstMovT(Register rd, Imm16 imm, Assembler::Condition c)
+ : InstMovWT(rd, imm, IsT, c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstMovT* AsTHIS(const Instruction& i);
+};
+
+class InstALU : public Instruction {
+ static const int32_t ALUMask = 0xc << 24;
+
+ public:
+ InstALU(Register rd, Register rn, Operand2 op2, ALUOp op, SBit s,
+ Assembler::Condition c)
+ : Instruction(maybeRD(rd) | maybeRN(rn) | op2.encode() | op | s, c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstALU* AsTHIS(const Instruction& i);
+
+ void extractOp(ALUOp* ret);
+ bool checkOp(ALUOp op);
+ void extractDest(Register* ret);
+ bool checkDest(Register rd);
+ void extractOp1(Register* ret);
+ bool checkOp1(Register rn);
+ Operand2 extractOp2();
+};
+
+class InstCMP : public InstALU {
+ public:
+ static bool IsTHIS(const Instruction& i);
+ static InstCMP* AsTHIS(const Instruction& i);
+};
+
+class InstMOV : public InstALU {
+ public:
+ static bool IsTHIS(const Instruction& i);
+ static InstMOV* AsTHIS(const Instruction& i);
+};
+
+// Compile-time iterator over instructions, with a safe interface that
+// references not-necessarily-linear Instructions by linear BufferOffset.
+class BufferInstructionIterator
+ : public ARMBuffer::AssemblerBufferInstIterator {
+ public:
+ BufferInstructionIterator(BufferOffset bo, ARMBuffer* buffer)
+ : ARMBuffer::AssemblerBufferInstIterator(bo, buffer) {}
+
+ // Advances the buffer to the next intentionally-inserted instruction.
+ Instruction* next() {
+ advance(cur()->size());
+ maybeSkipAutomaticInstructions();
+ return cur();
+ }
+
+ // Advances the BufferOffset past any automatically-inserted instructions.
+ Instruction* maybeSkipAutomaticInstructions();
+};
+
+static const uint32_t NumIntArgRegs = 4;
+
+// There are 16 *float* registers available for arguments.
+// If doubles are used, only half that number of registers are available.
+static const uint32_t NumFloatArgRegs = 16;
+
+static inline bool GetIntArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
+ Register* out) {
+ if (usedIntArgs >= NumIntArgRegs) {
+ return false;
+ }
+
+ *out = Register::FromCode(usedIntArgs);
+ return true;
+}
+
+// Get a register in which we plan to put a quantity that will be used as an
+// integer argument. This differs from GetIntArgReg in that, if we have no
+// more actual argument registers to use, we will fall back on using whatever
+// CallTempReg* registers don't overlap the argument registers, and only fail
+// once those run out too.
+static inline bool GetTempRegForIntArg(uint32_t usedIntArgs,
+ uint32_t usedFloatArgs, Register* out) {
+ if (GetIntArgReg(usedIntArgs, usedFloatArgs, out)) {
+ return true;
+ }
+
+ // Unfortunately, we have to assume things about the point at which
+ // GetIntArgReg returns false, because we need to know how many registers it
+ // can allocate.
+ usedIntArgs -= NumIntArgRegs;
+ if (usedIntArgs >= NumCallTempNonArgRegs) {
+ return false;
+ }
+
+ *out = CallTempNonArgRegs[usedIntArgs];
+ return true;
+}
+
+#if defined(JS_CODEGEN_ARM_HARDFP) || defined(JS_SIMULATOR_ARM)
+
+static inline bool GetFloat32ArgReg(uint32_t usedIntArgs,
+ uint32_t usedFloatArgs,
+ FloatRegister* out) {
+ MOZ_ASSERT(UseHardFpABI());
+ if (usedFloatArgs >= NumFloatArgRegs) {
+ return false;
+ }
+ *out = VFPRegister(usedFloatArgs, VFPRegister::Single);
+ return true;
+}
+static inline bool GetDoubleArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
+ FloatRegister* out) {
+ MOZ_ASSERT(UseHardFpABI());
+ MOZ_ASSERT((usedFloatArgs % 2) == 0);
+ if (usedFloatArgs >= NumFloatArgRegs) {
+ return false;
+ }
+ *out = VFPRegister(usedFloatArgs >> 1, VFPRegister::Double);
+ return true;
+}
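+
+// For example (reading the two functions above): usedFloatArgs == 0 yields s0
+// for a float or d0 for a double, and usedFloatArgs == 4 yields s4 or d2;
+// usedFloatArgs counts single-precision slots, so each double consumes two.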
+
+#endif
+
+class DoubleEncoder {
+ struct DoubleEntry {
+ uint32_t dblTop;
+ datastore::Imm8VFPImmData data;
+ };
+
+ static const DoubleEntry table[256];
+
+ public:
+ bool lookup(uint32_t top, datastore::Imm8VFPImmData* ret) const {
+ for (int i = 0; i < 256; i++) {
+ if (table[i].dblTop == top) {
+ *ret = table[i].data;
+ return true;
+ }
+ }
+ return false;
+ }
+};
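+
+// For example, the double 1.0 has top word 0x3ff00000; looking that up yields
+// the 8-bit immediate encoding used by "vmov.f64 dN, #1.0". A value whose top
+// word does not appear in the 256-entry table cannot be encoded as a VFP
+// immediate this way.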
+
+// Forbids nop filling for testing purposes. Not nestable.
+class AutoForbidNops {
+ protected:
+ Assembler* masm_;
+
+ public:
+ explicit AutoForbidNops(Assembler* masm) : masm_(masm) {
+ masm_->enterNoNops();
+ }
+ ~AutoForbidNops() { masm_->leaveNoNops(); }
+};
+
+class AutoForbidPoolsAndNops : public AutoForbidNops {
+ public:
+  // The maxInst argument is the maximum number of word-sized instructions
+  // that will be allocated within this context. It is used to determine if
+  // the pool needs to be dumped before entering this context. The debug code
+  // checks that no more than maxInst instructions are actually allocated.
+  //
+  // Allocation of pool entries is not supported within this context, so the
+  // code cannot use large integers or float constants, etc.
+ AutoForbidPoolsAndNops(Assembler* masm, size_t maxInst)
+ : AutoForbidNops(masm) {
+ masm_->enterNoPool(maxInst);
+ }
+
+ ~AutoForbidPoolsAndNops() { masm_->leaveNoPool(); }
+};
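+
+// Illustrative use of AutoForbidPoolsAndNops (a sketch, with masm standing in
+// for an assembler and dest/imm for a register and a 32-bit value): guard a
+// fixed-size patchable sequence so that no pool or nop fill can be inserted
+// in the middle of it:
+//
+//   {
+//     AutoForbidPoolsAndNops afp(&masm, /* maxInst = */ 2);
+//     masm.as_movw(dest, Imm16(imm & 0xffff));
+//     masm.as_movt(dest, Imm16(imm >> 16));
+//   }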
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_Assembler_arm_h */
diff --git a/js/src/jit/arm/CodeGenerator-arm.cpp b/js/src/jit/arm/CodeGenerator-arm.cpp
new file mode 100644
index 0000000000..1526be81c9
--- /dev/null
+++ b/js/src/jit/arm/CodeGenerator-arm.cpp
@@ -0,0 +1,3154 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm/CodeGenerator-arm.h"
+
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
+
+#include <iterator>
+
+#include "jsnum.h"
+
+#include "jit/CodeGenerator.h"
+#include "jit/InlineScriptTree.h"
+#include "jit/JitRuntime.h"
+#include "jit/MIR.h"
+#include "jit/MIRGraph.h"
+#include "js/Conversions.h"
+#include "js/ScalarType.h" // js::Scalar::Type
+#include "vm/JSContext.h"
+#include "vm/Realm.h"
+#include "vm/Shape.h"
+
+#include "jit/MacroAssembler-inl.h"
+#include "jit/shared/CodeGenerator-shared-inl.h"
+#include "vm/JSScript-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+using JS::GenericNaN;
+using JS::ToInt32;
+using mozilla::DebugOnly;
+using mozilla::FloorLog2;
+using mozilla::NegativeInfinity;
+
+// shared
+CodeGeneratorARM::CodeGeneratorARM(MIRGenerator* gen, LIRGraph* graph,
+ MacroAssembler* masm)
+ : CodeGeneratorShared(gen, graph, masm) {}
+
+Register64 CodeGeneratorARM::ToOperandOrRegister64(
+ const LInt64Allocation input) {
+ return ToRegister64(input);
+}
+
+void CodeGeneratorARM::emitBranch(Assembler::Condition cond,
+ MBasicBlock* mirTrue, MBasicBlock* mirFalse) {
+ if (isNextBlock(mirFalse->lir())) {
+ jumpToBlock(mirTrue, cond);
+ } else {
+ jumpToBlock(mirFalse, Assembler::InvertCondition(cond));
+ jumpToBlock(mirTrue);
+ }
+}
+
+void OutOfLineBailout::accept(CodeGeneratorARM* codegen) {
+ codegen->visitOutOfLineBailout(this);
+}
+
+void CodeGenerator::visitTestIAndBranch(LTestIAndBranch* test) {
+ const LAllocation* opd = test->getOperand(0);
+ MBasicBlock* ifTrue = test->ifTrue();
+ MBasicBlock* ifFalse = test->ifFalse();
+
+ // Test the operand
+ masm.as_cmp(ToRegister(opd), Imm8(0));
+
+ if (isNextBlock(ifFalse->lir())) {
+ jumpToBlock(ifTrue, Assembler::NonZero);
+ } else if (isNextBlock(ifTrue->lir())) {
+ jumpToBlock(ifFalse, Assembler::Zero);
+ } else {
+ jumpToBlock(ifFalse, Assembler::Zero);
+ jumpToBlock(ifTrue);
+ }
+}
+
+void CodeGenerator::visitCompare(LCompare* comp) {
+ Assembler::Condition cond =
+ JSOpToCondition(comp->mir()->compareType(), comp->jsop());
+ const LAllocation* left = comp->getOperand(0);
+ const LAllocation* right = comp->getOperand(1);
+ const LDefinition* def = comp->getDef(0);
+
+ ScratchRegisterScope scratch(masm);
+
+ if (right->isConstant()) {
+ masm.ma_cmp(ToRegister(left), Imm32(ToInt32(right)), scratch);
+ } else if (right->isRegister()) {
+ masm.ma_cmp(ToRegister(left), ToRegister(right));
+ } else {
+ SecondScratchRegisterScope scratch2(masm);
+ masm.ma_cmp(ToRegister(left), Operand(ToAddress(right)), scratch, scratch2);
+ }
+ masm.ma_mov(Imm32(0), ToRegister(def));
+ masm.ma_mov(Imm32(1), ToRegister(def), cond);
+}
+
+void CodeGenerator::visitCompareAndBranch(LCompareAndBranch* comp) {
+ Assembler::Condition cond =
+ JSOpToCondition(comp->cmpMir()->compareType(), comp->jsop());
+ const LAllocation* left = comp->left();
+ const LAllocation* right = comp->right();
+
+ ScratchRegisterScope scratch(masm);
+
+ if (right->isConstant()) {
+ masm.ma_cmp(ToRegister(left), Imm32(ToInt32(right)), scratch);
+ } else if (right->isRegister()) {
+ masm.ma_cmp(ToRegister(left), ToRegister(right));
+ } else {
+ SecondScratchRegisterScope scratch2(masm);
+ masm.ma_cmp(ToRegister(left), Operand(ToAddress(right)), scratch, scratch2);
+ }
+ emitBranch(cond, comp->ifTrue(), comp->ifFalse());
+}
+
+bool CodeGeneratorARM::generateOutOfLineCode() {
+ if (!CodeGeneratorShared::generateOutOfLineCode()) {
+ return false;
+ }
+
+ if (deoptLabel_.used()) {
+ // All non-table-based bailouts will go here.
+ masm.bind(&deoptLabel_);
+
+ // Push the frame size, so the handler can recover the IonScript.
+ masm.push(Imm32(frameSize()));
+
+ TrampolinePtr handler = gen->jitRuntime()->getGenericBailoutHandler();
+ masm.jump(handler);
+ }
+
+ return !masm.oom();
+}
+
+void CodeGeneratorARM::bailoutIf(Assembler::Condition condition,
+ LSnapshot* snapshot) {
+ encode(snapshot);
+
+ InlineScriptTree* tree = snapshot->mir()->block()->trackedTree();
+ OutOfLineBailout* ool =
+ new (alloc()) OutOfLineBailout(snapshot, masm.framePushed());
+
+ // All bailout code is associated with the bytecodeSite of the block we are
+ // bailing out from.
+ addOutOfLineCode(ool,
+ new (alloc()) BytecodeSite(tree, tree->script()->code()));
+
+ masm.ma_b(ool->entry(), condition);
+}
+
+void CodeGeneratorARM::bailoutFrom(Label* label, LSnapshot* snapshot) {
+ MOZ_ASSERT_IF(!masm.oom(), label->used());
+ MOZ_ASSERT_IF(!masm.oom(), !label->bound());
+
+ encode(snapshot);
+
+ InlineScriptTree* tree = snapshot->mir()->block()->trackedTree();
+ OutOfLineBailout* ool =
+ new (alloc()) OutOfLineBailout(snapshot, masm.framePushed());
+
+ // All bailout code is associated with the bytecodeSite of the block we are
+ // bailing out from.
+ addOutOfLineCode(ool,
+ new (alloc()) BytecodeSite(tree, tree->script()->code()));
+
+ masm.retarget(label, ool->entry());
+}
+
+void CodeGeneratorARM::bailout(LSnapshot* snapshot) {
+ Label label;
+ masm.ma_b(&label);
+ bailoutFrom(&label, snapshot);
+}
+
+void CodeGeneratorARM::visitOutOfLineBailout(OutOfLineBailout* ool) {
+ masm.push(Imm32(ool->snapshot()->snapshotOffset()));
+ masm.ma_b(&deoptLabel_);
+}
+
+void CodeGenerator::visitMinMaxD(LMinMaxD* ins) {
+ FloatRegister first = ToFloatRegister(ins->first());
+ FloatRegister second = ToFloatRegister(ins->second());
+
+ MOZ_ASSERT(first == ToFloatRegister(ins->output()));
+
+ if (ins->mir()->isMax()) {
+ masm.maxDouble(second, first, true);
+ } else {
+ masm.minDouble(second, first, true);
+ }
+}
+
+void CodeGenerator::visitMinMaxF(LMinMaxF* ins) {
+ FloatRegister first = ToFloatRegister(ins->first());
+ FloatRegister second = ToFloatRegister(ins->second());
+
+ MOZ_ASSERT(first == ToFloatRegister(ins->output()));
+
+ if (ins->mir()->isMax()) {
+ masm.maxFloat32(second, first, true);
+ } else {
+ masm.minFloat32(second, first, true);
+ }
+}
+
+void CodeGenerator::visitAddI(LAddI* ins) {
+ const LAllocation* lhs = ins->getOperand(0);
+ const LAllocation* rhs = ins->getOperand(1);
+ const LDefinition* dest = ins->getDef(0);
+
+ ScratchRegisterScope scratch(masm);
+
+ if (rhs->isConstant()) {
+ masm.ma_add(ToRegister(lhs), Imm32(ToInt32(rhs)), ToRegister(dest), scratch,
+ SetCC);
+ } else if (rhs->isRegister()) {
+ masm.ma_add(ToRegister(lhs), ToRegister(rhs), ToRegister(dest), SetCC);
+ } else {
+ masm.ma_add(ToRegister(lhs), Operand(ToAddress(rhs)), ToRegister(dest),
+ SetCC);
+ }
+
+ if (ins->snapshot()) {
+ bailoutIf(Assembler::Overflow, ins->snapshot());
+ }
+}
+
+void CodeGenerator::visitAddI64(LAddI64* lir) {
+ const LInt64Allocation lhs = lir->getInt64Operand(LAddI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LAddI64::Rhs);
+
+ MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
+
+ if (IsConstant(rhs)) {
+ masm.add64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
+ return;
+ }
+
+ masm.add64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
+}
+
+void CodeGenerator::visitSubI(LSubI* ins) {
+ const LAllocation* lhs = ins->getOperand(0);
+ const LAllocation* rhs = ins->getOperand(1);
+ const LDefinition* dest = ins->getDef(0);
+
+ ScratchRegisterScope scratch(masm);
+
+ if (rhs->isConstant()) {
+ masm.ma_sub(ToRegister(lhs), Imm32(ToInt32(rhs)), ToRegister(dest), scratch,
+ SetCC);
+ } else if (rhs->isRegister()) {
+ masm.ma_sub(ToRegister(lhs), ToRegister(rhs), ToRegister(dest), SetCC);
+ } else {
+ masm.ma_sub(ToRegister(lhs), Operand(ToAddress(rhs)), ToRegister(dest),
+ SetCC);
+ }
+
+ if (ins->snapshot()) {
+ bailoutIf(Assembler::Overflow, ins->snapshot());
+ }
+}
+
+void CodeGenerator::visitSubI64(LSubI64* lir) {
+ const LInt64Allocation lhs = lir->getInt64Operand(LSubI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LSubI64::Rhs);
+
+ MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
+
+ if (IsConstant(rhs)) {
+ masm.sub64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
+ return;
+ }
+
+ masm.sub64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
+}
+
+void CodeGenerator::visitMulI(LMulI* ins) {
+ const LAllocation* lhs = ins->getOperand(0);
+ const LAllocation* rhs = ins->getOperand(1);
+ const LDefinition* dest = ins->getDef(0);
+ MMul* mul = ins->mir();
+ MOZ_ASSERT_IF(mul->mode() == MMul::Integer,
+ !mul->canBeNegativeZero() && !mul->canOverflow());
+
+ if (rhs->isConstant()) {
+ // Bailout when this condition is met.
+ Assembler::Condition c = Assembler::Overflow;
+ // Bailout on -0.0
+ int32_t constant = ToInt32(rhs);
+ if (mul->canBeNegativeZero() && constant <= 0) {
+ Assembler::Condition bailoutCond =
+ (constant == 0) ? Assembler::LessThan : Assembler::Equal;
+ masm.as_cmp(ToRegister(lhs), Imm8(0));
+ bailoutIf(bailoutCond, ins->snapshot());
+ }
+ // TODO: move these to ma_mul.
+ switch (constant) {
+ case -1:
+ masm.as_rsb(ToRegister(dest), ToRegister(lhs), Imm8(0), SetCC);
+ break;
+ case 0:
+ masm.ma_mov(Imm32(0), ToRegister(dest));
+ return; // Escape overflow check;
+ case 1:
+ // Nop
+ masm.ma_mov(ToRegister(lhs), ToRegister(dest));
+ return; // Escape overflow check;
+ case 2:
+ masm.ma_add(ToRegister(lhs), ToRegister(lhs), ToRegister(dest), SetCC);
+ // Overflow is handled later.
+ break;
+ default: {
+ bool handled = false;
+ if (constant > 0) {
+ // Try shift and add sequences for a positive constant.
+ if (!mul->canOverflow()) {
+ // If it cannot overflow, we can do lots of optimizations.
+ Register src = ToRegister(lhs);
+ uint32_t shift = FloorLog2(constant);
+ uint32_t rest = constant - (1 << shift);
+ // See if the constant has one bit set, meaning it can be
+ // encoded as a bitshift.
+ if ((1 << shift) == constant) {
+ masm.ma_lsl(Imm32(shift), src, ToRegister(dest));
+ handled = true;
+ } else {
+ // If the constant cannot be encoded as (1 << C1), see
+ // if it can be encoded as (1 << C1) | (1 << C2), which
+ // can be computed using an add and a shift.
+ uint32_t shift_rest = FloorLog2(rest);
+ if ((1u << shift_rest) == rest) {
+ masm.as_add(ToRegister(dest), src,
+ lsl(src, shift - shift_rest));
+ if (shift_rest != 0) {
+ masm.ma_lsl(Imm32(shift_rest), ToRegister(dest),
+ ToRegister(dest));
+ }
+ handled = true;
+ }
+ }
+ } else if (ToRegister(lhs) != ToRegister(dest)) {
+ // To stay on the safe side, only optimize things that are a
+ // power of 2.
+
+ uint32_t shift = FloorLog2(constant);
+ if ((1 << shift) == constant) {
+ // dest = lhs * pow(2,shift)
+ masm.ma_lsl(Imm32(shift), ToRegister(lhs), ToRegister(dest));
+ // At runtime, check (lhs == dest >> shift), if this
+ // does not hold, some bits were lost due to overflow,
+ // and the computation should be resumed as a double.
+ masm.as_cmp(ToRegister(lhs), asr(ToRegister(dest), shift));
+ c = Assembler::NotEqual;
+ handled = true;
+ }
+ }
+ }
+
+ if (!handled) {
+ ScratchRegisterScope scratch(masm);
+ if (mul->canOverflow()) {
+ c = masm.ma_check_mul(ToRegister(lhs), Imm32(ToInt32(rhs)),
+ ToRegister(dest), scratch, c);
+ } else {
+ masm.ma_mul(ToRegister(lhs), Imm32(ToInt32(rhs)), ToRegister(dest),
+ scratch);
+ }
+ }
+ }
+ }
+ // Bailout on overflow.
+ if (mul->canOverflow()) {
+ bailoutIf(c, ins->snapshot());
+ }
+ } else {
+ Assembler::Condition c = Assembler::Overflow;
+
+ if (mul->canOverflow()) {
+ ScratchRegisterScope scratch(masm);
+ c = masm.ma_check_mul(ToRegister(lhs), ToRegister(rhs), ToRegister(dest),
+ scratch, c);
+ } else {
+ masm.ma_mul(ToRegister(lhs), ToRegister(rhs), ToRegister(dest));
+ }
+
+ // Bailout on overflow.
+ if (mul->canOverflow()) {
+ bailoutIf(c, ins->snapshot());
+ }
+
+ if (mul->canBeNegativeZero()) {
+ Label done;
+ masm.as_cmp(ToRegister(dest), Imm8(0));
+ masm.ma_b(&done, Assembler::NotEqual);
+
+ // Result is -0 if lhs or rhs is negative.
+ masm.ma_cmn(ToRegister(lhs), ToRegister(rhs));
+ bailoutIf(Assembler::Signed, ins->snapshot());
+
+ masm.bind(&done);
+ }
+ }
+}
+
+void CodeGenerator::visitMulI64(LMulI64* lir) {
+ const LInt64Allocation lhs = lir->getInt64Operand(LMulI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LMulI64::Rhs);
+
+ MOZ_ASSERT(ToRegister64(lhs) == ToOutRegister64(lir));
+
+ if (IsConstant(rhs)) {
+ int64_t constant = ToInt64(rhs);
+ switch (constant) {
+ case -1:
+ masm.neg64(ToRegister64(lhs));
+ return;
+ case 0:
+ masm.xor64(ToRegister64(lhs), ToRegister64(lhs));
+ return;
+ case 1:
+ // nop
+ return;
+ case 2:
+ masm.add64(ToRegister64(lhs), ToRegister64(lhs));
+ return;
+ default:
+ if (constant > 0) {
+ // Use shift if constant is power of 2.
+ int32_t shift = mozilla::FloorLog2(constant);
+ if (int64_t(1) << shift == constant) {
+ masm.lshift64(Imm32(shift), ToRegister64(lhs));
+ return;
+ }
+ }
+ Register temp = ToTempRegisterOrInvalid(lir->temp());
+ masm.mul64(Imm64(constant), ToRegister64(lhs), temp);
+ }
+ } else {
+ Register temp = ToTempRegisterOrInvalid(lir->temp());
+ masm.mul64(ToOperandOrRegister64(rhs), ToRegister64(lhs), temp);
+ }
+}
+
+void CodeGeneratorARM::divICommon(MDiv* mir, Register lhs, Register rhs,
+ Register output, LSnapshot* snapshot,
+ Label& done) {
+ ScratchRegisterScope scratch(masm);
+
+ if (mir->canBeNegativeOverflow()) {
+ // Handle INT32_MIN / -1;
+ // The integer division will give INT32_MIN, but we want -(double)INT32_MIN.
+
+ // Sets EQ if lhs == INT32_MIN.
+ masm.ma_cmp(lhs, Imm32(INT32_MIN), scratch);
+ // If EQ (LHS == INT32_MIN), sets EQ if rhs == -1.
+ masm.ma_cmp(rhs, Imm32(-1), scratch, Assembler::Equal);
+ if (mir->canTruncateOverflow()) {
+ if (mir->trapOnError()) {
+ Label ok;
+ masm.ma_b(&ok, Assembler::NotEqual);
+ masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->bytecodeOffset());
+ masm.bind(&ok);
+ } else {
+ // (-INT32_MIN)|0 = INT32_MIN
+ Label skip;
+ masm.ma_b(&skip, Assembler::NotEqual);
+ masm.ma_mov(Imm32(INT32_MIN), output);
+ masm.ma_b(&done);
+ masm.bind(&skip);
+ }
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Equal, snapshot);
+ }
+ }
+
+ // Handle divide by zero.
+ if (mir->canBeDivideByZero()) {
+ masm.as_cmp(rhs, Imm8(0));
+ if (mir->canTruncateInfinities()) {
+ if (mir->trapOnError()) {
+ Label nonZero;
+ masm.ma_b(&nonZero, Assembler::NotEqual);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+ masm.bind(&nonZero);
+ } else {
+ // Infinity|0 == 0
+ Label skip;
+ masm.ma_b(&skip, Assembler::NotEqual);
+ masm.ma_mov(Imm32(0), output);
+ masm.ma_b(&done);
+ masm.bind(&skip);
+ }
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Equal, snapshot);
+ }
+ }
+
+ // Handle negative 0.
+ if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) {
+ Label nonzero;
+ masm.as_cmp(lhs, Imm8(0));
+ masm.ma_b(&nonzero, Assembler::NotEqual);
+ masm.as_cmp(rhs, Imm8(0));
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::LessThan, snapshot);
+ masm.bind(&nonzero);
+ }
+}
+
+void CodeGenerator::visitDivI(LDivI* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register temp = ToRegister(ins->getTemp(0));
+ Register output = ToRegister(ins->output());
+ MDiv* mir = ins->mir();
+
+ Label done;
+ divICommon(mir, lhs, rhs, output, ins->snapshot(), done);
+
+ if (mir->canTruncateRemainder()) {
+ masm.ma_sdiv(lhs, rhs, output);
+ } else {
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_sdiv(lhs, rhs, temp);
+ masm.ma_mul(temp, rhs, scratch);
+ masm.ma_cmp(lhs, scratch);
+ }
+ bailoutIf(Assembler::NotEqual, ins->snapshot());
+ masm.ma_mov(temp, output);
+ }
+
+ masm.bind(&done);
+}
+
+extern "C" {
+extern MOZ_EXPORT int64_t __aeabi_idivmod(int, int);
+extern MOZ_EXPORT int64_t __aeabi_uidivmod(int, int);
+}
+
+void CodeGenerator::visitSoftDivI(LSoftDivI* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register output = ToRegister(ins->output());
+ MDiv* mir = ins->mir();
+
+ Label done;
+ divICommon(mir, lhs, rhs, output, ins->snapshot(), done);
+
+ if (gen->compilingWasm()) {
+ masm.Push(InstanceReg);
+ int32_t framePushedAfterInstance = masm.framePushed();
+ masm.setupWasmABICall();
+ masm.passABIArg(lhs);
+ masm.passABIArg(rhs);
+ int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance;
+ masm.callWithABI(mir->bytecodeOffset(),
+ wasm::SymbolicAddress::aeabi_idivmod,
+ mozilla::Some(instanceOffset));
+ masm.Pop(InstanceReg);
+ } else {
+ using Fn = int64_t (*)(int, int);
+ masm.setupAlignedABICall();
+ masm.passABIArg(lhs);
+ masm.passABIArg(rhs);
+ masm.callWithABI<Fn, __aeabi_idivmod>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther);
+ }
+
+ // idivmod returns the quotient in r0, and the remainder in r1.
+ if (!mir->canTruncateRemainder()) {
+ MOZ_ASSERT(mir->fallible());
+ masm.as_cmp(r1, Imm8(0));
+ bailoutIf(Assembler::NonZero, ins->snapshot());
+ }
+
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitDivPowTwoI(LDivPowTwoI* ins) {
+ MDiv* mir = ins->mir();
+ Register lhs = ToRegister(ins->numerator());
+ Register output = ToRegister(ins->output());
+ int32_t shift = ins->shift();
+
+ if (shift == 0) {
+ masm.ma_mov(lhs, output);
+ return;
+ }
+
+ if (!mir->isTruncated()) {
+ // If the remainder is != 0, bailout since this must be a double.
+ {
+ // The bailout code also needs the scratch register.
+ // Here it is only used as a dummy target to set CC flags.
+ ScratchRegisterScope scratch(masm);
+ masm.as_mov(scratch, lsl(lhs, 32 - shift), SetCC);
+ }
+ bailoutIf(Assembler::NonZero, ins->snapshot());
+ }
+
+ if (!mir->canBeNegativeDividend()) {
+ // Numerator is unsigned, so needs no adjusting. Do the shift.
+ masm.as_mov(output, asr(lhs, shift));
+ return;
+ }
+
+ // Adjust the value so that shifting produces a correctly rounded result
+ // when the numerator is negative. See 10-1 "Signed Division by a Known
+ // Power of 2" in Henry S. Warren, Jr.'s Hacker's Delight.
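+  //
+  // Worked example (illustrative): with shift == 2 and lhs == -5, the bias
+  // added below is lsr(asr(-5, 31), 30) == 3, so we compute
+  // asr(-5 + 3, 2) == -1, matching truncating division (-5 / 4 == -1),
+  // whereas a plain asr(-5, 2) would give the floored result -2.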
+ ScratchRegisterScope scratch(masm);
+
+ if (shift > 1) {
+ masm.as_mov(scratch, asr(lhs, 31));
+ masm.as_add(scratch, lhs, lsr(scratch, 32 - shift));
+ } else {
+ masm.as_add(scratch, lhs, lsr(lhs, 32 - shift));
+ }
+
+ // Do the shift.
+ masm.as_mov(output, asr(scratch, shift));
+}
+
+void CodeGeneratorARM::modICommon(MMod* mir, Register lhs, Register rhs,
+ Register output, LSnapshot* snapshot,
+ Label& done) {
+ // X % 0 is bad because it will give garbage (or abort), when it should give
+ // NaN.
+
+ if (mir->canBeDivideByZero()) {
+ masm.as_cmp(rhs, Imm8(0));
+ if (mir->isTruncated()) {
+ Label nonZero;
+ masm.ma_b(&nonZero, Assembler::NotEqual);
+ if (mir->trapOnError()) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+ } else {
+ // NaN|0 == 0
+ masm.ma_mov(Imm32(0), output);
+ masm.ma_b(&done);
+ }
+ masm.bind(&nonZero);
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Equal, snapshot);
+ }
+ }
+}
+
+void CodeGenerator::visitModI(LModI* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register output = ToRegister(ins->output());
+ MMod* mir = ins->mir();
+
+ // Contrary to other architectures (notably x86) INT_MIN % -1 doesn't need to
+ // be handled separately. |ma_smod| computes the remainder using the |SDIV|
+ // and the |MLS| instructions. On overflow, |SDIV| truncates the result to
+ // 32-bit and returns INT_MIN, see ARM Architecture Reference Manual, SDIV
+ // instruction.
+ //
+ // mls(INT_MIN, sdiv(INT_MIN, -1), -1)
+ // = INT_MIN - (sdiv(INT_MIN, -1) * -1)
+ // = INT_MIN - (INT_MIN * -1)
+ // = INT_MIN - INT_MIN
+ // = 0
+ //
+ // And a zero remainder with a negative dividend is already handled below.
+
+ Label done;
+ modICommon(mir, lhs, rhs, output, ins->snapshot(), done);
+
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_smod(lhs, rhs, output, scratch);
+ }
+
+ // If X%Y == 0 and X < 0, then we *actually* wanted to return -0.0.
+ if (mir->canBeNegativeDividend()) {
+ if (mir->isTruncated()) {
+ // -0.0|0 == 0
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ // See if X < 0
+ masm.as_cmp(output, Imm8(0));
+ masm.ma_b(&done, Assembler::NotEqual);
+ masm.as_cmp(lhs, Imm8(0));
+ bailoutIf(Assembler::Signed, ins->snapshot());
+ }
+ }
+
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitSoftModI(LSoftModI* ins) {
+ // Extract the registers from this instruction.
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register output = ToRegister(ins->output());
+ Register callTemp = ToRegister(ins->callTemp());
+ MMod* mir = ins->mir();
+ Label done;
+
+ // Save the lhs in case we end up with a 0 that should be a -0.0 because lhs <
+ // 0.
+ MOZ_ASSERT(callTemp != lhs);
+ MOZ_ASSERT(callTemp != rhs);
+ masm.ma_mov(lhs, callTemp);
+
+ // Prevent INT_MIN % -1.
+ //
+ // |aeabi_idivmod| is allowed to return any arbitrary value when called with
+ // |(INT_MIN, -1)|, see "Run-time ABI for the ARM architecture manual". Most
+ // implementations perform a non-trapping signed integer division and
+ // return the expected result, i.e. INT_MIN. But since we can't rely on this
+ // behavior, handle this case separately here.
+ if (mir->canBeNegativeDividend()) {
+ {
+ ScratchRegisterScope scratch(masm);
+ // Sets EQ if lhs == INT_MIN
+ masm.ma_cmp(lhs, Imm32(INT_MIN), scratch);
+ // If EQ (LHS == INT_MIN), sets EQ if rhs == -1
+ masm.ma_cmp(rhs, Imm32(-1), scratch, Assembler::Equal);
+ }
+ if (mir->isTruncated()) {
+ // (INT_MIN % -1)|0 == 0
+ Label skip;
+ masm.ma_b(&skip, Assembler::NotEqual);
+ masm.ma_mov(Imm32(0), output);
+ masm.ma_b(&done);
+ masm.bind(&skip);
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Equal, ins->snapshot());
+ }
+ }
+
+ modICommon(mir, lhs, rhs, output, ins->snapshot(), done);
+
+ if (gen->compilingWasm()) {
+ masm.Push(InstanceReg);
+ int32_t framePushedAfterInstance = masm.framePushed();
+ masm.setupWasmABICall();
+ masm.passABIArg(lhs);
+ masm.passABIArg(rhs);
+ int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance;
+ masm.callWithABI(mir->bytecodeOffset(),
+ wasm::SymbolicAddress::aeabi_idivmod,
+ mozilla::Some(instanceOffset));
+ masm.Pop(InstanceReg);
+ } else {
+ using Fn = int64_t (*)(int, int);
+ masm.setupAlignedABICall();
+ masm.passABIArg(lhs);
+ masm.passABIArg(rhs);
+ masm.callWithABI<Fn, __aeabi_idivmod>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther);
+ }
+
+ MOZ_ASSERT(r1 != output);
+ masm.move32(r1, output);
+
+ // If X%Y == 0 and X < 0, then we *actually* wanted to return -0.0
+ if (mir->canBeNegativeDividend()) {
+ if (mir->isTruncated()) {
+ // -0.0|0 == 0
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ // See if X < 0
+ masm.as_cmp(output, Imm8(0));
+ masm.ma_b(&done, Assembler::NotEqual);
+ masm.as_cmp(callTemp, Imm8(0));
+ bailoutIf(Assembler::Signed, ins->snapshot());
+ }
+ }
+
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitModPowTwoI(LModPowTwoI* ins) {
+ Register in = ToRegister(ins->getOperand(0));
+ Register out = ToRegister(ins->getDef(0));
+ MMod* mir = ins->mir();
+ Label fin;
+ // Bug 739870: jbramley has a different sequence that may help with speed
+ // here.
+
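+ // The sequence below computes |in % (1 << shift)| with the sign of |in|.
+ // For example, with shift == 2 (mask 0b11):
+ // in = -5: negate to 5, mask to 1, negate back to -1 (-5 % 4 == -1);
+ // in = -4: negate to 4, mask to 0, negate back to 0 with the Z flag set,
+ // which triggers the -0 bailout below when the result isn't truncated.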
+ masm.ma_mov(in, out, SetCC);
+ masm.ma_b(&fin, Assembler::Zero);
+ masm.as_rsb(out, out, Imm8(0), LeaveCC, Assembler::Signed);
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_and(Imm32((1 << ins->shift()) - 1), out, scratch);
+ }
+ masm.as_rsb(out, out, Imm8(0), SetCC, Assembler::Signed);
+ if (mir->canBeNegativeDividend()) {
+ if (!mir->isTruncated()) {
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Zero, ins->snapshot());
+ } else {
+ // -0|0 == 0
+ }
+ }
+ masm.bind(&fin);
+}
+
+void CodeGenerator::visitModMaskI(LModMaskI* ins) {
+ Register src = ToRegister(ins->getOperand(0));
+ Register dest = ToRegister(ins->getDef(0));
+ Register tmp1 = ToRegister(ins->getTemp(0));
+ Register tmp2 = ToRegister(ins->getTemp(1));
+ MMod* mir = ins->mir();
+
+ ScratchRegisterScope scratch(masm);
+ SecondScratchRegisterScope scratch2(masm);
+
+ masm.ma_mod_mask(src, dest, tmp1, tmp2, scratch, scratch2, ins->shift());
+
+ if (mir->canBeNegativeDividend()) {
+ if (!mir->isTruncated()) {
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Zero, ins->snapshot());
+ } else {
+ // -0|0 == 0
+ }
+ }
+}
+
+void CodeGeneratorARM::emitBigIntDiv(LBigIntDiv* ins, Register dividend,
+ Register divisor, Register output,
+ Label* fail) {
+ // Callers handle division by zero and integer overflow.
+
+ if (HasIDIV()) {
+ masm.ma_sdiv(dividend, divisor, /* result= */ dividend);
+
+ // Create and return the result.
+ masm.newGCBigInt(output, divisor, initialBigIntHeap(), fail);
+ masm.initializeBigInt(output, dividend);
+
+ return;
+ }
+
+ // idivmod returns the quotient in r0, and the remainder in r1.
+ MOZ_ASSERT(dividend == r0);
+ MOZ_ASSERT(divisor == r1);
+
+ LiveRegisterSet volatileRegs = liveVolatileRegs(ins);
+ volatileRegs.takeUnchecked(dividend);
+ volatileRegs.takeUnchecked(divisor);
+ volatileRegs.takeUnchecked(output);
+
+ masm.PushRegsInMask(volatileRegs);
+
+ using Fn = int64_t (*)(int, int);
+ masm.setupUnalignedABICall(output);
+ masm.passABIArg(dividend);
+ masm.passABIArg(divisor);
+ masm.callWithABI<Fn, __aeabi_idivmod>(MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckOther);
+
+ masm.PopRegsInMask(volatileRegs);
+
+ // Create and return the result.
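+ // After the call the quotient is in r0 (|dividend|), which is what the
+ // BigInt below is initialized from.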
+ masm.newGCBigInt(output, divisor, initialBigIntHeap(), fail);
+ masm.initializeBigInt(output, dividend);
+}
+
+void CodeGeneratorARM::emitBigIntMod(LBigIntMod* ins, Register dividend,
+ Register divisor, Register output,
+ Label* fail) {
+ // Callers handle division by zero and integer overflow.
+
+ if (HasIDIV()) {
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_smod(dividend, divisor, /* result= */ dividend, scratch);
+ }
+
+ // Create and return the result.
+ masm.newGCBigInt(output, divisor, initialBigIntHeap(), fail);
+ masm.initializeBigInt(output, dividend);
+
+ return;
+ }
+
+ // idivmod returns the quotient in r0, and the remainder in r1.
+ MOZ_ASSERT(dividend == r0);
+ MOZ_ASSERT(divisor == r1);
+
+ LiveRegisterSet volatileRegs = liveVolatileRegs(ins);
+ volatileRegs.takeUnchecked(dividend);
+ volatileRegs.takeUnchecked(divisor);
+ volatileRegs.takeUnchecked(output);
+
+ masm.PushRegsInMask(volatileRegs);
+
+ using Fn = int64_t (*)(int, int);
+ masm.setupUnalignedABICall(output);
+ masm.passABIArg(dividend);
+ masm.passABIArg(divisor);
+ masm.callWithABI<Fn, __aeabi_idivmod>(MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckOther);
+
+ masm.PopRegsInMask(volatileRegs);
+
+ // Create and return the result.
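+ // After the call the remainder is in r1 (|divisor|), which is why the
+ // roles of |dividend| and |divisor| are swapped here relative to the
+ // HasIDIV() path above.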
+ masm.newGCBigInt(output, dividend, initialBigIntHeap(), fail);
+ masm.initializeBigInt(output, divisor);
+}
+
+void CodeGenerator::visitBitNotI(LBitNotI* ins) {
+ const LAllocation* input = ins->getOperand(0);
+ const LDefinition* dest = ins->getDef(0);
+ // This will not actually be true on arm: we could use an imm8m in order to
+ // get a wider range of numbers.
+ MOZ_ASSERT(!input->isConstant());
+
+ masm.ma_mvn(ToRegister(input), ToRegister(dest));
+}
+
+void CodeGenerator::visitBitOpI(LBitOpI* ins) {
+ const LAllocation* lhs = ins->getOperand(0);
+ const LAllocation* rhs = ins->getOperand(1);
+ const LDefinition* dest = ins->getDef(0);
+
+ ScratchRegisterScope scratch(masm);
+
+ // All of these bitops should be either imm32's, or integer registers.
+ switch (ins->bitop()) {
+ case JSOp::BitOr:
+ if (rhs->isConstant()) {
+ masm.ma_orr(Imm32(ToInt32(rhs)), ToRegister(lhs), ToRegister(dest),
+ scratch);
+ } else {
+ masm.ma_orr(ToRegister(rhs), ToRegister(lhs), ToRegister(dest));
+ }
+ break;
+ case JSOp::BitXor:
+ if (rhs->isConstant()) {
+ masm.ma_eor(Imm32(ToInt32(rhs)), ToRegister(lhs), ToRegister(dest),
+ scratch);
+ } else {
+ masm.ma_eor(ToRegister(rhs), ToRegister(lhs), ToRegister(dest));
+ }
+ break;
+ case JSOp::BitAnd:
+ if (rhs->isConstant()) {
+ masm.ma_and(Imm32(ToInt32(rhs)), ToRegister(lhs), ToRegister(dest),
+ scratch);
+ } else {
+ masm.ma_and(ToRegister(rhs), ToRegister(lhs), ToRegister(dest));
+ }
+ break;
+ default:
+ MOZ_CRASH("unexpected binary opcode");
+ }
+}
+
+void CodeGenerator::visitShiftI(LShiftI* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ const LAllocation* rhs = ins->rhs();
+ Register dest = ToRegister(ins->output());
+
+ if (rhs->isConstant()) {
+ int32_t shift = ToInt32(rhs) & 0x1F;
+ switch (ins->bitop()) {
+ case JSOp::Lsh:
+ if (shift) {
+ masm.ma_lsl(Imm32(shift), lhs, dest);
+ } else {
+ masm.ma_mov(lhs, dest);
+ }
+ break;
+ case JSOp::Rsh:
+ if (shift) {
+ masm.ma_asr(Imm32(shift), lhs, dest);
+ } else {
+ masm.ma_mov(lhs, dest);
+ }
+ break;
+ case JSOp::Ursh:
+ if (shift) {
+ masm.ma_lsr(Imm32(shift), lhs, dest);
+ } else {
+ // x >>> 0 can overflow.
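+ // (E.g. -1 >>> 0 is 0xFFFFFFFF, which is not a valid int32, hence the
+ // sign check below.)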
+ masm.ma_mov(lhs, dest);
+ if (ins->mir()->toUrsh()->fallible()) {
+ masm.as_cmp(dest, Imm8(0));
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+ }
+ }
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+ } else {
+ // The shift amounts should be AND'ed into the 0-31 range since arm
+ // shifts by the lower byte of the register (it will attempt to shift by
+ // 250 if you ask it to).
+ masm.as_and(dest, ToRegister(rhs), Imm8(0x1F));
+
+ switch (ins->bitop()) {
+ case JSOp::Lsh:
+ masm.ma_lsl(dest, lhs, dest);
+ break;
+ case JSOp::Rsh:
+ masm.ma_asr(dest, lhs, dest);
+ break;
+ case JSOp::Ursh:
+ masm.ma_lsr(dest, lhs, dest);
+ if (ins->mir()->toUrsh()->fallible()) {
+ // x >>> 0 can overflow.
+ masm.as_cmp(dest, Imm8(0));
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+ }
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+ }
+}
+
+void CodeGenerator::visitUrshD(LUrshD* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ Register temp = ToRegister(ins->temp());
+
+ const LAllocation* rhs = ins->rhs();
+ FloatRegister out = ToFloatRegister(ins->output());
+
+ if (rhs->isConstant()) {
+ int32_t shift = ToInt32(rhs) & 0x1F;
+ if (shift) {
+ masm.ma_lsr(Imm32(shift), lhs, temp);
+ } else {
+ masm.ma_mov(lhs, temp);
+ }
+ } else {
+ masm.as_and(temp, ToRegister(rhs), Imm8(0x1F));
+ masm.ma_lsr(temp, lhs, temp);
+ }
+
+ masm.convertUInt32ToDouble(temp, out);
+}
+
+void CodeGenerator::visitClzI(LClzI* ins) {
+ Register input = ToRegister(ins->input());
+ Register output = ToRegister(ins->output());
+
+ masm.clz32(input, output, /* knownNotZero = */ false);
+}
+
+void CodeGenerator::visitCtzI(LCtzI* ins) {
+ Register input = ToRegister(ins->input());
+ Register output = ToRegister(ins->output());
+
+ masm.ctz32(input, output, /* knownNotZero = */ false);
+}
+
+void CodeGenerator::visitPopcntI(LPopcntI* ins) {
+ Register input = ToRegister(ins->input());
+ Register output = ToRegister(ins->output());
+
+ Register tmp = ToRegister(ins->temp0());
+
+ masm.popcnt32(input, output, tmp);
+}
+
+void CodeGenerator::visitPowHalfD(LPowHalfD* ins) {
+ FloatRegister input = ToFloatRegister(ins->input());
+ FloatRegister output = ToFloatRegister(ins->output());
+ ScratchDoubleScope scratch(masm);
+
+ Label done;
+
+ // Masm.pow(-Infinity, 0.5) == Infinity.
+ masm.loadConstantDouble(NegativeInfinity<double>(), scratch);
+ masm.compareDouble(input, scratch);
+ masm.ma_vneg(scratch, output, Assembler::Equal);
+ masm.ma_b(&done, Assembler::Equal);
+
+ // Math.pow(-0, 0.5) == 0 == Math.pow(0, 0.5).
+ // Adding 0 converts any -0 to 0.
+ masm.loadConstantDouble(0.0, scratch);
+ masm.ma_vadd(scratch, input, output);
+ masm.ma_vsqrt(output, output);
+
+ masm.bind(&done);
+}
+
+MoveOperand CodeGeneratorARM::toMoveOperand(LAllocation a) const {
+ if (a.isGeneralReg()) {
+ return MoveOperand(ToRegister(a));
+ }
+ if (a.isFloatReg()) {
+ return MoveOperand(ToFloatRegister(a));
+ }
+ MoveOperand::Kind kind = a.isStackArea() ? MoveOperand::Kind::EffectiveAddress
+ : MoveOperand::Kind::Memory;
+ Address addr = ToAddress(a);
+ MOZ_ASSERT((addr.offset & 3) == 0);
+ return MoveOperand(addr, kind);
+}
+
+class js::jit::OutOfLineTableSwitch
+ : public OutOfLineCodeBase<CodeGeneratorARM> {
+ MTableSwitch* mir_;
+ Vector<CodeLabel, 8, JitAllocPolicy> codeLabels_;
+
+ void accept(CodeGeneratorARM* codegen) override {
+ codegen->visitOutOfLineTableSwitch(this);
+ }
+
+ public:
+ OutOfLineTableSwitch(TempAllocator& alloc, MTableSwitch* mir)
+ : mir_(mir), codeLabels_(alloc) {}
+
+ MTableSwitch* mir() const { return mir_; }
+
+ bool addCodeLabel(CodeLabel label) { return codeLabels_.append(label); }
+ CodeLabel codeLabel(unsigned i) { return codeLabels_[i]; }
+};
+
+void CodeGeneratorARM::visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool) {
+ MTableSwitch* mir = ool->mir();
+
+ size_t numCases = mir->numCases();
+ for (size_t i = 0; i < numCases; i++) {
+ LBlock* caseblock =
+ skipTrivialBlocks(mir->getCase(numCases - 1 - i))->lir();
+ Label* caseheader = caseblock->label();
+ uint32_t caseoffset = caseheader->offset();
+
+ // The entries of the jump table need to be absolute addresses and thus
+ // must be patched after codegen is finished.
+ CodeLabel cl = ool->codeLabel(i);
+ cl.target()->bind(caseoffset);
+ masm.addCodeLabel(cl);
+ }
+}
+
+void CodeGeneratorARM::emitTableSwitchDispatch(MTableSwitch* mir,
+ Register index, Register base) {
+ // The code generated by this is utter hax.
+ // The end result looks something like:
+ // SUBS index, input, #base
+ // RSBSPL index, index, #max
+ // LDRPL pc, pc, index lsl 2
+ // B default
+
+ // If the range of targets is N through M, we first subtract off the lowest
+ // case (N), which both shifts the argument into the range 0 to (M - N)
+ // and sets the Minus flag if the argument was out of range on the low
+ // end.
+
+ // Then we do a reverse subtract with the size of the jump table, which
+ // reverses the range (it becomes size through 0, rather than 0 through
+ // size). The main purpose of this is that the upper bound check sets the
+ // same flag as the lower bound check. Lastly, we do this conditionally
+ // on the previous check succeeding.
+
+ // Then we conditionally load the word at pc plus the (reversed) index
+ // (times the word size) into the pc, which branches to the correct case.
+ // NOTE: when we go to read the pc, the value that we get back is the pc of
+ // the current instruction *PLUS 8*. This means that ldr foo, [pc, +0]
+ // reads $pc+8. In other words, there is an empty word after the branch into
+ // the switch table before the table actually starts. Since the only other
+ // unhandled case is the default case (both out of range high and out of
+ // range low), we insert a branch to the default case into the extra slot,
+ // which ensures we don't attempt to execute the address table.
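+ //
+ // For example, with mir->low() == 10 and 4 cases (targets for 10..13):
+ // index 12 becomes 12 - 10 = 2 after the SUBS, then 3 - 2 = 1 after the
+ // RSBS, and entry 1 of the table (which is written in reverse order, see
+ // visitOutOfLineTableSwitch) is loaded into pc. Index 9 makes the SUBS go
+ // negative and index 14 makes the RSBS go negative, so in both cases the
+ // conditional load is skipped and we fall through to the branch to the
+ // default case.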
+ Label* defaultcase = skipTrivialBlocks(mir->getDefault())->lir()->label();
+
+ ScratchRegisterScope scratch(masm);
+
+ int32_t cases = mir->numCases();
+ // Subtract the lowest case value (mir->low()) from the index.
+ masm.ma_sub(index, Imm32(mir->low()), index, scratch, SetCC);
+ masm.ma_rsb(index, Imm32(cases - 1), index, scratch, SetCC,
+ Assembler::NotSigned);
+ // Inhibit pools within the following sequence because we are indexing into
+ // a pc relative table. The region will have one instruction for ma_ldr, one
+ // for ma_b, and each table case takes one word.
+ AutoForbidPoolsAndNops afp(&masm, 1 + 1 + cases);
+ masm.ma_ldr(DTRAddr(pc, DtrRegImmShift(index, LSL, 2)), pc, Offset,
+ Assembler::NotSigned);
+ masm.ma_b(defaultcase);
+
+ // To fill in the CodeLabels for the case entries, we need to first generate
+ // the case entries (we don't yet know their offsets in the instruction
+ // stream).
+ OutOfLineTableSwitch* ool = new (alloc()) OutOfLineTableSwitch(alloc(), mir);
+ for (int32_t i = 0; i < cases; i++) {
+ CodeLabel cl;
+ masm.writeCodePointer(&cl);
+ masm.propagateOOM(ool->addCodeLabel(cl));
+ }
+ addOutOfLineCode(ool, mir);
+}
+
+void CodeGenerator::visitMathD(LMathD* math) {
+ FloatRegister src1 = ToFloatRegister(math->getOperand(0));
+ FloatRegister src2 = ToFloatRegister(math->getOperand(1));
+ FloatRegister output = ToFloatRegister(math->getDef(0));
+
+ switch (math->jsop()) {
+ case JSOp::Add:
+ masm.ma_vadd(src1, src2, output);
+ break;
+ case JSOp::Sub:
+ masm.ma_vsub(src1, src2, output);
+ break;
+ case JSOp::Mul:
+ masm.ma_vmul(src1, src2, output);
+ break;
+ case JSOp::Div:
+ masm.ma_vdiv(src1, src2, output);
+ break;
+ default:
+ MOZ_CRASH("unexpected opcode");
+ }
+}
+
+void CodeGenerator::visitMathF(LMathF* math) {
+ FloatRegister src1 = ToFloatRegister(math->getOperand(0));
+ FloatRegister src2 = ToFloatRegister(math->getOperand(1));
+ FloatRegister output = ToFloatRegister(math->getDef(0));
+
+ switch (math->jsop()) {
+ case JSOp::Add:
+ masm.ma_vadd_f32(src1, src2, output);
+ break;
+ case JSOp::Sub:
+ masm.ma_vsub_f32(src1, src2, output);
+ break;
+ case JSOp::Mul:
+ masm.ma_vmul_f32(src1, src2, output);
+ break;
+ case JSOp::Div:
+ masm.ma_vdiv_f32(src1, src2, output);
+ break;
+ default:
+ MOZ_CRASH("unexpected opcode");
+ }
+}
+
+void CodeGenerator::visitTruncateDToInt32(LTruncateDToInt32* ins) {
+ emitTruncateDouble(ToFloatRegister(ins->input()), ToRegister(ins->output()),
+ ins->mir());
+}
+
+void CodeGenerator::visitWasmBuiltinTruncateDToInt32(
+ LWasmBuiltinTruncateDToInt32* ins) {
+ emitTruncateDouble(ToFloatRegister(ins->getOperand(0)),
+ ToRegister(ins->getDef(0)), ins->mir());
+}
+
+void CodeGenerator::visitTruncateFToInt32(LTruncateFToInt32* ins) {
+ emitTruncateFloat32(ToFloatRegister(ins->input()), ToRegister(ins->output()),
+ ins->mir());
+}
+
+void CodeGenerator::visitWasmBuiltinTruncateFToInt32(
+ LWasmBuiltinTruncateFToInt32* ins) {
+ emitTruncateFloat32(ToFloatRegister(ins->getOperand(0)),
+ ToRegister(ins->getDef(0)), ins->mir());
+}
+
+ValueOperand CodeGeneratorARM::ToValue(LInstruction* ins, size_t pos) {
+ Register typeReg = ToRegister(ins->getOperand(pos + TYPE_INDEX));
+ Register payloadReg = ToRegister(ins->getOperand(pos + PAYLOAD_INDEX));
+ return ValueOperand(typeReg, payloadReg);
+}
+
+ValueOperand CodeGeneratorARM::ToTempValue(LInstruction* ins, size_t pos) {
+ Register typeReg = ToRegister(ins->getTemp(pos + TYPE_INDEX));
+ Register payloadReg = ToRegister(ins->getTemp(pos + PAYLOAD_INDEX));
+ return ValueOperand(typeReg, payloadReg);
+}
+
+void CodeGenerator::visitValue(LValue* value) {
+ const ValueOperand out = ToOutValue(value);
+
+ masm.moveValue(value->value(), out);
+}
+
+void CodeGenerator::visitBox(LBox* box) {
+ const LDefinition* type = box->getDef(TYPE_INDEX);
+
+ MOZ_ASSERT(!box->getOperand(0)->isConstant());
+
+ // On arm, the input operand and the output payload have the same virtual
+ // register. All that needs to be written is the type tag for the type
+ // definition.
+ masm.ma_mov(Imm32(MIRTypeToTag(box->type())), ToRegister(type));
+}
+
+void CodeGenerator::visitBoxFloatingPoint(LBoxFloatingPoint* box) {
+ const AnyRegister in = ToAnyRegister(box->getOperand(0));
+ const ValueOperand out = ToOutValue(box);
+
+ masm.moveValue(TypedOrValueRegister(box->type(), in), out);
+}
+
+void CodeGenerator::visitUnbox(LUnbox* unbox) {
+ // Note that for unbox, the type and payload indexes are switched on the
+ // inputs.
+ MUnbox* mir = unbox->mir();
+ Register type = ToRegister(unbox->type());
+ Register payload = ToRegister(unbox->payload());
+ Register output = ToRegister(unbox->output());
+
+ mozilla::Maybe<ScratchRegisterScope> scratch;
+ scratch.emplace(masm);
+
+ JSValueTag tag = MIRTypeToTag(mir->type());
+ if (mir->fallible()) {
+ masm.ma_cmp(type, Imm32(tag), *scratch);
+ bailoutIf(Assembler::NotEqual, unbox->snapshot());
+ } else {
+#ifdef DEBUG
+ Label ok;
+ masm.ma_cmp(type, Imm32(tag), *scratch);
+ masm.ma_b(&ok, Assembler::Equal);
+ scratch.reset();
+ masm.assumeUnreachable("Infallible unbox type mismatch");
+ masm.bind(&ok);
+#endif
+ }
+
+ // Note: If spectreValueMasking is disabled, then this instruction will
+ // default to a no-op as long as the lowering allocates the same register
+ // for the output and the payload.
+ masm.unboxNonDouble(ValueOperand(type, payload), output,
+ ValueTypeFromMIRType(mir->type()));
+}
+
+void CodeGenerator::visitDouble(LDouble* ins) {
+ const LDefinition* out = ins->getDef(0);
+ masm.loadConstantDouble(ins->value(), ToFloatRegister(out));
+}
+
+void CodeGenerator::visitFloat32(LFloat32* ins) {
+ const LDefinition* out = ins->getDef(0);
+ masm.loadConstantFloat32(ins->value(), ToFloatRegister(out));
+}
+
+void CodeGeneratorARM::splitTagForTest(const ValueOperand& value,
+ ScratchTagScope& tag) {
+ MOZ_ASSERT(value.typeReg() == tag);
+}
+
+void CodeGenerator::visitTestDAndBranch(LTestDAndBranch* test) {
+ const LAllocation* opd = test->input();
+ masm.ma_vcmpz(ToFloatRegister(opd));
+ masm.as_vmrs(pc);
+
+ MBasicBlock* ifTrue = test->ifTrue();
+ MBasicBlock* ifFalse = test->ifFalse();
+ // If the compare set the Z (zero) bit, then the result is definitely false.
+ jumpToBlock(ifFalse, Assembler::Zero);
+ // It is also false if one of the operands is NaN, which is observed as
+ // Overflow.
+ jumpToBlock(ifFalse, Assembler::Overflow);
+ jumpToBlock(ifTrue);
+}
+
+void CodeGenerator::visitTestFAndBranch(LTestFAndBranch* test) {
+ const LAllocation* opd = test->input();
+ masm.ma_vcmpz_f32(ToFloatRegister(opd));
+ masm.as_vmrs(pc);
+
+ MBasicBlock* ifTrue = test->ifTrue();
+ MBasicBlock* ifFalse = test->ifFalse();
+ // If the compare set the Z (zero) bit, then the result is definitely false.
+ jumpToBlock(ifFalse, Assembler::Zero);
+ // It is also false if one of the operands is NaN, which is observed as
+ // Overflow.
+ jumpToBlock(ifFalse, Assembler::Overflow);
+ jumpToBlock(ifTrue);
+}
+
+void CodeGenerator::visitCompareD(LCompareD* comp) {
+ FloatRegister lhs = ToFloatRegister(comp->left());
+ FloatRegister rhs = ToFloatRegister(comp->right());
+
+ Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop());
+ masm.compareDouble(lhs, rhs);
+ masm.emitSet(Assembler::ConditionFromDoubleCondition(cond),
+ ToRegister(comp->output()));
+}
+
+void CodeGenerator::visitCompareF(LCompareF* comp) {
+ FloatRegister lhs = ToFloatRegister(comp->left());
+ FloatRegister rhs = ToFloatRegister(comp->right());
+
+ Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop());
+ masm.compareFloat(lhs, rhs);
+ masm.emitSet(Assembler::ConditionFromDoubleCondition(cond),
+ ToRegister(comp->output()));
+}
+
+void CodeGenerator::visitCompareDAndBranch(LCompareDAndBranch* comp) {
+ FloatRegister lhs = ToFloatRegister(comp->left());
+ FloatRegister rhs = ToFloatRegister(comp->right());
+
+ Assembler::DoubleCondition cond =
+ JSOpToDoubleCondition(comp->cmpMir()->jsop());
+ masm.compareDouble(lhs, rhs);
+ emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(),
+ comp->ifFalse());
+}
+
+void CodeGenerator::visitCompareFAndBranch(LCompareFAndBranch* comp) {
+ FloatRegister lhs = ToFloatRegister(comp->left());
+ FloatRegister rhs = ToFloatRegister(comp->right());
+
+ Assembler::DoubleCondition cond =
+ JSOpToDoubleCondition(comp->cmpMir()->jsop());
+ masm.compareFloat(lhs, rhs);
+ emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(),
+ comp->ifFalse());
+}
+
+void CodeGenerator::visitBitAndAndBranch(LBitAndAndBranch* baab) {
+ // LBitAndAndBranch only represents single-word ANDs, hence it can't be
+ // 64-bit here.
+ MOZ_ASSERT(!baab->is64());
+ Register regL = ToRegister(baab->left());
+ if (baab->right()->isConstant()) {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_tst(regL, Imm32(ToInt32(baab->right())), scratch);
+ } else {
+ masm.ma_tst(regL, ToRegister(baab->right()));
+ }
+ emitBranch(baab->cond(), baab->ifTrue(), baab->ifFalse());
+}
+
+void CodeGenerator::visitWasmUint32ToDouble(LWasmUint32ToDouble* lir) {
+ masm.convertUInt32ToDouble(ToRegister(lir->input()),
+ ToFloatRegister(lir->output()));
+}
+
+void CodeGenerator::visitWasmUint32ToFloat32(LWasmUint32ToFloat32* lir) {
+ masm.convertUInt32ToFloat32(ToRegister(lir->input()),
+ ToFloatRegister(lir->output()));
+}
+
+void CodeGenerator::visitNotI(LNotI* ins) {
+ // It is hard to optimize !x, so just do it the basic way for now.
+ masm.as_cmp(ToRegister(ins->input()), Imm8(0));
+ masm.emitSet(Assembler::Equal, ToRegister(ins->output()));
+}
+
+void CodeGenerator::visitNotI64(LNotI64* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ Register output = ToRegister(lir->output());
+
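+ // A 64-bit value is zero exactly when the OR of its two halves is zero.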
+ masm.ma_orr(input.low, input.high, output);
+ masm.as_cmp(output, Imm8(0));
+ masm.emitSet(Assembler::Equal, output);
+}
+
+void CodeGenerator::visitNotD(LNotD* ins) {
+ // Since this operation is a logical not, we want to set a bit if the double
+ // is falsy, which means 0.0, -0.0 or NaN. When comparing with 0, an input
+ // of 0 will set the Z bit (30) and NaN will set the V bit (28) of the APSR.
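+ //
+ // The flag-free variant below works roughly as follows: after the vmrs the
+ // floating-point status flags are in |dest|, the lsr by 28 moves V to bit 0
+ // and Z to bit 2, the orr with |dest >> 2| folds Z down onto bit 0, and the
+ // final and with 1 leaves dest == (Z | V), i.e. 1 exactly when the input
+ // was +/-0 or NaN.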
+ FloatRegister opd = ToFloatRegister(ins->input());
+ Register dest = ToRegister(ins->output());
+
+ // Do the compare.
+ masm.ma_vcmpz(opd);
+ // TODO There are three variations here to compare performance-wise.
+ bool nocond = true;
+ if (nocond) {
+ // Load the value into the dest register.
+ masm.as_vmrs(dest);
+ masm.ma_lsr(Imm32(28), dest, dest);
+ // 28 + 2 = 30
+ masm.ma_alu(dest, lsr(dest, 2), dest, OpOrr);
+ masm.as_and(dest, dest, Imm8(1));
+ } else {
+ masm.as_vmrs(pc);
+ masm.ma_mov(Imm32(0), dest);
+ masm.ma_mov(Imm32(1), dest, Assembler::Equal);
+ masm.ma_mov(Imm32(1), dest, Assembler::Overflow);
+ }
+}
+
+void CodeGenerator::visitNotF(LNotF* ins) {
+ // Since this operation is a logical not, we want to set a bit if the float
+ // is falsy, which means 0.0, -0.0 or NaN. When comparing with 0, an input
+ // of 0 will set the Z bit (30) and NaN will set the V bit (28) of the APSR.
+ FloatRegister opd = ToFloatRegister(ins->input());
+ Register dest = ToRegister(ins->output());
+
+ // Do the compare.
+ masm.ma_vcmpz_f32(opd);
+ // TODO There are three variations here to compare performance-wise.
+ bool nocond = true;
+ if (nocond) {
+ // Load the value into the dest register.
+ masm.as_vmrs(dest);
+ masm.ma_lsr(Imm32(28), dest, dest);
+ // 28 + 2 = 30: fold the Z bit (now at bit 2) onto the V bit (now at bit 0).
+ masm.ma_alu(dest, lsr(dest, 2), dest, OpOrr);
+ masm.as_and(dest, dest, Imm8(1));
+ } else {
+ masm.as_vmrs(pc);
+ masm.ma_mov(Imm32(0), dest);
+ masm.ma_mov(Imm32(1), dest, Assembler::Equal);
+ masm.ma_mov(Imm32(1), dest, Assembler::Overflow);
+ }
+}
+
+void CodeGeneratorARM::generateInvalidateEpilogue() {
+ // Ensure that there is enough space in the buffer for the OsiPoint patching
+ // to occur. Otherwise, we could overwrite the invalidation epilogue.
+ for (size_t i = 0; i < sizeof(void*); i += Assembler::NopSize()) {
+ masm.nop();
+ }
+
+ masm.bind(&invalidate_);
+
+ // Push the return address of the point that we bailed out at onto the stack.
+ masm.Push(lr);
+
+ // Push the Ion script onto the stack (when we determine what that pointer
+ // is).
+ invalidateEpilogueData_ = masm.pushWithPatch(ImmWord(uintptr_t(-1)));
+
+ // Jump to the invalidator which will replace the current frame.
+ TrampolinePtr thunk = gen->jitRuntime()->getInvalidationThunk();
+ masm.jump(thunk);
+}
+
+void CodeGenerator::visitCompareExchangeTypedArrayElement(
+ LCompareExchangeTypedArrayElement* lir) {
+ Register elements = ToRegister(lir->elements());
+ AnyRegister output = ToAnyRegister(lir->output());
+ Register temp =
+ lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp());
+
+ Register oldval = ToRegister(lir->oldval());
+ Register newval = ToRegister(lir->newval());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval,
+ newval, temp, output);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval,
+ newval, temp, output);
+ }
+}
+
+void CodeGenerator::visitAtomicExchangeTypedArrayElement(
+ LAtomicExchangeTypedArrayElement* lir) {
+ Register elements = ToRegister(lir->elements());
+ AnyRegister output = ToAnyRegister(lir->output());
+ Register temp =
+ lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp());
+
+ Register value = ToRegister(lir->value());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp,
+ output);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp,
+ output);
+ }
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinop(
+ LAtomicTypedArrayElementBinop* lir) {
+ MOZ_ASSERT(!lir->mir()->isForEffect());
+
+ AnyRegister output = ToAnyRegister(lir->output());
+ Register elements = ToRegister(lir->elements());
+ Register flagTemp = ToRegister(lir->temp1());
+ Register outTemp =
+ lir->temp2()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp2());
+ Register value = ToRegister(lir->value());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address mem = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicFetchOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp, outTemp,
+ output);
+ } else {
+ BaseIndex mem(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicFetchOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp, outTemp,
+ output);
+ }
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect(
+ LAtomicTypedArrayElementBinopForEffect* lir) {
+ MOZ_ASSERT(lir->mir()->isForEffect());
+
+ Register elements = ToRegister(lir->elements());
+ Register flagTemp = ToRegister(lir->flagTemp());
+ Register value = ToRegister(lir->value());
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address mem = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicEffectOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp);
+ } else {
+ BaseIndex mem(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicEffectOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp);
+ }
+}
+
+void CodeGenerator::visitAtomicLoad64(LAtomicLoad64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register temp = ToRegister(lir->temp());
+ Register64 temp64 = ToRegister64(lir->temp64());
+ Register out = ToRegister(lir->output());
+
+ const MLoadUnboxedScalar* mir = lir->mir();
+
+ Scalar::Type storageType = mir->storageType();
+
+ if (lir->index()->isConstant()) {
+ Address source =
+ ToAddress(elements, lir->index(), storageType, mir->offsetAdjustment());
+ masm.atomicLoad64(Synchronization::Load(), source, temp64);
+ } else {
+ BaseIndex source(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(storageType), mir->offsetAdjustment());
+ masm.atomicLoad64(Synchronization::Load(), source, temp64);
+ }
+
+ emitCreateBigInt(lir, storageType, temp64, out, temp);
+}
+
+void CodeGenerator::visitAtomicStore64(LAtomicStore64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = ToRegister64(lir->temp2());
+
+ Scalar::Type writeType = lir->mir()->writeType();
+
+ masm.loadBigInt64(value, temp1);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), writeType);
+ masm.atomicStore64(Synchronization::Store(), dest, temp1, temp2);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(writeType));
+ masm.atomicStore64(Synchronization::Store(), dest, temp1, temp2);
+ }
+}
+
+void CodeGenerator::visitCompareExchangeTypedArrayElement64(
+ LCompareExchangeTypedArrayElement64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register oldval = ToRegister(lir->oldval());
+ Register newval = ToRegister(lir->newval());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = ToRegister64(lir->temp2());
+ Register64 temp3 = ToRegister64(lir->temp3());
+ Register out = ToRegister(lir->output());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ masm.loadBigInt64(oldval, temp1);
+ masm.loadBigInt64(newval, temp2);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.compareExchange64(Synchronization::Full(), dest, temp1, temp2, temp3);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.compareExchange64(Synchronization::Full(), dest, temp1, temp2, temp3);
+ }
+
+ emitCreateBigInt(lir, arrayType, temp3, out, temp1.scratchReg());
+}
+
+void CodeGenerator::visitAtomicExchangeTypedArrayElement64(
+ LAtomicExchangeTypedArrayElement64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register temp2 = ToRegister(lir->temp2());
+ Register out = ToRegister(lir->output());
+ Register64 temp64 = Register64(temp2, out);
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ masm.loadBigInt64(value, temp64);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicExchange64(Synchronization::Full(), dest, temp64, temp1);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicExchange64(Synchronization::Full(), dest, temp64, temp1);
+ }
+
+ emitCreateBigInt(lir, arrayType, temp1, out, temp2);
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinop64(
+ LAtomicTypedArrayElementBinop64* lir) {
+ MOZ_ASSERT(!lir->mir()->isForEffect());
+
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = ToRegister64(lir->temp2());
+ Register64 temp3 = ToRegister64(lir->temp3());
+ Register out = ToRegister(lir->output());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+ AtomicOp atomicOp = lir->mir()->operation();
+
+ masm.loadBigInt64(value, temp1);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicFetchOp64(Synchronization::Full(), atomicOp, temp1, dest, temp2,
+ temp3);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicFetchOp64(Synchronization::Full(), atomicOp, temp1, dest, temp2,
+ temp3);
+ }
+
+ emitCreateBigInt(lir, arrayType, temp3, out, temp2.scratchReg());
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect64(
+ LAtomicTypedArrayElementBinopForEffect64* lir) {
+ MOZ_ASSERT(lir->mir()->isForEffect());
+
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = ToRegister64(lir->temp2());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+ AtomicOp atomicOp = lir->mir()->operation();
+
+ masm.loadBigInt64(value, temp1);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicEffectOp64(Synchronization::Full(), atomicOp, temp1, dest,
+ temp2);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicEffectOp64(Synchronization::Full(), atomicOp, temp1, dest,
+ temp2);
+ }
+}
+
+void CodeGenerator::visitWasmSelect(LWasmSelect* ins) {
+ MIRType mirType = ins->mir()->type();
+
+ Register cond = ToRegister(ins->condExpr());
+ masm.as_cmp(cond, Imm8(0));
+
+ if (mirType == MIRType::Int32 || mirType == MIRType::RefOrNull) {
+ Register falseExpr = ToRegister(ins->falseExpr());
+ Register out = ToRegister(ins->output());
+ MOZ_ASSERT(ToRegister(ins->trueExpr()) == out,
+ "true expr input is reused for output");
+ masm.ma_mov(falseExpr, out, LeaveCC, Assembler::Zero);
+ return;
+ }
+
+ FloatRegister out = ToFloatRegister(ins->output());
+ MOZ_ASSERT(ToFloatRegister(ins->trueExpr()) == out,
+ "true expr input is reused for output");
+
+ FloatRegister falseExpr = ToFloatRegister(ins->falseExpr());
+
+ if (mirType == MIRType::Double) {
+ masm.moveDouble(falseExpr, out, Assembler::Zero);
+ } else if (mirType == MIRType::Float32) {
+ masm.moveFloat32(falseExpr, out, Assembler::Zero);
+ } else {
+ MOZ_CRASH("unhandled type in visitWasmSelect!");
+ }
+}
+
+// We expect to handle only the case where compare is {U,}Int32 and select is
+// {U,}Int32, and the "true" input is reused for the output.
+void CodeGenerator::visitWasmCompareAndSelect(LWasmCompareAndSelect* ins) {
+ bool cmpIs32bit = ins->compareType() == MCompare::Compare_Int32 ||
+ ins->compareType() == MCompare::Compare_UInt32;
+ bool selIs32bit = ins->mir()->type() == MIRType::Int32;
+
+ MOZ_RELEASE_ASSERT(
+ cmpIs32bit && selIs32bit,
+ "CodeGenerator::visitWasmCompareAndSelect: unexpected types");
+
+ Register trueExprAndDest = ToRegister(ins->output());
+ MOZ_ASSERT(ToRegister(ins->ifTrueExpr()) == trueExprAndDest,
+ "true expr input is reused for output");
+
+ Assembler::Condition cond = Assembler::InvertCondition(
+ JSOpToCondition(ins->compareType(), ins->jsop()));
+ const LAllocation* rhs = ins->rightExpr();
+ const LAllocation* falseExpr = ins->ifFalseExpr();
+ Register lhs = ToRegister(ins->leftExpr());
+
+ masm.cmp32Move32(cond, lhs, ToRegister(rhs), ToRegister(falseExpr),
+ trueExprAndDest);
+}
+
+void CodeGenerator::visitWasmReinterpret(LWasmReinterpret* lir) {
+ MOZ_ASSERT(gen->compilingWasm());
+ MWasmReinterpret* ins = lir->mir();
+
+ MIRType to = ins->type();
+ DebugOnly<MIRType> from = ins->input()->type();
+
+ switch (to) {
+ case MIRType::Int32:
+ MOZ_ASSERT(static_cast<MIRType>(from) == MIRType::Float32);
+ masm.ma_vxfer(ToFloatRegister(lir->input()), ToRegister(lir->output()));
+ break;
+ case MIRType::Float32:
+ MOZ_ASSERT(static_cast<MIRType>(from) == MIRType::Int32);
+ masm.ma_vxfer(ToRegister(lir->input()), ToFloatRegister(lir->output()));
+ break;
+ case MIRType::Double:
+ case MIRType::Int64:
+ MOZ_CRASH("not handled by this LIR opcode");
+ default:
+ MOZ_CRASH("unexpected WasmReinterpret");
+ }
+}
+
+void CodeGenerator::visitAsmJSLoadHeap(LAsmJSLoadHeap* ins) {
+ const MAsmJSLoadHeap* mir = ins->mir();
+
+ const LAllocation* ptr = ins->ptr();
+ const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
+
+ bool isSigned;
+ int size;
+ bool isFloat = false;
+ switch (mir->accessType()) {
+ case Scalar::Int8:
+ isSigned = true;
+ size = 8;
+ break;
+ case Scalar::Uint8:
+ isSigned = false;
+ size = 8;
+ break;
+ case Scalar::Int16:
+ isSigned = true;
+ size = 16;
+ break;
+ case Scalar::Uint16:
+ isSigned = false;
+ size = 16;
+ break;
+ case Scalar::Int32:
+ case Scalar::Uint32:
+ isSigned = true;
+ size = 32;
+ break;
+ case Scalar::Float64:
+ isFloat = true;
+ size = 64;
+ break;
+ case Scalar::Float32:
+ isFloat = true;
+ size = 32;
+ break;
+ default:
+ MOZ_CRASH("unexpected array type");
+ }
+
+ if (ptr->isConstant()) {
+ MOZ_ASSERT(!mir->needsBoundsCheck());
+ int32_t ptrImm = ptr->toConstant()->toInt32();
+ MOZ_ASSERT(ptrImm >= 0);
+ if (isFloat) {
+ ScratchRegisterScope scratch(masm);
+ VFPRegister vd(ToFloatRegister(ins->output()));
+ if (size == 32) {
+ masm.ma_vldr(Address(HeapReg, ptrImm), vd.singleOverlay(), scratch,
+ Assembler::Always);
+ } else {
+ masm.ma_vldr(Address(HeapReg, ptrImm), vd, scratch, Assembler::Always);
+ }
+ } else {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_dataTransferN(IsLoad, size, isSigned, HeapReg, Imm32(ptrImm),
+ ToRegister(ins->output()), scratch, Offset,
+ Assembler::Always);
+ }
+ } else {
+ Register ptrReg = ToRegister(ptr);
+ if (isFloat) {
+ FloatRegister output = ToFloatRegister(ins->output());
+ if (size == 32) {
+ output = output.singleOverlay();
+ }
+
+ Assembler::Condition cond = Assembler::Always;
+ if (mir->needsBoundsCheck()) {
+ Register boundsCheckLimitReg = ToRegister(boundsCheckLimit);
+ masm.as_cmp(ptrReg, O2Reg(boundsCheckLimitReg));
+ if (size == 32) {
+ masm.ma_vimm_f32(GenericNaN(), output, Assembler::AboveOrEqual);
+ } else {
+ masm.ma_vimm(GenericNaN(), output, Assembler::AboveOrEqual);
+ }
+ cond = Assembler::Below;
+ }
+
+ ScratchRegisterScope scratch(masm);
+ masm.ma_vldr(output, HeapReg, ptrReg, scratch, 0, cond);
+ } else {
+ Register output = ToRegister(ins->output());
+
+ Assembler::Condition cond = Assembler::Always;
+ if (mir->needsBoundsCheck()) {
+ Register boundsCheckLimitReg = ToRegister(boundsCheckLimit);
+ masm.as_cmp(ptrReg, O2Reg(boundsCheckLimitReg));
+ masm.ma_mov(Imm32(0), output, Assembler::AboveOrEqual);
+ cond = Assembler::Below;
+ }
+
+ ScratchRegisterScope scratch(masm);
+ masm.ma_dataTransferN(IsLoad, size, isSigned, HeapReg, ptrReg, output,
+ scratch, Offset, cond);
+ }
+ }
+}
+
+void CodeGenerator::visitWasmHeapBase(LWasmHeapBase* ins) {
+ MOZ_ASSERT(ins->instance()->isBogus());
+ masm.movePtr(HeapReg, ToRegister(ins->output()));
+}
+
+template <typename T>
+void CodeGeneratorARM::emitWasmLoad(T* lir) {
+ const MWasmLoad* mir = lir->mir();
+ MIRType resultType = mir->type();
+ Register ptr;
+
+ if (mir->access().offset() || mir->access().type() == Scalar::Int64) {
+ ptr = ToRegister(lir->ptrCopy());
+ } else {
+ MOZ_ASSERT(lir->ptrCopy()->isBogusTemp());
+ ptr = ToRegister(lir->ptr());
+ }
+
+ if (resultType == MIRType::Int64) {
+ masm.wasmLoadI64(mir->access(), HeapReg, ptr, ptr, ToOutRegister64(lir));
+ } else {
+ masm.wasmLoad(mir->access(), HeapReg, ptr, ptr,
+ ToAnyRegister(lir->output()));
+ }
+}
+
+void CodeGenerator::visitWasmLoad(LWasmLoad* lir) { emitWasmLoad(lir); }
+
+void CodeGenerator::visitWasmLoadI64(LWasmLoadI64* lir) { emitWasmLoad(lir); }
+
+void CodeGenerator::visitWasmAddOffset(LWasmAddOffset* lir) {
+ MWasmAddOffset* mir = lir->mir();
+ Register base = ToRegister(lir->base());
+ Register out = ToRegister(lir->output());
+
+ ScratchRegisterScope scratch(masm);
+ masm.ma_add(base, Imm32(mir->offset()), out, scratch, SetCC);
+ OutOfLineAbortingWasmTrap* ool = new (alloc())
+ OutOfLineAbortingWasmTrap(mir->bytecodeOffset(), wasm::Trap::OutOfBounds);
+ addOutOfLineCode(ool, mir);
+ masm.ma_b(ool->entry(), Assembler::CarrySet);
+}
+
+void CodeGenerator::visitWasmAddOffset64(LWasmAddOffset64* lir) {
+ MWasmAddOffset* mir = lir->mir();
+ Register64 base = ToRegister64(lir->base());
+ Register64 out = ToOutRegister64(lir);
+ MOZ_ASSERT(base.low != out.high && base.high != out.low);
+
+ ScratchRegisterScope scratch(masm);
+ masm.ma_add(base.low, Imm32(mir->offset()), out.low, scratch, SetCC);
+ masm.ma_adc(base.high, Imm32(mir->offset() >> 32), out.high, scratch, SetCC);
+ OutOfLineAbortingWasmTrap* ool = new (alloc())
+ OutOfLineAbortingWasmTrap(mir->bytecodeOffset(), wasm::Trap::OutOfBounds);
+ addOutOfLineCode(ool, mir);
+ masm.ma_b(ool->entry(), Assembler::CarrySet);
+}
+
+template <typename T>
+void CodeGeneratorARM::emitWasmStore(T* lir) {
+ const MWasmStore* mir = lir->mir();
+ Scalar::Type accessType = mir->access().type();
+ Register ptr;
+
+ // Maybe add the offset.
+ if (mir->access().offset() || accessType == Scalar::Int64) {
+ ptr = ToRegister(lir->ptrCopy());
+ } else {
+ MOZ_ASSERT(lir->ptrCopy()->isBogusTemp());
+ ptr = ToRegister(lir->ptr());
+ }
+
+ if (accessType == Scalar::Int64) {
+ masm.wasmStoreI64(mir->access(),
+ ToRegister64(lir->getInt64Operand(lir->ValueIndex)),
+ HeapReg, ptr, ptr);
+ } else {
+ masm.wasmStore(mir->access(),
+ ToAnyRegister(lir->getOperand(lir->ValueIndex)), HeapReg,
+ ptr, ptr);
+ }
+}
+
+void CodeGenerator::visitWasmStore(LWasmStore* lir) { emitWasmStore(lir); }
+
+void CodeGenerator::visitWasmStoreI64(LWasmStoreI64* lir) {
+ emitWasmStore(lir);
+}
+
+void CodeGenerator::visitAsmJSStoreHeap(LAsmJSStoreHeap* ins) {
+ const MAsmJSStoreHeap* mir = ins->mir();
+
+ const LAllocation* ptr = ins->ptr();
+ const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
+
+ bool isSigned;
+ int size;
+ bool isFloat = false;
+ switch (mir->accessType()) {
+ case Scalar::Int8:
+ case Scalar::Uint8:
+ isSigned = false;
+ size = 8;
+ break;
+ case Scalar::Int16:
+ case Scalar::Uint16:
+ isSigned = false;
+ size = 16;
+ break;
+ case Scalar::Int32:
+ case Scalar::Uint32:
+ isSigned = true;
+ size = 32;
+ break;
+ case Scalar::Float64:
+ isFloat = true;
+ size = 64;
+ break;
+ case Scalar::Float32:
+ isFloat = true;
+ size = 32;
+ break;
+ default:
+ MOZ_CRASH("unexpected array type");
+ }
+
+ if (ptr->isConstant()) {
+ MOZ_ASSERT(!mir->needsBoundsCheck());
+ int32_t ptrImm = ptr->toConstant()->toInt32();
+ MOZ_ASSERT(ptrImm >= 0);
+ if (isFloat) {
+ VFPRegister vd(ToFloatRegister(ins->value()));
+ Address addr(HeapReg, ptrImm);
+ if (size == 32) {
+ masm.storeFloat32(vd, addr);
+ } else {
+ masm.storeDouble(vd, addr);
+ }
+ } else {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_dataTransferN(IsStore, size, isSigned, HeapReg, Imm32(ptrImm),
+ ToRegister(ins->value()), scratch, Offset,
+ Assembler::Always);
+ }
+ } else {
+ Register ptrReg = ToRegister(ptr);
+
+ Assembler::Condition cond = Assembler::Always;
+ if (mir->needsBoundsCheck()) {
+ Register boundsCheckLimitReg = ToRegister(boundsCheckLimit);
+ masm.as_cmp(ptrReg, O2Reg(boundsCheckLimitReg));
+ cond = Assembler::Below;
+ }
+
+ if (isFloat) {
+ ScratchRegisterScope scratch(masm);
+ FloatRegister value = ToFloatRegister(ins->value());
+ if (size == 32) {
+ value = value.singleOverlay();
+ }
+
+ masm.ma_vstr(value, HeapReg, ptrReg, scratch, 0, Assembler::Below);
+ } else {
+ ScratchRegisterScope scratch(masm);
+ Register value = ToRegister(ins->value());
+ masm.ma_dataTransferN(IsStore, size, isSigned, HeapReg, ptrReg, value,
+ scratch, Offset, cond);
+ }
+ }
+}
+
+void CodeGenerator::visitWasmCompareExchangeHeap(
+ LWasmCompareExchangeHeap* ins) {
+ MWasmCompareExchangeHeap* mir = ins->mir();
+
+ const LAllocation* ptr = ins->ptr();
+ Register ptrReg = ToRegister(ptr);
+ BaseIndex srcAddr(HeapReg, ptrReg, TimesOne, mir->access().offset());
+
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ Register oldval = ToRegister(ins->oldValue());
+ Register newval = ToRegister(ins->newValue());
+ Register out = ToRegister(ins->output());
+
+ masm.wasmCompareExchange(mir->access(), srcAddr, oldval, newval, out);
+}
+
+void CodeGenerator::visitWasmAtomicExchangeHeap(LWasmAtomicExchangeHeap* ins) {
+ MWasmAtomicExchangeHeap* mir = ins->mir();
+
+ Register ptrReg = ToRegister(ins->ptr());
+ Register value = ToRegister(ins->value());
+ Register output = ToRegister(ins->output());
+ BaseIndex srcAddr(HeapReg, ptrReg, TimesOne, mir->access().offset());
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ masm.wasmAtomicExchange(mir->access(), srcAddr, value, output);
+}
+
+void CodeGenerator::visitWasmAtomicBinopHeap(LWasmAtomicBinopHeap* ins) {
+ MWasmAtomicBinopHeap* mir = ins->mir();
+ MOZ_ASSERT(mir->hasUses());
+
+ Register ptrReg = ToRegister(ins->ptr());
+ Register flagTemp = ToRegister(ins->flagTemp());
+ Register output = ToRegister(ins->output());
+ const LAllocation* value = ins->value();
+ AtomicOp op = mir->operation();
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ BaseIndex srcAddr(HeapReg, ptrReg, TimesOne, mir->access().offset());
+ masm.wasmAtomicFetchOp(mir->access(), op, ToRegister(value), srcAddr,
+ flagTemp, output);
+}
+
+void CodeGenerator::visitWasmAtomicBinopHeapForEffect(
+ LWasmAtomicBinopHeapForEffect* ins) {
+ MWasmAtomicBinopHeap* mir = ins->mir();
+ MOZ_ASSERT(!mir->hasUses());
+
+ Register ptrReg = ToRegister(ins->ptr());
+ Register flagTemp = ToRegister(ins->flagTemp());
+ const LAllocation* value = ins->value();
+ AtomicOp op = mir->operation();
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ BaseIndex srcAddr(HeapReg, ptrReg, TimesOne, mir->access().offset());
+ masm.wasmAtomicEffectOp(mir->access(), op, ToRegister(value), srcAddr,
+ flagTemp);
+}
+
+void CodeGenerator::visitWasmStackArg(LWasmStackArg* ins) {
+ const MWasmStackArg* mir = ins->mir();
+ Address dst(StackPointer, mir->spOffset());
+ ScratchRegisterScope scratch(masm);
+ SecondScratchRegisterScope scratch2(masm);
+
+ if (ins->arg()->isConstant()) {
+ masm.ma_mov(Imm32(ToInt32(ins->arg())), scratch);
+ masm.ma_str(scratch, dst, scratch2);
+ } else {
+ if (ins->arg()->isGeneralReg()) {
+ masm.ma_str(ToRegister(ins->arg()), dst, scratch);
+ } else {
+ masm.ma_vstr(ToFloatRegister(ins->arg()), dst, scratch);
+ }
+ }
+}
+
+void CodeGenerator::visitUDiv(LUDiv* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register output = ToRegister(ins->output());
+
+ Label done;
+ generateUDivModZeroCheck(rhs, output, &done, ins->snapshot(), ins->mir());
+
+ masm.ma_udiv(lhs, rhs, output);
+
+ // Check for large unsigned result - represent as double.
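+ // (E.g. 0xFFFFFFFF / 1 has the sign bit set when reinterpreted as an
+ // int32, which the signed LessThan check below detects.)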
+ if (!ins->mir()->isTruncated()) {
+ MOZ_ASSERT(ins->mir()->fallible());
+ masm.as_cmp(output, Imm8(0));
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+ }
+
+ // Check for non-zero remainder if not truncating to int.
+ if (!ins->mir()->canTruncateRemainder()) {
+ MOZ_ASSERT(ins->mir()->fallible());
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_mul(rhs, output, scratch);
+ masm.ma_cmp(scratch, lhs);
+ }
+ bailoutIf(Assembler::NotEqual, ins->snapshot());
+ }
+
+ if (done.used()) {
+ masm.bind(&done);
+ }
+}
+
+void CodeGenerator::visitUMod(LUMod* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register output = ToRegister(ins->output());
+
+ Label done;
+ generateUDivModZeroCheck(rhs, output, &done, ins->snapshot(), ins->mir());
+
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_umod(lhs, rhs, output, scratch);
+ }
+
+ // Check for large unsigned result - represent as double.
+ if (!ins->mir()->isTruncated()) {
+ MOZ_ASSERT(ins->mir()->fallible());
+ masm.as_cmp(output, Imm8(0));
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+ }
+
+ if (done.used()) {
+ masm.bind(&done);
+ }
+}
+
+template <class T>
+void CodeGeneratorARM::generateUDivModZeroCheck(Register rhs, Register output,
+ Label* done,
+ LSnapshot* snapshot, T* mir) {
+ if (!mir) {
+ return;
+ }
+ if (mir->canBeDivideByZero()) {
+ masm.as_cmp(rhs, Imm8(0));
+ if (mir->isTruncated()) {
+ if (mir->trapOnError()) {
+ Label nonZero;
+ masm.ma_b(&nonZero, Assembler::NotEqual);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+ masm.bind(&nonZero);
+ } else {
+ Label skip;
+ masm.ma_b(&skip, Assembler::NotEqual);
+ // Infinity|0 == 0
+ masm.ma_mov(Imm32(0), output);
+ masm.ma_b(done);
+ masm.bind(&skip);
+ }
+ } else {
+ // Bailout for divide by zero
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Equal, snapshot);
+ }
+ }
+}
+
+void CodeGenerator::visitSoftUDivOrMod(LSoftUDivOrMod* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register output = ToRegister(ins->output());
+
+ MOZ_ASSERT(lhs == r0);
+ MOZ_ASSERT(rhs == r1);
+ MOZ_ASSERT(output == r0);
+
+ Label done;
+ MDiv* div = ins->mir()->isDiv() ? ins->mir()->toDiv() : nullptr;
+ MMod* mod = !div ? ins->mir()->toMod() : nullptr;
+
+ generateUDivModZeroCheck(rhs, output, &done, ins->snapshot(), div);
+ generateUDivModZeroCheck(rhs, output, &done, ins->snapshot(), mod);
+
+ if (gen->compilingWasm()) {
+ masm.Push(InstanceReg);
+ int32_t framePushedAfterInstance = masm.framePushed();
+ masm.setupWasmABICall();
+ masm.passABIArg(lhs);
+ masm.passABIArg(rhs);
+ wasm::BytecodeOffset bytecodeOffset =
+ (div ? div->bytecodeOffset() : mod->bytecodeOffset());
+ int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance;
+ masm.callWithABI(bytecodeOffset, wasm::SymbolicAddress::aeabi_uidivmod,
+ mozilla::Some(instanceOffset));
+ masm.Pop(InstanceReg);
+ } else {
+ using Fn = int64_t (*)(int, int);
+ masm.setupAlignedABICall();
+ masm.passABIArg(lhs);
+ masm.passABIArg(rhs);
+ masm.callWithABI<Fn, __aeabi_uidivmod>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther);
+ }
+
+ if (mod) {
+ MOZ_ASSERT(output == r0, "output should not be r1 for mod");
+ masm.move32(r1, output);
+ }
+
+ // uidivmod returns the quotient in r0, and the remainder in r1.
+ if (div && !div->canTruncateRemainder()) {
+ MOZ_ASSERT(div->fallible());
+ masm.as_cmp(r1, Imm8(0));
+ bailoutIf(Assembler::NonZero, ins->snapshot());
+ }
+
+ // Bailout for big unsigned results
+ if ((div && !div->isTruncated()) || (mod && !mod->isTruncated())) {
+ DebugOnly<bool> isFallible =
+ (div && div->fallible()) || (mod && mod->fallible());
+ MOZ_ASSERT(isFallible);
+ masm.as_cmp(output, Imm8(0));
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+ }
+
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitEffectiveAddress(LEffectiveAddress* ins) {
+ const MEffectiveAddress* mir = ins->mir();
+ Register base = ToRegister(ins->base());
+ Register index = ToRegister(ins->index());
+ Register output = ToRegister(ins->output());
+
+ ScratchRegisterScope scratch(masm);
+
+ masm.as_add(output, base, lsl(index, mir->scale()));
+ masm.ma_add(Imm32(mir->displacement()), output, scratch);
+}
+
+void CodeGenerator::visitNegI(LNegI* ins) {
+ Register input = ToRegister(ins->input());
+ masm.ma_neg(input, ToRegister(ins->output()));
+}
+
+void CodeGenerator::visitNegI64(LNegI64* ins) {
+ Register64 input = ToRegister64(ins->getInt64Operand(0));
+ MOZ_ASSERT(input == ToOutRegister64(ins));
+ masm.neg64(input);
+}
+
+void CodeGenerator::visitNegD(LNegD* ins) {
+ FloatRegister input = ToFloatRegister(ins->input());
+ masm.ma_vneg(input, ToFloatRegister(ins->output()));
+}
+
+void CodeGenerator::visitNegF(LNegF* ins) {
+ FloatRegister input = ToFloatRegister(ins->input());
+ masm.ma_vneg_f32(input, ToFloatRegister(ins->output()));
+}
+
+void CodeGenerator::visitMemoryBarrier(LMemoryBarrier* ins) {
+ masm.memoryBarrier(ins->type());
+}
+
+void CodeGenerator::visitWasmTruncateToInt32(LWasmTruncateToInt32* lir) {
+ auto input = ToFloatRegister(lir->input());
+ auto output = ToRegister(lir->output());
+
+ MWasmTruncateToInt32* mir = lir->mir();
+ MIRType fromType = mir->input()->type();
+
+ OutOfLineWasmTruncateCheck* ool = nullptr;
+ Label* oolEntry = nullptr;
+ if (!lir->mir()->isSaturating()) {
+ ool = new (alloc())
+ OutOfLineWasmTruncateCheck(mir, input, Register::Invalid());
+ addOutOfLineCode(ool, mir);
+ oolEntry = ool->entry();
+ }
+
+ masm.wasmTruncateToInt32(input, output, fromType, mir->isUnsigned(),
+ mir->isSaturating(), oolEntry);
+
+ if (!lir->mir()->isSaturating()) {
+ masm.bind(ool->rejoin());
+ }
+}
+
+void CodeGenerator::visitWasmTruncateToInt64(LWasmTruncateToInt64* lir) {
+ MOZ_ASSERT(gen->compilingWasm());
+ MOZ_ASSERT(ToRegister(lir->instance()) == InstanceReg);
+ masm.Push(InstanceReg);
+ int32_t framePushedAfterInstance = masm.framePushed();
+
+ FloatRegister input = ToFloatRegister(lir->input());
+ FloatRegister inputDouble = input;
+ Register64 output = ToOutRegister64(lir);
+
+ MWasmBuiltinTruncateToInt64* mir = lir->mir();
+ MIRType fromType = mir->input()->type();
+
+ OutOfLineWasmTruncateCheck* ool = nullptr;
+ if (!lir->mir()->isSaturating()) {
+ ool = new (alloc())
+ OutOfLineWasmTruncateCheck(mir, input, Register64::Invalid());
+ addOutOfLineCode(ool, mir);
+ }
+
+ ScratchDoubleScope fpscratch(masm);
+ if (fromType == MIRType::Float32) {
+ inputDouble = fpscratch;
+ masm.convertFloat32ToDouble(input, inputDouble);
+ }
+
+ masm.Push(input);
+
+ masm.setupWasmABICall();
+ masm.passABIArg(inputDouble, MoveOp::DOUBLE);
+
+ int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance;
+ if (lir->mir()->isSaturating()) {
+ if (lir->mir()->isUnsigned()) {
+ masm.callWithABI(mir->bytecodeOffset(),
+ wasm::SymbolicAddress::SaturatingTruncateDoubleToUint64,
+ mozilla::Some(instanceOffset));
+ } else {
+ masm.callWithABI(mir->bytecodeOffset(),
+ wasm::SymbolicAddress::SaturatingTruncateDoubleToInt64,
+ mozilla::Some(instanceOffset));
+ }
+ } else {
+ if (lir->mir()->isUnsigned()) {
+ masm.callWithABI(mir->bytecodeOffset(),
+ wasm::SymbolicAddress::TruncateDoubleToUint64,
+ mozilla::Some(instanceOffset));
+ } else {
+ masm.callWithABI(mir->bytecodeOffset(),
+ wasm::SymbolicAddress::TruncateDoubleToInt64,
+ mozilla::Some(instanceOffset));
+ }
+ }
+
+ masm.Pop(input);
+ masm.Pop(InstanceReg);
+
+ // TruncateDoubleTo{UI,I}nt64 returns 0x8000000000000000 to indicate
+ // exceptional results, so check for that and produce the appropriate
+ // traps. The Saturating form always returns a normal value and never
+ // needs traps.
+ if (!lir->mir()->isSaturating()) {
+ ScratchRegisterScope scratch(masm);
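+ // Chain the two comparisons: the low word is only compared (condition
+ // Equal) if the high word matched, so Equal afterwards means the full
+ // 64-bit result was exactly 0x8000000000000000.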
+ masm.ma_cmp(output.high, Imm32(0x80000000), scratch);
+ masm.as_cmp(output.low, Imm8(0x00000000), Assembler::Equal);
+ masm.ma_b(ool->entry(), Assembler::Equal);
+
+ masm.bind(ool->rejoin());
+ }
+
+ MOZ_ASSERT(ReturnReg64 == output);
+}
+
+void CodeGeneratorARM::visitOutOfLineWasmTruncateCheck(
+ OutOfLineWasmTruncateCheck* ool) {
+ // On ARM, saturating truncation codegen handles saturating itself rather than
+ // relying on out-of-line fixup code.
+ if (ool->isSaturating()) {
+ return;
+ }
+
+ masm.outOfLineWasmTruncateToIntCheck(ool->input(), ool->fromType(),
+ ool->toType(), ool->isUnsigned(),
+ ool->rejoin(), ool->bytecodeOffset());
+}
+
+void CodeGenerator::visitInt64ToFloatingPointCall(
+ LInt64ToFloatingPointCall* lir) {
+ MOZ_ASSERT(gen->compilingWasm());
+ MOZ_ASSERT(ToRegister(lir->getOperand(LInt64ToFloatingPointCall::Instance)) ==
+ InstanceReg);
+ masm.Push(InstanceReg);
+ int32_t framePushedAfterInstance = masm.framePushed();
+
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+
+ MBuiltinInt64ToFloatingPoint* mir = lir->mir();
+ MIRType toType = mir->type();
+
+ masm.setupWasmABICall();
+ masm.passABIArg(input.high);
+ masm.passABIArg(input.low);
+
+ bool isUnsigned = mir->isUnsigned();
+ wasm::SymbolicAddress callee =
+ toType == MIRType::Float32
+ ? (isUnsigned ? wasm::SymbolicAddress::Uint64ToFloat32
+ : wasm::SymbolicAddress::Int64ToFloat32)
+ : (isUnsigned ? wasm::SymbolicAddress::Uint64ToDouble
+ : wasm::SymbolicAddress::Int64ToDouble);
+
+ int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance;
+ MoveOp::Type result =
+ toType == MIRType::Float32 ? MoveOp::FLOAT32 : MoveOp::DOUBLE;
+ masm.callWithABI(mir->bytecodeOffset(), callee, mozilla::Some(instanceOffset),
+ result);
+
+ DebugOnly<FloatRegister> output(ToFloatRegister(lir->output()));
+ MOZ_ASSERT_IF(toType == MIRType::Double, output.value == ReturnDoubleReg);
+ MOZ_ASSERT_IF(toType == MIRType::Float32, output.value == ReturnFloat32Reg);
+
+ masm.Pop(InstanceReg);
+}
+
+void CodeGenerator::visitCopySignF(LCopySignF* ins) {
+ FloatRegister lhs = ToFloatRegister(ins->getOperand(0));
+ FloatRegister rhs = ToFloatRegister(ins->getOperand(1));
+ FloatRegister output = ToFloatRegister(ins->getDef(0));
+
+ Register lhsi = ToRegister(ins->getTemp(0));
+ Register rhsi = ToRegister(ins->getTemp(1));
+
+ masm.ma_vxfer(lhs, lhsi);
+ masm.ma_vxfer(rhs, rhsi);
+
+ ScratchRegisterScope scratch(masm);
+
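+ // INT32_MAX == 0x7fffffff masks off the sign bit; INT32_MIN == 0x80000000
+ // keeps only the sign bit.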
+ // Clear lhs's sign.
+ masm.ma_and(Imm32(INT32_MAX), lhsi, lhsi, scratch);
+
+ // Keep rhs's sign.
+ masm.ma_and(Imm32(INT32_MIN), rhsi, rhsi, scratch);
+
+ // Combine.
+ masm.ma_orr(lhsi, rhsi, rhsi);
+
+ masm.ma_vxfer(rhsi, output);
+}
+
+void CodeGenerator::visitCopySignD(LCopySignD* ins) {
+ FloatRegister lhs = ToFloatRegister(ins->getOperand(0));
+ FloatRegister rhs = ToFloatRegister(ins->getOperand(1));
+ FloatRegister output = ToFloatRegister(ins->getDef(0));
+
+ Register lhsi = ToRegister(ins->getTemp(0));
+ Register rhsi = ToRegister(ins->getTemp(1));
+
+ // Manipulate high words of double inputs.
+ masm.as_vxfer(lhsi, InvalidReg, lhs, Assembler::FloatToCore,
+ Assembler::Always, 1);
+ masm.as_vxfer(rhsi, InvalidReg, rhs, Assembler::FloatToCore,
+ Assembler::Always, 1);
+
+ ScratchRegisterScope scratch(masm);
+
+ // Clear lhs's sign.
+ masm.ma_and(Imm32(INT32_MAX), lhsi, lhsi, scratch);
+
+ // Keep rhs's sign.
+ masm.ma_and(Imm32(INT32_MIN), rhsi, rhsi, scratch);
+
+ // Combine.
+ masm.ma_orr(lhsi, rhsi, rhsi);
+
+  // Reconstruct the output from lhs's unchanged low word and the combined
+  // high word.
+ masm.as_vxfer(lhsi, InvalidReg, lhs, Assembler::FloatToCore,
+ Assembler::Always, 0);
+ masm.ma_vxfer(lhsi, rhsi, output);
+}
+
+void CodeGenerator::visitWrapInt64ToInt32(LWrapInt64ToInt32* lir) {
+ const LInt64Allocation& input = lir->getInt64Operand(0);
+ Register output = ToRegister(lir->output());
+
+ if (lir->mir()->bottomHalf()) {
+ masm.move32(ToRegister(input.low()), output);
+ } else {
+ masm.move32(ToRegister(input.high()), output);
+ }
+}
+
+void CodeGenerator::visitExtendInt32ToInt64(LExtendInt32ToInt64* lir) {
+ Register64 output = ToOutRegister64(lir);
+ MOZ_ASSERT(ToRegister(lir->input()) == output.low);
+
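+  // Zero-extend by clearing the high word; sign-extend by filling it with
+  // copies of the low word's sign bit.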
+ if (lir->mir()->isUnsigned()) {
+ masm.ma_mov(Imm32(0), output.high);
+ } else {
+ masm.ma_asr(Imm32(31), output.low, output.high);
+ }
+}
+
+void CodeGenerator::visitSignExtendInt64(LSignExtendInt64* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ Register64 output = ToOutRegister64(lir);
+ switch (lir->mode()) {
+ case MSignExtendInt64::Byte:
+ masm.move8SignExtend(input.low, output.low);
+ break;
+ case MSignExtendInt64::Half:
+ masm.move16SignExtend(input.low, output.low);
+ break;
+ case MSignExtendInt64::Word:
+ masm.move32(input.low, output.low);
+ break;
+ }
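+  // The high word is the sign extension of the now-canonical low word.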
+ masm.ma_asr(Imm32(31), output.low, output.high);
+}
+
+void CodeGenerator::visitWasmExtendU32Index(LWasmExtendU32Index*) {
+ MOZ_CRASH("64-bit only");
+}
+
+void CodeGenerator::visitWasmWrapU32Index(LWasmWrapU32Index* lir) {
+ // Generates no code on this platform because we just return the low part of
+ // the input register pair.
+ MOZ_ASSERT(ToRegister(lir->input()) == ToRegister(lir->output()));
+}
+
+void CodeGenerator::visitDivOrModI64(LDivOrModI64* lir) {
+ MOZ_ASSERT(gen->compilingWasm());
+ MOZ_ASSERT(ToRegister(lir->getOperand(LDivOrModI64::Instance)) ==
+ InstanceReg);
+ masm.Push(InstanceReg);
+ int32_t framePushedAfterInstance = masm.framePushed();
+
+ Register64 lhs = ToRegister64(lir->getInt64Operand(LDivOrModI64::Lhs));
+ Register64 rhs = ToRegister64(lir->getInt64Operand(LDivOrModI64::Rhs));
+ Register64 output = ToOutRegister64(lir);
+
+ MOZ_ASSERT(output == ReturnReg64);
+
+ Label done;
+
+ // Handle divide by zero.
+ if (lir->canBeDivideByZero()) {
+ Label nonZero;
+ // We can use InstanceReg as temp register because we preserved it
+ // before.
+ masm.branchTest64(Assembler::NonZero, rhs, rhs, InstanceReg, &nonZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, lir->bytecodeOffset());
+ masm.bind(&nonZero);
+ }
+
+ auto* mir = lir->mir();
+
+ // Handle an integer overflow exception from INT64_MIN / -1.
+ if (lir->canBeNegativeOverflow()) {
+ Label notmin;
+ masm.branch64(Assembler::NotEqual, lhs, Imm64(INT64_MIN), &notmin);
+ masm.branch64(Assembler::NotEqual, rhs, Imm64(-1), &notmin);
+ if (mir->isWasmBuiltinModI64()) {
+ masm.xor64(output, output);
+ } else {
+ masm.wasmTrap(wasm::Trap::IntegerOverflow, lir->bytecodeOffset());
+ }
+ masm.jump(&done);
+ masm.bind(&notmin);
+ }
+
+ masm.setupWasmABICall();
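+  // Each int64 argument is passed as two 32-bit halves, high word first,
+  // matching the (hi, lo) parameter order of the i64 div/mod builtins.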
+ masm.passABIArg(lhs.high);
+ masm.passABIArg(lhs.low);
+ masm.passABIArg(rhs.high);
+ masm.passABIArg(rhs.low);
+
+ int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance;
+ if (mir->isWasmBuiltinModI64()) {
+ masm.callWithABI(lir->bytecodeOffset(), wasm::SymbolicAddress::ModI64,
+ mozilla::Some(instanceOffset));
+ } else {
+ masm.callWithABI(lir->bytecodeOffset(), wasm::SymbolicAddress::DivI64,
+ mozilla::Some(instanceOffset));
+ }
+
+ MOZ_ASSERT(ReturnReg64 == output);
+
+ masm.bind(&done);
+ masm.Pop(InstanceReg);
+}
+
+void CodeGenerator::visitUDivOrModI64(LUDivOrModI64* lir) {
+ MOZ_ASSERT(gen->compilingWasm());
+ MOZ_ASSERT(ToRegister(lir->getOperand(LDivOrModI64::Instance)) ==
+ InstanceReg);
+ masm.Push(InstanceReg);
+ int32_t framePushedAfterInstance = masm.framePushed();
+
+ Register64 lhs = ToRegister64(lir->getInt64Operand(LDivOrModI64::Lhs));
+ Register64 rhs = ToRegister64(lir->getInt64Operand(LDivOrModI64::Rhs));
+
+ MOZ_ASSERT(ToOutRegister64(lir) == ReturnReg64);
+
+ // Prevent divide by zero.
+ if (lir->canBeDivideByZero()) {
+ Label nonZero;
+ // We can use InstanceReg as temp register because we preserved it
+ // before.
+ masm.branchTest64(Assembler::NonZero, rhs, rhs, InstanceReg, &nonZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, lir->bytecodeOffset());
+ masm.bind(&nonZero);
+ }
+
+ masm.setupWasmABICall();
+ masm.passABIArg(lhs.high);
+ masm.passABIArg(lhs.low);
+ masm.passABIArg(rhs.high);
+ masm.passABIArg(rhs.low);
+
+ MDefinition* mir = lir->mir();
+ int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance;
+ if (mir->isWasmBuiltinModI64()) {
+ masm.callWithABI(lir->bytecodeOffset(), wasm::SymbolicAddress::UModI64,
+ mozilla::Some(instanceOffset));
+ } else {
+ masm.callWithABI(lir->bytecodeOffset(), wasm::SymbolicAddress::UDivI64,
+ mozilla::Some(instanceOffset));
+ }
+ masm.Pop(InstanceReg);
+}
+
+void CodeGenerator::visitCompareI64(LCompareI64* lir) {
+ MCompare* mir = lir->mir();
+ MOZ_ASSERT(mir->compareType() == MCompare::Compare_Int64 ||
+ mir->compareType() == MCompare::Compare_UInt64);
+
+ const LInt64Allocation lhs = lir->getInt64Operand(LCompareI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LCompareI64::Rhs);
+ Register64 lhsRegs = ToRegister64(lhs);
+ Register output = ToRegister(lir->output());
+
+ bool isSigned = mir->compareType() == MCompare::Compare_Int64;
+ Assembler::Condition condition = JSOpToCondition(lir->jsop(), isSigned);
+ Label done;
+
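+  // Assume the condition holds and set the output to 1; if the branch below
+  // is not taken, fall through and reset it to 0.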
+ masm.move32(Imm32(1), output);
+
+ if (IsConstant(rhs)) {
+ Imm64 imm = Imm64(ToInt64(rhs));
+ masm.branch64(condition, lhsRegs, imm, &done);
+ } else {
+ Register64 rhsRegs = ToRegister64(rhs);
+ masm.branch64(condition, lhsRegs, rhsRegs, &done);
+ }
+
+ masm.move32(Imm32(0), output);
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitCompareI64AndBranch(LCompareI64AndBranch* lir) {
+ MCompare* mir = lir->cmpMir();
+ MOZ_ASSERT(mir->compareType() == MCompare::Compare_Int64 ||
+ mir->compareType() == MCompare::Compare_UInt64);
+
+ const LInt64Allocation lhs = lir->getInt64Operand(LCompareI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LCompareI64::Rhs);
+ Register64 lhsRegs = ToRegister64(lhs);
+
+ bool isSigned = mir->compareType() == MCompare::Compare_Int64;
+ Assembler::Condition condition = JSOpToCondition(lir->jsop(), isSigned);
+
+ Label* trueLabel = getJumpLabelForBranch(lir->ifTrue());
+ Label* falseLabel = getJumpLabelForBranch(lir->ifFalse());
+
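+  // Elide the jump to whichever successor is the fall-through block; if the
+  // true block falls through, invert the condition and branch to the false
+  // target instead.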
+ if (isNextBlock(lir->ifFalse()->lir())) {
+ falseLabel = nullptr;
+ } else if (isNextBlock(lir->ifTrue()->lir())) {
+ condition = Assembler::InvertCondition(condition);
+ trueLabel = falseLabel;
+ falseLabel = nullptr;
+ }
+
+ if (IsConstant(rhs)) {
+ Imm64 imm = Imm64(ToInt64(rhs));
+ masm.branch64(condition, lhsRegs, imm, trueLabel, falseLabel);
+ } else {
+ Register64 rhsRegs = ToRegister64(rhs);
+ masm.branch64(condition, lhsRegs, rhsRegs, trueLabel, falseLabel);
+ }
+}
+
+void CodeGenerator::visitShiftI64(LShiftI64* lir) {
+ const LInt64Allocation lhs = lir->getInt64Operand(LShiftI64::Lhs);
+ LAllocation* rhs = lir->getOperand(LShiftI64::Rhs);
+
+ MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
+
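+  // Shift counts are taken modulo 64, and a constant shift of zero needs no
+  // code because the output reuses the lhs register pair.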
+ if (rhs->isConstant()) {
+ int32_t shift = int32_t(rhs->toConstant()->toInt64() & 0x3F);
+ switch (lir->bitop()) {
+ case JSOp::Lsh:
+ if (shift) {
+ masm.lshift64(Imm32(shift), ToRegister64(lhs));
+ }
+ break;
+ case JSOp::Rsh:
+ if (shift) {
+ masm.rshift64Arithmetic(Imm32(shift), ToRegister64(lhs));
+ }
+ break;
+ case JSOp::Ursh:
+ if (shift) {
+ masm.rshift64(Imm32(shift), ToRegister64(lhs));
+ }
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+ return;
+ }
+
+ switch (lir->bitop()) {
+ case JSOp::Lsh:
+ masm.lshift64(ToRegister(rhs), ToRegister64(lhs));
+ break;
+ case JSOp::Rsh:
+ masm.rshift64Arithmetic(ToRegister(rhs), ToRegister64(lhs));
+ break;
+ case JSOp::Ursh:
+ masm.rshift64(ToRegister(rhs), ToRegister64(lhs));
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+}
+
+void CodeGenerator::visitBitOpI64(LBitOpI64* lir) {
+ const LInt64Allocation lhs = lir->getInt64Operand(LBitOpI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LBitOpI64::Rhs);
+
+ MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
+
+ switch (lir->bitop()) {
+ case JSOp::BitOr:
+ if (IsConstant(rhs)) {
+ masm.or64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
+ } else {
+ masm.or64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
+ }
+ break;
+ case JSOp::BitXor:
+ if (IsConstant(rhs)) {
+ masm.xor64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
+ } else {
+ masm.xor64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
+ }
+ break;
+ case JSOp::BitAnd:
+ if (IsConstant(rhs)) {
+ masm.and64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
+ } else {
+ masm.and64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
+ }
+ break;
+ default:
+ MOZ_CRASH("unexpected binary opcode");
+ }
+}
+
+void CodeGenerator::visitRotateI64(LRotateI64* lir) {
+ MRotate* mir = lir->mir();
+ LAllocation* count = lir->count();
+
+ Register64 input = ToRegister64(lir->input());
+ Register64 output = ToOutRegister64(lir);
+ Register temp = ToTempRegisterOrInvalid(lir->temp());
+
+ if (count->isConstant()) {
+ int32_t c = int32_t(count->toConstant()->toInt64() & 0x3F);
+ if (!c) {
+ masm.move64(input, output);
+ return;
+ }
+ if (mir->isLeftRotate()) {
+ masm.rotateLeft64(Imm32(c), input, output, temp);
+ } else {
+ masm.rotateRight64(Imm32(c), input, output, temp);
+ }
+ } else {
+ if (mir->isLeftRotate()) {
+ masm.rotateLeft64(ToRegister(count), input, output, temp);
+ } else {
+ masm.rotateRight64(ToRegister(count), input, output, temp);
+ }
+ }
+}
+
+void CodeGenerator::visitWasmStackArgI64(LWasmStackArgI64* ins) {
+ const MWasmStackArg* mir = ins->mir();
+ Address dst(StackPointer, mir->spOffset());
+ if (IsConstant(ins->arg())) {
+ masm.store64(Imm64(ToInt64(ins->arg())), dst);
+ } else {
+ masm.store64(ToRegister64(ins->arg()), dst);
+ }
+}
+
+void CodeGenerator::visitWasmSelectI64(LWasmSelectI64* lir) {
+ Register cond = ToRegister(lir->condExpr());
+ const LInt64Allocation falseExpr = lir->falseExpr();
+
+ Register64 out = ToOutRegister64(lir);
+ MOZ_ASSERT(ToRegister64(lir->trueExpr()) == out,
+ "true expr is reused for input");
+
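+  // Test the condition once; when it is zero, conditionally overwrite both
+  // halves of the output (which already holds trueExpr) with falseExpr.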
+ masm.as_cmp(cond, Imm8(0));
+ if (falseExpr.low().isRegister()) {
+ masm.ma_mov(ToRegister(falseExpr.low()), out.low, LeaveCC,
+ Assembler::Equal);
+ masm.ma_mov(ToRegister(falseExpr.high()), out.high, LeaveCC,
+ Assembler::Equal);
+ } else {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_ldr(ToAddress(falseExpr.low()), out.low, scratch, Offset,
+ Assembler::Equal);
+ masm.ma_ldr(ToAddress(falseExpr.high()), out.high, scratch, Offset,
+ Assembler::Equal);
+ }
+}
+
+void CodeGenerator::visitWasmReinterpretFromI64(LWasmReinterpretFromI64* lir) {
+ MOZ_ASSERT(lir->mir()->type() == MIRType::Double);
+ MOZ_ASSERT(lir->mir()->input()->type() == MIRType::Int64);
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ FloatRegister output = ToFloatRegister(lir->output());
+
+ masm.ma_vxfer(input.low, input.high, output);
+}
+
+void CodeGenerator::visitWasmReinterpretToI64(LWasmReinterpretToI64* lir) {
+ MOZ_ASSERT(lir->mir()->type() == MIRType::Int64);
+ MOZ_ASSERT(lir->mir()->input()->type() == MIRType::Double);
+ FloatRegister input = ToFloatRegister(lir->getOperand(0));
+ Register64 output = ToOutRegister64(lir);
+
+ masm.ma_vxfer(input, output.low, output.high);
+}
+
+void CodeGenerator::visitPopcntI64(LPopcntI64* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ Register64 output = ToOutRegister64(lir);
+ Register temp = ToRegister(lir->getTemp(0));
+
+ masm.popcnt64(input, output, temp);
+}
+
+void CodeGenerator::visitClzI64(LClzI64* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ Register64 output = ToOutRegister64(lir);
+
+ masm.clz64(input, output.low);
+ masm.move32(Imm32(0), output.high);
+}
+
+void CodeGenerator::visitCtzI64(LCtzI64* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ Register64 output = ToOutRegister64(lir);
+
+ masm.ctz64(input, output.low);
+ masm.move32(Imm32(0), output.high);
+}
+
+void CodeGenerator::visitBitNotI64(LBitNotI64* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ MOZ_ASSERT(input == ToOutRegister64(lir));
+ masm.ma_mvn(input.high, input.high);
+ masm.ma_mvn(input.low, input.low);
+}
+
+void CodeGenerator::visitTestI64AndBranch(LTestI64AndBranch* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+
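+  // A 64-bit value is non-zero if either half is non-zero: branch to the
+  // true block on a non-zero high word, otherwise test the low word.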
+ masm.as_cmp(input.high, Imm8(0));
+ jumpToBlock(lir->ifTrue(), Assembler::NonZero);
+ masm.as_cmp(input.low, Imm8(0));
+ emitBranch(Assembler::NonZero, lir->ifTrue(), lir->ifFalse());
+}
+
+void CodeGenerator::visitWasmAtomicLoadI64(LWasmAtomicLoadI64* lir) {
+ Register ptr = ToRegister(lir->ptr());
+ Register64 output = ToOutRegister64(lir);
+ Register64 tmp(InvalidReg, InvalidReg);
+
+ BaseIndex addr(HeapReg, ptr, TimesOne, lir->mir()->access().offset());
+ masm.wasmAtomicLoad64(lir->mir()->access(), addr, tmp, output);
+}
+
+void CodeGenerator::visitWasmAtomicStoreI64(LWasmAtomicStoreI64* lir) {
+ Register ptr = ToRegister(lir->ptr());
+ Register64 value = ToRegister64(lir->value());
+ Register64 tmp(ToRegister(lir->tmpHigh()), ToRegister(lir->tmpLow()));
+
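+  // A 64-bit atomic store is performed as an exchange whose previous value
+  // is discarded into the temps.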
+ BaseIndex addr(HeapReg, ptr, TimesOne, lir->mir()->access().offset());
+ masm.wasmAtomicExchange64(lir->mir()->access(), addr, value, tmp);
+}
+
+void CodeGenerator::visitWasmCompareExchangeI64(LWasmCompareExchangeI64* lir) {
+ Register ptr = ToRegister(lir->ptr());
+ Register64 expected = ToRegister64(lir->expected());
+ Register64 replacement = ToRegister64(lir->replacement());
+ Register64 out = ToOutRegister64(lir);
+
+ BaseIndex addr(HeapReg, ptr, TimesOne, lir->mir()->access().offset());
+ masm.wasmCompareExchange64(lir->mir()->access(), addr, expected, replacement,
+ out);
+}
+
+void CodeGenerator::visitWasmAtomicBinopI64(LWasmAtomicBinopI64* lir) {
+ Register ptr = ToRegister(lir->ptr());
+ Register64 value = ToRegister64(lir->value());
+ Register64 out = ToOutRegister64(lir);
+
+ BaseIndex addr(HeapReg, ptr, TimesOne, lir->access().offset());
+ Register64 tmp(ToRegister(lir->tmpHigh()), ToRegister(lir->tmpLow()));
+ masm.wasmAtomicFetchOp64(lir->access(), lir->operation(), value, addr, tmp,
+ out);
+}
+
+void CodeGenerator::visitWasmAtomicExchangeI64(LWasmAtomicExchangeI64* lir) {
+ Register ptr = ToRegister(lir->ptr());
+ Register64 value = ToRegister64(lir->value());
+ Register64 out = ToOutRegister64(lir);
+
+ BaseIndex addr(HeapReg, ptr, TimesOne, lir->access().offset());
+ masm.wasmAtomicExchange64(lir->access(), addr, value, out);
+}
+
+void CodeGenerator::visitNearbyInt(LNearbyInt*) { MOZ_CRASH("NYI"); }
+
+void CodeGenerator::visitNearbyIntF(LNearbyIntF*) { MOZ_CRASH("NYI"); }
+
+void CodeGenerator::visitSimd128(LSimd128* ins) { MOZ_CRASH("No SIMD"); }
+
+void CodeGenerator::visitWasmTernarySimd128(LWasmTernarySimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmBinarySimd128WithConstant(
+ LWasmBinarySimd128WithConstant* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmVariableShiftSimd128(
+ LWasmVariableShiftSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmConstantShiftSimd128(
+ LWasmConstantShiftSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmSignReplicationSimd128(
+ LWasmSignReplicationSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmShuffleSimd128(LWasmShuffleSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmReplaceLaneSimd128(LWasmReplaceLaneSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmReplaceInt64LaneSimd128(
+ LWasmReplaceInt64LaneSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmScalarToSimd128(LWasmScalarToSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmInt64ToSimd128(LWasmInt64ToSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmUnarySimd128(LWasmUnarySimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmReduceSimd128(LWasmReduceSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmReduceAndBranchSimd128(
+ LWasmReduceAndBranchSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmReduceSimd128ToInt64(
+ LWasmReduceSimd128ToInt64* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmLoadLaneSimd128(LWasmLoadLaneSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmStoreLaneSimd128(LWasmStoreLaneSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
diff --git a/js/src/jit/arm/CodeGenerator-arm.h b/js/src/jit/arm/CodeGenerator-arm.h
new file mode 100644
index 0000000000..f7cf2b263e
--- /dev/null
+++ b/js/src/jit/arm/CodeGenerator-arm.h
@@ -0,0 +1,172 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_CodeGenerator_arm_h
+#define jit_arm_CodeGenerator_arm_h
+
+#include "jit/arm/Assembler-arm.h"
+#include "jit/shared/CodeGenerator-shared.h"
+#include "js/ScalarType.h" // js::Scalar::Type
+
+namespace js {
+namespace jit {
+
+class CodeGeneratorARM;
+class OutOfLineBailout;
+class OutOfLineTableSwitch;
+
+using OutOfLineWasmTruncateCheck =
+ OutOfLineWasmTruncateCheckBase<CodeGeneratorARM>;
+
+class CodeGeneratorARM : public CodeGeneratorShared {
+ friend class MoveResolverARM;
+
+ protected:
+ CodeGeneratorARM(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm);
+
+ NonAssertingLabel deoptLabel_;
+
+ MoveOperand toMoveOperand(LAllocation a) const;
+
+ void bailoutIf(Assembler::Condition condition, LSnapshot* snapshot);
+ void bailoutFrom(Label* label, LSnapshot* snapshot);
+ void bailout(LSnapshot* snapshot);
+
+ template <typename T1, typename T2>
+ void bailoutCmpPtr(Assembler::Condition c, T1 lhs, T2 rhs,
+ LSnapshot* snapshot) {
+ masm.cmpPtr(lhs, rhs);
+ bailoutIf(c, snapshot);
+ }
+ void bailoutTestPtr(Assembler::Condition c, Register lhs, Register rhs,
+ LSnapshot* snapshot) {
+ masm.testPtr(lhs, rhs);
+ bailoutIf(c, snapshot);
+ }
+ template <typename T1, typename T2>
+ void bailoutCmp32(Assembler::Condition c, T1 lhs, T2 rhs,
+ LSnapshot* snapshot) {
+ masm.cmp32(lhs, rhs);
+ bailoutIf(c, snapshot);
+ }
+ template <typename T1, typename T2>
+ void bailoutTest32(Assembler::Condition c, T1 lhs, T2 rhs,
+ LSnapshot* snapshot) {
+ masm.test32(lhs, rhs);
+ bailoutIf(c, snapshot);
+ }
+ void bailoutIfFalseBool(Register reg, LSnapshot* snapshot) {
+ masm.test32(reg, Imm32(0xFF));
+ bailoutIf(Assembler::Zero, snapshot);
+ }
+
+ template <class T>
+ void generateUDivModZeroCheck(Register rhs, Register output, Label* done,
+ LSnapshot* snapshot, T* mir);
+
+ bool generateOutOfLineCode();
+
+ // Emits a branch that directs control flow to the true block if |cond| is
+ // true, and the false block if |cond| is false.
+ void emitBranch(Assembler::Condition cond, MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse);
+
+ void testNullEmitBranch(Assembler::Condition cond, const ValueOperand& value,
+ MBasicBlock* ifTrue, MBasicBlock* ifFalse) {
+ cond = masm.testNull(cond, value);
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+ void testUndefinedEmitBranch(Assembler::Condition cond,
+ const ValueOperand& value, MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse) {
+ cond = masm.testUndefined(cond, value);
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+ void testObjectEmitBranch(Assembler::Condition cond,
+ const ValueOperand& value, MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse) {
+ cond = masm.testObject(cond, value);
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+ void testZeroEmitBranch(Assembler::Condition cond, Register reg,
+ MBasicBlock* ifTrue, MBasicBlock* ifFalse) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ masm.cmpPtr(reg, ImmWord(0));
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+
+ void emitTableSwitchDispatch(MTableSwitch* mir, Register index,
+ Register base);
+
+ void emitBigIntDiv(LBigIntDiv* ins, Register dividend, Register divisor,
+ Register output, Label* fail);
+ void emitBigIntMod(LBigIntMod* ins, Register dividend, Register divisor,
+ Register output, Label* fail);
+
+ template <typename T>
+ void emitWasmLoad(T* ins);
+ template <typename T>
+ void emitWasmUnalignedLoad(T* ins);
+ template <typename T>
+ void emitWasmStore(T* ins);
+ template <typename T>
+ void emitWasmUnalignedStore(T* ins);
+
+ ValueOperand ToValue(LInstruction* ins, size_t pos);
+ ValueOperand ToTempValue(LInstruction* ins, size_t pos);
+
+ Register64 ToOperandOrRegister64(const LInt64Allocation input);
+
+ // Functions for LTestVAndBranch.
+ void splitTagForTest(const ValueOperand& value, ScratchTagScope& tag);
+
+ void divICommon(MDiv* mir, Register lhs, Register rhs, Register output,
+ LSnapshot* snapshot, Label& done);
+ void modICommon(MMod* mir, Register lhs, Register rhs, Register output,
+ LSnapshot* snapshot, Label& done);
+
+ void generateInvalidateEpilogue();
+
+ // Generating a result.
+ template <typename S, typename T>
+ void atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
+ const S& value, const T& mem,
+ Register flagTemp, Register outTemp,
+ AnyRegister output);
+
+ // Generating no result.
+ template <typename S, typename T>
+ void atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
+ const S& value, const T& mem,
+ Register flagTemp);
+
+ public:
+ void visitOutOfLineBailout(OutOfLineBailout* ool);
+ void visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool);
+ void visitOutOfLineWasmTruncateCheck(OutOfLineWasmTruncateCheck* ool);
+};
+
+typedef CodeGeneratorARM CodeGeneratorSpecific;
+
+// An out-of-line bailout thunk.
+class OutOfLineBailout : public OutOfLineCodeBase<CodeGeneratorARM> {
+ protected: // Silence Clang warning.
+ LSnapshot* snapshot_;
+ uint32_t frameSize_;
+
+ public:
+ OutOfLineBailout(LSnapshot* snapshot, uint32_t frameSize)
+ : snapshot_(snapshot), frameSize_(frameSize) {}
+
+ void accept(CodeGeneratorARM* codegen) override;
+
+ LSnapshot* snapshot() const { return snapshot_; }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_CodeGenerator_arm_h */
diff --git a/js/src/jit/arm/DoubleEntryTable.tbl b/js/src/jit/arm/DoubleEntryTable.tbl
new file mode 100644
index 0000000000..2e9e8c4a34
--- /dev/null
+++ b/js/src/jit/arm/DoubleEntryTable.tbl
@@ -0,0 +1,257 @@
+/* THIS FILE IS AUTOMATICALLY GENERATED BY gen-double-encoder-table.py. */
+ { 0x40000000, { 0, 0, 0 } },
+ { 0x40010000, { 1, 0, 0 } },
+ { 0x40020000, { 2, 0, 0 } },
+ { 0x40030000, { 3, 0, 0 } },
+ { 0x40040000, { 4, 0, 0 } },
+ { 0x40050000, { 5, 0, 0 } },
+ { 0x40060000, { 6, 0, 0 } },
+ { 0x40070000, { 7, 0, 0 } },
+ { 0x40080000, { 8, 0, 0 } },
+ { 0x40090000, { 9, 0, 0 } },
+ { 0x400a0000, { 10, 0, 0 } },
+ { 0x400b0000, { 11, 0, 0 } },
+ { 0x400c0000, { 12, 0, 0 } },
+ { 0x400d0000, { 13, 0, 0 } },
+ { 0x400e0000, { 14, 0, 0 } },
+ { 0x400f0000, { 15, 0, 0 } },
+ { 0x40100000, { 0, 1, 0 } },
+ { 0x40110000, { 1, 1, 0 } },
+ { 0x40120000, { 2, 1, 0 } },
+ { 0x40130000, { 3, 1, 0 } },
+ { 0x40140000, { 4, 1, 0 } },
+ { 0x40150000, { 5, 1, 0 } },
+ { 0x40160000, { 6, 1, 0 } },
+ { 0x40170000, { 7, 1, 0 } },
+ { 0x40180000, { 8, 1, 0 } },
+ { 0x40190000, { 9, 1, 0 } },
+ { 0x401a0000, { 10, 1, 0 } },
+ { 0x401b0000, { 11, 1, 0 } },
+ { 0x401c0000, { 12, 1, 0 } },
+ { 0x401d0000, { 13, 1, 0 } },
+ { 0x401e0000, { 14, 1, 0 } },
+ { 0x401f0000, { 15, 1, 0 } },
+ { 0x40200000, { 0, 2, 0 } },
+ { 0x40210000, { 1, 2, 0 } },
+ { 0x40220000, { 2, 2, 0 } },
+ { 0x40230000, { 3, 2, 0 } },
+ { 0x40240000, { 4, 2, 0 } },
+ { 0x40250000, { 5, 2, 0 } },
+ { 0x40260000, { 6, 2, 0 } },
+ { 0x40270000, { 7, 2, 0 } },
+ { 0x40280000, { 8, 2, 0 } },
+ { 0x40290000, { 9, 2, 0 } },
+ { 0x402a0000, { 10, 2, 0 } },
+ { 0x402b0000, { 11, 2, 0 } },
+ { 0x402c0000, { 12, 2, 0 } },
+ { 0x402d0000, { 13, 2, 0 } },
+ { 0x402e0000, { 14, 2, 0 } },
+ { 0x402f0000, { 15, 2, 0 } },
+ { 0x40300000, { 0, 3, 0 } },
+ { 0x40310000, { 1, 3, 0 } },
+ { 0x40320000, { 2, 3, 0 } },
+ { 0x40330000, { 3, 3, 0 } },
+ { 0x40340000, { 4, 3, 0 } },
+ { 0x40350000, { 5, 3, 0 } },
+ { 0x40360000, { 6, 3, 0 } },
+ { 0x40370000, { 7, 3, 0 } },
+ { 0x40380000, { 8, 3, 0 } },
+ { 0x40390000, { 9, 3, 0 } },
+ { 0x403a0000, { 10, 3, 0 } },
+ { 0x403b0000, { 11, 3, 0 } },
+ { 0x403c0000, { 12, 3, 0 } },
+ { 0x403d0000, { 13, 3, 0 } },
+ { 0x403e0000, { 14, 3, 0 } },
+ { 0x403f0000, { 15, 3, 0 } },
+ { 0x3fc00000, { 0, 4, 0 } },
+ { 0x3fc10000, { 1, 4, 0 } },
+ { 0x3fc20000, { 2, 4, 0 } },
+ { 0x3fc30000, { 3, 4, 0 } },
+ { 0x3fc40000, { 4, 4, 0 } },
+ { 0x3fc50000, { 5, 4, 0 } },
+ { 0x3fc60000, { 6, 4, 0 } },
+ { 0x3fc70000, { 7, 4, 0 } },
+ { 0x3fc80000, { 8, 4, 0 } },
+ { 0x3fc90000, { 9, 4, 0 } },
+ { 0x3fca0000, { 10, 4, 0 } },
+ { 0x3fcb0000, { 11, 4, 0 } },
+ { 0x3fcc0000, { 12, 4, 0 } },
+ { 0x3fcd0000, { 13, 4, 0 } },
+ { 0x3fce0000, { 14, 4, 0 } },
+ { 0x3fcf0000, { 15, 4, 0 } },
+ { 0x3fd00000, { 0, 5, 0 } },
+ { 0x3fd10000, { 1, 5, 0 } },
+ { 0x3fd20000, { 2, 5, 0 } },
+ { 0x3fd30000, { 3, 5, 0 } },
+ { 0x3fd40000, { 4, 5, 0 } },
+ { 0x3fd50000, { 5, 5, 0 } },
+ { 0x3fd60000, { 6, 5, 0 } },
+ { 0x3fd70000, { 7, 5, 0 } },
+ { 0x3fd80000, { 8, 5, 0 } },
+ { 0x3fd90000, { 9, 5, 0 } },
+ { 0x3fda0000, { 10, 5, 0 } },
+ { 0x3fdb0000, { 11, 5, 0 } },
+ { 0x3fdc0000, { 12, 5, 0 } },
+ { 0x3fdd0000, { 13, 5, 0 } },
+ { 0x3fde0000, { 14, 5, 0 } },
+ { 0x3fdf0000, { 15, 5, 0 } },
+ { 0x3fe00000, { 0, 6, 0 } },
+ { 0x3fe10000, { 1, 6, 0 } },
+ { 0x3fe20000, { 2, 6, 0 } },
+ { 0x3fe30000, { 3, 6, 0 } },
+ { 0x3fe40000, { 4, 6, 0 } },
+ { 0x3fe50000, { 5, 6, 0 } },
+ { 0x3fe60000, { 6, 6, 0 } },
+ { 0x3fe70000, { 7, 6, 0 } },
+ { 0x3fe80000, { 8, 6, 0 } },
+ { 0x3fe90000, { 9, 6, 0 } },
+ { 0x3fea0000, { 10, 6, 0 } },
+ { 0x3feb0000, { 11, 6, 0 } },
+ { 0x3fec0000, { 12, 6, 0 } },
+ { 0x3fed0000, { 13, 6, 0 } },
+ { 0x3fee0000, { 14, 6, 0 } },
+ { 0x3fef0000, { 15, 6, 0 } },
+ { 0x3ff00000, { 0, 7, 0 } },
+ { 0x3ff10000, { 1, 7, 0 } },
+ { 0x3ff20000, { 2, 7, 0 } },
+ { 0x3ff30000, { 3, 7, 0 } },
+ { 0x3ff40000, { 4, 7, 0 } },
+ { 0x3ff50000, { 5, 7, 0 } },
+ { 0x3ff60000, { 6, 7, 0 } },
+ { 0x3ff70000, { 7, 7, 0 } },
+ { 0x3ff80000, { 8, 7, 0 } },
+ { 0x3ff90000, { 9, 7, 0 } },
+ { 0x3ffa0000, { 10, 7, 0 } },
+ { 0x3ffb0000, { 11, 7, 0 } },
+ { 0x3ffc0000, { 12, 7, 0 } },
+ { 0x3ffd0000, { 13, 7, 0 } },
+ { 0x3ffe0000, { 14, 7, 0 } },
+ { 0x3fff0000, { 15, 7, 0 } },
+ { 0xc0000000, { 0, 8, 0 } },
+ { 0xc0010000, { 1, 8, 0 } },
+ { 0xc0020000, { 2, 8, 0 } },
+ { 0xc0030000, { 3, 8, 0 } },
+ { 0xc0040000, { 4, 8, 0 } },
+ { 0xc0050000, { 5, 8, 0 } },
+ { 0xc0060000, { 6, 8, 0 } },
+ { 0xc0070000, { 7, 8, 0 } },
+ { 0xc0080000, { 8, 8, 0 } },
+ { 0xc0090000, { 9, 8, 0 } },
+ { 0xc00a0000, { 10, 8, 0 } },
+ { 0xc00b0000, { 11, 8, 0 } },
+ { 0xc00c0000, { 12, 8, 0 } },
+ { 0xc00d0000, { 13, 8, 0 } },
+ { 0xc00e0000, { 14, 8, 0 } },
+ { 0xc00f0000, { 15, 8, 0 } },
+ { 0xc0100000, { 0, 9, 0 } },
+ { 0xc0110000, { 1, 9, 0 } },
+ { 0xc0120000, { 2, 9, 0 } },
+ { 0xc0130000, { 3, 9, 0 } },
+ { 0xc0140000, { 4, 9, 0 } },
+ { 0xc0150000, { 5, 9, 0 } },
+ { 0xc0160000, { 6, 9, 0 } },
+ { 0xc0170000, { 7, 9, 0 } },
+ { 0xc0180000, { 8, 9, 0 } },
+ { 0xc0190000, { 9, 9, 0 } },
+ { 0xc01a0000, { 10, 9, 0 } },
+ { 0xc01b0000, { 11, 9, 0 } },
+ { 0xc01c0000, { 12, 9, 0 } },
+ { 0xc01d0000, { 13, 9, 0 } },
+ { 0xc01e0000, { 14, 9, 0 } },
+ { 0xc01f0000, { 15, 9, 0 } },
+ { 0xc0200000, { 0, 10, 0 } },
+ { 0xc0210000, { 1, 10, 0 } },
+ { 0xc0220000, { 2, 10, 0 } },
+ { 0xc0230000, { 3, 10, 0 } },
+ { 0xc0240000, { 4, 10, 0 } },
+ { 0xc0250000, { 5, 10, 0 } },
+ { 0xc0260000, { 6, 10, 0 } },
+ { 0xc0270000, { 7, 10, 0 } },
+ { 0xc0280000, { 8, 10, 0 } },
+ { 0xc0290000, { 9, 10, 0 } },
+ { 0xc02a0000, { 10, 10, 0 } },
+ { 0xc02b0000, { 11, 10, 0 } },
+ { 0xc02c0000, { 12, 10, 0 } },
+ { 0xc02d0000, { 13, 10, 0 } },
+ { 0xc02e0000, { 14, 10, 0 } },
+ { 0xc02f0000, { 15, 10, 0 } },
+ { 0xc0300000, { 0, 11, 0 } },
+ { 0xc0310000, { 1, 11, 0 } },
+ { 0xc0320000, { 2, 11, 0 } },
+ { 0xc0330000, { 3, 11, 0 } },
+ { 0xc0340000, { 4, 11, 0 } },
+ { 0xc0350000, { 5, 11, 0 } },
+ { 0xc0360000, { 6, 11, 0 } },
+ { 0xc0370000, { 7, 11, 0 } },
+ { 0xc0380000, { 8, 11, 0 } },
+ { 0xc0390000, { 9, 11, 0 } },
+ { 0xc03a0000, { 10, 11, 0 } },
+ { 0xc03b0000, { 11, 11, 0 } },
+ { 0xc03c0000, { 12, 11, 0 } },
+ { 0xc03d0000, { 13, 11, 0 } },
+ { 0xc03e0000, { 14, 11, 0 } },
+ { 0xc03f0000, { 15, 11, 0 } },
+ { 0xbfc00000, { 0, 12, 0 } },
+ { 0xbfc10000, { 1, 12, 0 } },
+ { 0xbfc20000, { 2, 12, 0 } },
+ { 0xbfc30000, { 3, 12, 0 } },
+ { 0xbfc40000, { 4, 12, 0 } },
+ { 0xbfc50000, { 5, 12, 0 } },
+ { 0xbfc60000, { 6, 12, 0 } },
+ { 0xbfc70000, { 7, 12, 0 } },
+ { 0xbfc80000, { 8, 12, 0 } },
+ { 0xbfc90000, { 9, 12, 0 } },
+ { 0xbfca0000, { 10, 12, 0 } },
+ { 0xbfcb0000, { 11, 12, 0 } },
+ { 0xbfcc0000, { 12, 12, 0 } },
+ { 0xbfcd0000, { 13, 12, 0 } },
+ { 0xbfce0000, { 14, 12, 0 } },
+ { 0xbfcf0000, { 15, 12, 0 } },
+ { 0xbfd00000, { 0, 13, 0 } },
+ { 0xbfd10000, { 1, 13, 0 } },
+ { 0xbfd20000, { 2, 13, 0 } },
+ { 0xbfd30000, { 3, 13, 0 } },
+ { 0xbfd40000, { 4, 13, 0 } },
+ { 0xbfd50000, { 5, 13, 0 } },
+ { 0xbfd60000, { 6, 13, 0 } },
+ { 0xbfd70000, { 7, 13, 0 } },
+ { 0xbfd80000, { 8, 13, 0 } },
+ { 0xbfd90000, { 9, 13, 0 } },
+ { 0xbfda0000, { 10, 13, 0 } },
+ { 0xbfdb0000, { 11, 13, 0 } },
+ { 0xbfdc0000, { 12, 13, 0 } },
+ { 0xbfdd0000, { 13, 13, 0 } },
+ { 0xbfde0000, { 14, 13, 0 } },
+ { 0xbfdf0000, { 15, 13, 0 } },
+ { 0xbfe00000, { 0, 14, 0 } },
+ { 0xbfe10000, { 1, 14, 0 } },
+ { 0xbfe20000, { 2, 14, 0 } },
+ { 0xbfe30000, { 3, 14, 0 } },
+ { 0xbfe40000, { 4, 14, 0 } },
+ { 0xbfe50000, { 5, 14, 0 } },
+ { 0xbfe60000, { 6, 14, 0 } },
+ { 0xbfe70000, { 7, 14, 0 } },
+ { 0xbfe80000, { 8, 14, 0 } },
+ { 0xbfe90000, { 9, 14, 0 } },
+ { 0xbfea0000, { 10, 14, 0 } },
+ { 0xbfeb0000, { 11, 14, 0 } },
+ { 0xbfec0000, { 12, 14, 0 } },
+ { 0xbfed0000, { 13, 14, 0 } },
+ { 0xbfee0000, { 14, 14, 0 } },
+ { 0xbfef0000, { 15, 14, 0 } },
+ { 0xbff00000, { 0, 15, 0 } },
+ { 0xbff10000, { 1, 15, 0 } },
+ { 0xbff20000, { 2, 15, 0 } },
+ { 0xbff30000, { 3, 15, 0 } },
+ { 0xbff40000, { 4, 15, 0 } },
+ { 0xbff50000, { 5, 15, 0 } },
+ { 0xbff60000, { 6, 15, 0 } },
+ { 0xbff70000, { 7, 15, 0 } },
+ { 0xbff80000, { 8, 15, 0 } },
+ { 0xbff90000, { 9, 15, 0 } },
+ { 0xbffa0000, { 10, 15, 0 } },
+ { 0xbffb0000, { 11, 15, 0 } },
+ { 0xbffc0000, { 12, 15, 0 } },
+ { 0xbffd0000, { 13, 15, 0 } },
+ { 0xbffe0000, { 14, 15, 0 } },
+ { 0xbfff0000, { 15, 15, 0 } },
diff --git a/js/src/jit/arm/LIR-arm.h b/js/src/jit/arm/LIR-arm.h
new file mode 100644
index 0000000000..395b285c93
--- /dev/null
+++ b/js/src/jit/arm/LIR-arm.h
@@ -0,0 +1,511 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_LIR_arm_h
+#define jit_arm_LIR_arm_h
+
+namespace js {
+namespace jit {
+
+class LBoxFloatingPoint : public LInstructionHelper<2, 1, 1> {
+ MIRType type_;
+
+ public:
+ LIR_HEADER(BoxFloatingPoint);
+
+ LBoxFloatingPoint(const LAllocation& in, const LDefinition& temp,
+ MIRType type)
+ : LInstructionHelper(classOpcode), type_(type) {
+ setOperand(0, in);
+ setTemp(0, temp);
+ }
+
+ MIRType type() const { return type_; }
+ const char* extraName() const { return StringFromMIRType(type_); }
+};
+
+class LUnbox : public LInstructionHelper<1, 2, 0> {
+ public:
+ LIR_HEADER(Unbox);
+
+ LUnbox() : LInstructionHelper(classOpcode) {}
+
+ MUnbox* mir() const { return mir_->toUnbox(); }
+ const LAllocation* payload() { return getOperand(0); }
+ const LAllocation* type() { return getOperand(1); }
+ const char* extraName() const { return StringFromMIRType(mir()->type()); }
+};
+
+class LUnboxFloatingPoint : public LInstructionHelper<1, 2, 0> {
+ MIRType type_;
+
+ public:
+ LIR_HEADER(UnboxFloatingPoint);
+
+ static const size_t Input = 0;
+
+ LUnboxFloatingPoint(const LBoxAllocation& input, MIRType type)
+ : LInstructionHelper(classOpcode), type_(type) {
+ setBoxOperand(Input, input);
+ }
+
+ MUnbox* mir() const { return mir_->toUnbox(); }
+
+ MIRType type() const { return type_; }
+ const char* extraName() const { return StringFromMIRType(type_); }
+};
+
+// Convert a 32-bit unsigned integer to a double.
+class LWasmUint32ToDouble : public LInstructionHelper<1, 1, 0> {
+ public:
+ LIR_HEADER(WasmUint32ToDouble)
+
+ explicit LWasmUint32ToDouble(const LAllocation& input)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, input);
+ }
+};
+
+// Convert a 32-bit unsigned integer to a float32.
+class LWasmUint32ToFloat32 : public LInstructionHelper<1, 1, 0> {
+ public:
+ LIR_HEADER(WasmUint32ToFloat32)
+
+ explicit LWasmUint32ToFloat32(const LAllocation& input)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, input);
+ }
+};
+
+class LDivI : public LBinaryMath<1> {
+ public:
+ LIR_HEADER(DivI);
+
+ LDivI(const LAllocation& lhs, const LAllocation& rhs, const LDefinition& temp)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ setTemp(0, temp);
+ }
+
+ MDiv* mir() const { return mir_->toDiv(); }
+};
+
+class LDivOrModI64
+ : public LCallInstructionHelper<INT64_PIECES, INT64_PIECES * 2 + 1, 0> {
+ public:
+ LIR_HEADER(DivOrModI64)
+
+ static const size_t Lhs = 0;
+ static const size_t Rhs = INT64_PIECES;
+ static const size_t Instance = 2 * INT64_PIECES;
+
+ LDivOrModI64(const LInt64Allocation& lhs, const LInt64Allocation& rhs,
+ const LAllocation& instance)
+ : LCallInstructionHelper(classOpcode) {
+ setInt64Operand(Lhs, lhs);
+ setInt64Operand(Rhs, rhs);
+ setOperand(Instance, instance);
+ }
+
+ MDefinition* mir() const {
+ MOZ_ASSERT(mir_->isWasmBuiltinDivI64() || mir_->isWasmBuiltinModI64());
+ return mir_;
+ }
+ bool canBeDivideByZero() const {
+ if (mir_->isWasmBuiltinModI64()) {
+ return mir_->toWasmBuiltinModI64()->canBeDivideByZero();
+ }
+ return mir_->toWasmBuiltinDivI64()->canBeDivideByZero();
+ }
+ bool canBeNegativeOverflow() const {
+ if (mir_->isWasmBuiltinModI64()) {
+ return mir_->toWasmBuiltinModI64()->canBeNegativeDividend();
+ }
+ return mir_->toWasmBuiltinDivI64()->canBeNegativeOverflow();
+ }
+ wasm::BytecodeOffset bytecodeOffset() const {
+ MOZ_ASSERT(mir_->isWasmBuiltinDivI64() || mir_->isWasmBuiltinModI64());
+ if (mir_->isWasmBuiltinModI64()) {
+ return mir_->toWasmBuiltinModI64()->bytecodeOffset();
+ }
+ return mir_->toWasmBuiltinDivI64()->bytecodeOffset();
+ }
+};
+
+class LUDivOrModI64
+ : public LCallInstructionHelper<INT64_PIECES, INT64_PIECES * 2 + 1, 0> {
+ public:
+ LIR_HEADER(UDivOrModI64)
+
+ static const size_t Lhs = 0;
+ static const size_t Rhs = INT64_PIECES;
+ static const size_t Instance = 2 * INT64_PIECES;
+
+ LUDivOrModI64(const LInt64Allocation& lhs, const LInt64Allocation& rhs,
+ const LAllocation& instance)
+ : LCallInstructionHelper(classOpcode) {
+ setInt64Operand(Lhs, lhs);
+ setInt64Operand(Rhs, rhs);
+ setOperand(Instance, instance);
+ }
+
+ MDefinition* mir() const {
+ MOZ_ASSERT(mir_->isWasmBuiltinDivI64() || mir_->isWasmBuiltinModI64());
+ return mir_;
+ }
+ bool canBeDivideByZero() const {
+ if (mir_->isWasmBuiltinModI64()) {
+ return mir_->toWasmBuiltinModI64()->canBeDivideByZero();
+ }
+ return mir_->toWasmBuiltinDivI64()->canBeDivideByZero();
+ }
+ bool canBeNegativeOverflow() const {
+ if (mir_->isWasmBuiltinModI64()) {
+ return mir_->toWasmBuiltinModI64()->canBeNegativeDividend();
+ }
+ return mir_->toWasmBuiltinDivI64()->canBeNegativeOverflow();
+ }
+ wasm::BytecodeOffset bytecodeOffset() const {
+ MOZ_ASSERT(mir_->isWasmBuiltinDivI64() || mir_->isWasmBuiltinModI64());
+ if (mir_->isWasmBuiltinModI64()) {
+ return mir_->toWasmBuiltinModI64()->bytecodeOffset();
+ }
+ return mir_->toWasmBuiltinDivI64()->bytecodeOffset();
+ }
+};
+
+// LSoftDivI is a software divide for ARM cores that don't support a hardware
+// divide instruction, implemented as a C++ native call.
+class LSoftDivI : public LBinaryCallInstructionHelper<1, 0> {
+ public:
+ LIR_HEADER(SoftDivI);
+
+ LSoftDivI(const LAllocation& lhs, const LAllocation& rhs)
+ : LBinaryCallInstructionHelper(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ }
+
+ MDiv* mir() const { return mir_->toDiv(); }
+};
+
+class LDivPowTwoI : public LInstructionHelper<1, 1, 0> {
+ const int32_t shift_;
+
+ public:
+ LIR_HEADER(DivPowTwoI)
+
+ LDivPowTwoI(const LAllocation& lhs, int32_t shift)
+ : LInstructionHelper(classOpcode), shift_(shift) {
+ setOperand(0, lhs);
+ }
+
+ const LAllocation* numerator() { return getOperand(0); }
+
+ int32_t shift() { return shift_; }
+
+ MDiv* mir() const { return mir_->toDiv(); }
+};
+
+class LModI : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(ModI);
+
+ LModI(const LAllocation& lhs, const LAllocation& rhs)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ }
+
+ MMod* mir() const { return mir_->toMod(); }
+};
+
+class LSoftModI : public LBinaryCallInstructionHelper<1, 1> {
+ public:
+ LIR_HEADER(SoftModI);
+
+ LSoftModI(const LAllocation& lhs, const LAllocation& rhs,
+ const LDefinition& temp)
+ : LBinaryCallInstructionHelper(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ setTemp(0, temp);
+ }
+
+ const LDefinition* callTemp() { return getTemp(0); }
+
+ MMod* mir() const { return mir_->toMod(); }
+};
+
+class LModPowTwoI : public LInstructionHelper<1, 1, 0> {
+ const int32_t shift_;
+
+ public:
+ LIR_HEADER(ModPowTwoI);
+ int32_t shift() { return shift_; }
+
+ LModPowTwoI(const LAllocation& lhs, int32_t shift)
+ : LInstructionHelper(classOpcode), shift_(shift) {
+ setOperand(0, lhs);
+ }
+
+ MMod* mir() const { return mir_->toMod(); }
+};
+
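+// Computes an integer modulus by a constant of the form 2^n - 1 without
+// using a divide instruction.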
+class LModMaskI : public LInstructionHelper<1, 1, 2> {
+ const int32_t shift_;
+
+ public:
+ LIR_HEADER(ModMaskI);
+
+ LModMaskI(const LAllocation& lhs, const LDefinition& temp1,
+ const LDefinition& temp2, int32_t shift)
+ : LInstructionHelper(classOpcode), shift_(shift) {
+ setOperand(0, lhs);
+ setTemp(0, temp1);
+ setTemp(1, temp2);
+ }
+
+ int32_t shift() const { return shift_; }
+
+ MMod* mir() const { return mir_->toMod(); }
+};
+
+// Takes a tableswitch with an integer to decide.
+class LTableSwitch : public LInstructionHelper<0, 1, 1> {
+ public:
+ LIR_HEADER(TableSwitch);
+
+ LTableSwitch(const LAllocation& in, const LDefinition& inputCopy,
+ MTableSwitch* ins)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, in);
+ setTemp(0, inputCopy);
+ setMir(ins);
+ }
+
+ MTableSwitch* mir() const { return mir_->toTableSwitch(); }
+
+ const LAllocation* index() { return getOperand(0); }
+ const LDefinition* tempInt() { return getTemp(0); }
+ // This is added to share the same CodeGenerator prefixes.
+ const LDefinition* tempPointer() { return nullptr; }
+};
+
+// Takes a tableswitch with a value to decide.
+class LTableSwitchV : public LInstructionHelper<0, BOX_PIECES, 2> {
+ public:
+ LIR_HEADER(TableSwitchV);
+
+ LTableSwitchV(const LBoxAllocation& input, const LDefinition& inputCopy,
+ const LDefinition& floatCopy, MTableSwitch* ins)
+ : LInstructionHelper(classOpcode) {
+ setBoxOperand(InputValue, input);
+ setTemp(0, inputCopy);
+ setTemp(1, floatCopy);
+ setMir(ins);
+ }
+
+ MTableSwitch* mir() const { return mir_->toTableSwitch(); }
+
+ static const size_t InputValue = 0;
+
+ const LDefinition* tempInt() { return getTemp(0); }
+ const LDefinition* tempFloat() { return getTemp(1); }
+ const LDefinition* tempPointer() { return nullptr; }
+};
+
+class LMulI : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(MulI);
+
+ LMulI() : LBinaryMath(classOpcode) {}
+
+ MMul* mir() { return mir_->toMul(); }
+};
+
+class LUDiv : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(UDiv);
+
+ LUDiv() : LBinaryMath(classOpcode) {}
+
+ MDiv* mir() { return mir_->toDiv(); }
+};
+
+class LUMod : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(UMod);
+
+ LUMod() : LBinaryMath(classOpcode) {}
+
+ MMod* mir() { return mir_->toMod(); }
+};
+
+class LSoftUDivOrMod : public LBinaryCallInstructionHelper<1, 0> {
+ public:
+ LIR_HEADER(SoftUDivOrMod);
+
+ LSoftUDivOrMod(const LAllocation& lhs, const LAllocation& rhs)
+ : LBinaryCallInstructionHelper(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ }
+
+ MInstruction* mir() { return mir_->toInstruction(); }
+};
+
+class LWasmTruncateToInt64 : public LCallInstructionHelper<INT64_PIECES, 2, 0> {
+ static const size_t Input = 0;
+ static const size_t Instance = 1;
+
+ public:
+ LIR_HEADER(WasmTruncateToInt64);
+
+ LWasmTruncateToInt64(const LAllocation& in, const LAllocation& instance)
+ : LCallInstructionHelper(classOpcode) {
+ setOperand(Input, in);
+ setOperand(Instance, instance);
+ }
+
+ LAllocation* input() { return getOperand(Input); }
+ LAllocation* instance() { return getOperand(Instance); }
+
+ MWasmBuiltinTruncateToInt64* mir() const {
+ return mir_->toWasmBuiltinTruncateToInt64();
+ }
+};
+
+class LInt64ToFloatingPointCall
+ : public LCallInstructionHelper<1, INT64_PIECES + 1, 0> {
+ public:
+ LIR_HEADER(Int64ToFloatingPointCall);
+
+ static const size_t Input = 0;
+ static const size_t Instance = INT64_PIECES;
+
+ LInt64ToFloatingPointCall(const LInt64Allocation& in,
+ const LAllocation& instance)
+ : LCallInstructionHelper(classOpcode) {
+ setInt64Operand(Input, in);
+ setOperand(Instance, instance);
+ }
+
+ LAllocation* input() { return getOperand(Input); }
+ LAllocation* instance() { return getOperand(Instance); }
+
+ MBuiltinInt64ToFloatingPoint* mir() const {
+ return mir_->toBuiltinInt64ToFloatingPoint();
+ }
+};
+
+class LWasmAtomicLoadI64 : public LInstructionHelper<INT64_PIECES, 1, 0> {
+ public:
+ LIR_HEADER(WasmAtomicLoadI64);
+
+ explicit LWasmAtomicLoadI64(const LAllocation& ptr)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, ptr);
+ }
+
+ MWasmLoad* mir() const { return mir_->toWasmLoad(); }
+ const LAllocation* ptr() { return getOperand(0); }
+};
+
+class LWasmAtomicStoreI64 : public LInstructionHelper<0, 1 + INT64_PIECES, 2> {
+ public:
+ LIR_HEADER(WasmAtomicStoreI64);
+
+ LWasmAtomicStoreI64(const LAllocation& ptr, const LInt64Allocation& value,
+ const LDefinition& tmpLow, const LDefinition& tmpHigh)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, ptr);
+ setInt64Operand(1, value);
+ setTemp(0, tmpLow);
+ setTemp(1, tmpHigh);
+ }
+
+ MWasmStore* mir() const { return mir_->toWasmStore(); }
+ const LAllocation* ptr() { return getOperand(0); }
+ const LInt64Allocation value() { return getInt64Operand(1); }
+ const LDefinition* tmpLow() { return getTemp(0); }
+ const LDefinition* tmpHigh() { return getTemp(1); }
+};
+
+class LWasmCompareExchangeI64
+ : public LInstructionHelper<INT64_PIECES, 1 + 2 * INT64_PIECES, 0> {
+ public:
+ LIR_HEADER(WasmCompareExchangeI64);
+
+ LWasmCompareExchangeI64(const LAllocation& ptr,
+ const LInt64Allocation& expected,
+ const LInt64Allocation& replacement)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, ptr);
+ setInt64Operand(1, expected);
+ setInt64Operand(1 + INT64_PIECES, replacement);
+ }
+
+ MWasmCompareExchangeHeap* mir() const {
+ return mir_->toWasmCompareExchangeHeap();
+ }
+ const LAllocation* ptr() { return getOperand(0); }
+ const LInt64Allocation expected() { return getInt64Operand(1); }
+ const LInt64Allocation replacement() {
+ return getInt64Operand(1 + INT64_PIECES);
+ }
+};
+
+class LWasmAtomicBinopI64
+ : public LInstructionHelper<INT64_PIECES, 1 + INT64_PIECES, 2> {
+ const wasm::MemoryAccessDesc& access_;
+ AtomicOp op_;
+
+ public:
+ LIR_HEADER(WasmAtomicBinopI64);
+
+ LWasmAtomicBinopI64(const LAllocation& ptr, const LInt64Allocation& value,
+ const LDefinition& tmpLow, const LDefinition& tmpHigh,
+ const wasm::MemoryAccessDesc& access, AtomicOp op)
+ : LInstructionHelper(classOpcode), access_(access), op_(op) {
+ setOperand(0, ptr);
+ setInt64Operand(1, value);
+ setTemp(0, tmpLow);
+ setTemp(1, tmpHigh);
+ }
+
+ const LAllocation* ptr() { return getOperand(0); }
+ const LInt64Allocation value() { return getInt64Operand(1); }
+ const wasm::MemoryAccessDesc& access() { return access_; }
+ AtomicOp operation() const { return op_; }
+ const LDefinition* tmpLow() { return getTemp(0); }
+ const LDefinition* tmpHigh() { return getTemp(1); }
+};
+
+class LWasmAtomicExchangeI64
+ : public LInstructionHelper<INT64_PIECES, 1 + INT64_PIECES, 0> {
+ const wasm::MemoryAccessDesc& access_;
+
+ public:
+ LIR_HEADER(WasmAtomicExchangeI64);
+
+ LWasmAtomicExchangeI64(const LAllocation& ptr, const LInt64Allocation& value,
+ const wasm::MemoryAccessDesc& access)
+ : LInstructionHelper(classOpcode), access_(access) {
+ setOperand(0, ptr);
+ setInt64Operand(1, value);
+ }
+
+ const LAllocation* ptr() { return getOperand(0); }
+ const LInt64Allocation value() { return getInt64Operand(1); }
+ const wasm::MemoryAccessDesc& access() { return access_; }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_LIR_arm_h */
diff --git a/js/src/jit/arm/Lowering-arm.cpp b/js/src/jit/arm/Lowering-arm.cpp
new file mode 100644
index 0000000000..e384ee7911
--- /dev/null
+++ b/js/src/jit/arm/Lowering-arm.cpp
@@ -0,0 +1,1223 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm/Lowering-arm.h"
+
+#include "mozilla/MathAlgorithms.h"
+
+#include "jit/arm/Assembler-arm.h"
+#include "jit/Lowering.h"
+#include "jit/MIR.h"
+#include "jit/shared/Lowering-shared-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+using mozilla::FloorLog2;
+
+LBoxAllocation LIRGeneratorARM::useBoxFixed(MDefinition* mir, Register reg1,
+ Register reg2, bool useAtStart) {
+ MOZ_ASSERT(mir->type() == MIRType::Value);
+ MOZ_ASSERT(reg1 != reg2);
+
+ ensureDefined(mir);
+ return LBoxAllocation(LUse(reg1, mir->virtualRegister(), useAtStart),
+ LUse(reg2, VirtualRegisterOfPayload(mir), useAtStart));
+}
+
+LAllocation LIRGeneratorARM::useByteOpRegister(MDefinition* mir) {
+ return useRegister(mir);
+}
+
+LAllocation LIRGeneratorARM::useByteOpRegisterAtStart(MDefinition* mir) {
+ return useRegisterAtStart(mir);
+}
+
+LAllocation LIRGeneratorARM::useByteOpRegisterOrNonDoubleConstant(
+ MDefinition* mir) {
+ return useRegisterOrNonDoubleConstant(mir);
+}
+
+LDefinition LIRGeneratorARM::tempByteOpRegister() { return temp(); }
+
+void LIRGenerator::visitBox(MBox* box) {
+ MDefinition* inner = box->getOperand(0);
+
+ // If the box wrapped a double, it needs a new register.
+ if (IsFloatingPointType(inner->type())) {
+ defineBox(new (alloc()) LBoxFloatingPoint(
+ useRegisterAtStart(inner), tempCopy(inner, 0), inner->type()),
+ box);
+ return;
+ }
+
+ if (box->canEmitAtUses()) {
+ emitAtUses(box);
+ return;
+ }
+
+ if (inner->isConstant()) {
+ defineBox(new (alloc()) LValue(inner->toConstant()->toJSValue()), box);
+ return;
+ }
+
+ LBox* lir = new (alloc()) LBox(use(inner), inner->type());
+
+ // Otherwise, we should not define a new register for the payload portion
+ // of the output, so bypass defineBox().
+ uint32_t vreg = getVirtualRegister();
+
+ // Note that because we're using BogusTemp(), we do not change the type of
+ // the definition. We also do not define the first output as "TYPE",
+ // because it has no corresponding payload at (vreg + 1). Also note that
+ // although we copy the input's original type for the payload half of the
+ // definition, this is only for clarity. BogusTemp() definitions are
+ // ignored.
+ lir->setDef(0, LDefinition(vreg, LDefinition::GENERAL));
+ lir->setDef(1, LDefinition::BogusTemp());
+ box->setVirtualRegister(vreg);
+ add(lir);
+}
+
+void LIRGenerator::visitUnbox(MUnbox* unbox) {
+ MDefinition* inner = unbox->getOperand(0);
+
+ // An unbox on arm reads in a type tag (either in memory or a register) and
+ // a payload. Unlike most instructions consuming a box, we ask for the type
+ // second, so that the result can re-use the first input.
+ MOZ_ASSERT(inner->type() == MIRType::Value);
+
+ ensureDefined(inner);
+
+ if (IsFloatingPointType(unbox->type())) {
+ LUnboxFloatingPoint* lir =
+ new (alloc()) LUnboxFloatingPoint(useBox(inner), unbox->type());
+ if (unbox->fallible()) {
+ assignSnapshot(lir, unbox->bailoutKind());
+ }
+ define(lir, unbox);
+ return;
+ }
+
+ // Swap the order we use the box pieces so we can re-use the payload register.
+ LUnbox* lir = new (alloc()) LUnbox;
+ lir->setOperand(0, usePayloadInRegisterAtStart(inner));
+ lir->setOperand(1, useType(inner, LUse::REGISTER));
+
+ if (unbox->fallible()) {
+ assignSnapshot(lir, unbox->bailoutKind());
+ }
+
+ // Types and payloads form two separate intervals. If the type becomes dead
+ // before the payload, it could be used as a Value without the type being
+ // recoverable. Unbox's purpose is to eagerly kill the definition of a type
+ // tag, so keeping both alive (for the purpose of gcmaps) is unappealing.
+ // Instead, we create a new virtual register.
+ defineReuseInput(lir, unbox, 0);
+}
+
+void LIRGenerator::visitReturnImpl(MDefinition* opd, bool isGenerator) {
+ MOZ_ASSERT(opd->type() == MIRType::Value);
+
+ LReturn* ins = new (alloc()) LReturn(isGenerator);
+ ins->setOperand(0, LUse(JSReturnReg_Type));
+ ins->setOperand(1, LUse(JSReturnReg_Data));
+ fillBoxUses(ins, 0, opd);
+ add(ins);
+}
+
+void LIRGeneratorARM::defineInt64Phi(MPhi* phi, size_t lirIndex) {
+ LPhi* low = current->getPhi(lirIndex + INT64LOW_INDEX);
+ LPhi* high = current->getPhi(lirIndex + INT64HIGH_INDEX);
+
+ uint32_t lowVreg = getVirtualRegister();
+
+ phi->setVirtualRegister(lowVreg);
+
+ uint32_t highVreg = getVirtualRegister();
+ MOZ_ASSERT(lowVreg + INT64HIGH_INDEX == highVreg + INT64LOW_INDEX);
+
+ low->setDef(0, LDefinition(lowVreg, LDefinition::INT32));
+ high->setDef(0, LDefinition(highVreg, LDefinition::INT32));
+ annotate(high);
+ annotate(low);
+}
+
+void LIRGeneratorARM::lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition,
+ LBlock* block, size_t lirIndex) {
+ MDefinition* operand = phi->getOperand(inputPosition);
+ LPhi* low = block->getPhi(lirIndex + INT64LOW_INDEX);
+ LPhi* high = block->getPhi(lirIndex + INT64HIGH_INDEX);
+ low->setOperand(inputPosition,
+ LUse(operand->virtualRegister() + INT64LOW_INDEX, LUse::ANY));
+ high->setOperand(
+ inputPosition,
+ LUse(operand->virtualRegister() + INT64HIGH_INDEX, LUse::ANY));
+}
+
+// x = !y
+void LIRGeneratorARM::lowerForALU(LInstructionHelper<1, 1, 0>* ins,
+ MDefinition* mir, MDefinition* input) {
+ ins->setOperand(
+ 0, ins->snapshot() ? useRegister(input) : useRegisterAtStart(input));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+// z = x+y
+void LIRGeneratorARM::lowerForALU(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+  // Some operations depend on checking inputs after writing the result, e.g.
+  // MulI, but only on bailout paths, so the operands may be used at-start
+  // when there is no snapshot.
+ ins->setOperand(0,
+ ins->snapshot() ? useRegister(lhs) : useRegisterAtStart(lhs));
+ ins->setOperand(1, ins->snapshot() ? useRegisterOrConstant(rhs)
+ : useRegisterOrConstantAtStart(rhs));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+void LIRGeneratorARM::lowerForALUInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES, 0>* ins, MDefinition* mir,
+ MDefinition* input) {
+ ins->setInt64Operand(0, useInt64RegisterAtStart(input));
+ defineInt64ReuseInput(ins, mir, 0);
+}
+
+void LIRGeneratorARM::lowerForALUInt64(
+ LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs) {
+ ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+ ins->setInt64Operand(INT64_PIECES, useInt64OrConstant(rhs));
+ defineInt64ReuseInput(ins, mir, 0);
+}
+
+void LIRGeneratorARM::lowerForMulInt64(LMulI64* ins, MMul* mir,
+ MDefinition* lhs, MDefinition* rhs) {
+ bool needsTemp = true;
+
+ if (rhs->isConstant()) {
+ int64_t constant = rhs->toConstant()->toInt64();
+ int32_t shift = mozilla::FloorLog2(constant);
+ // See special cases in CodeGeneratorARM::visitMulI64
+ if (constant >= -1 && constant <= 2) {
+ needsTemp = false;
+ }
+ if (constant > 0 && int64_t(1) << shift == constant) {
+ needsTemp = false;
+ }
+ }
+
+ ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+ ins->setInt64Operand(INT64_PIECES, useInt64OrConstant(rhs));
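+  // Only the general multiply path needs the scratch register; the constant
+  // cases recognized above do not.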
+ if (needsTemp) {
+ ins->setTemp(0, temp());
+ }
+
+ defineInt64ReuseInput(ins, mir, 0);
+}
+
+void LIRGeneratorARM::lowerForCompareI64AndBranch(MTest* mir, MCompare* comp,
+ JSOp op, MDefinition* left,
+ MDefinition* right,
+ MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse) {
+ LCompareI64AndBranch* lir = new (alloc())
+ LCompareI64AndBranch(comp, op, useInt64Register(left),
+ useInt64OrConstant(right), ifTrue, ifFalse);
+ add(lir, mir);
+}
+
+void LIRGeneratorARM::lowerForFPU(LInstructionHelper<1, 1, 0>* ins,
+ MDefinition* mir, MDefinition* input) {
+ ins->setOperand(0, useRegisterAtStart(input));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+template <size_t Temps>
+void LIRGeneratorARM::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ ins->setOperand(0, useRegisterAtStart(lhs));
+ ins->setOperand(1, useRegisterAtStart(rhs));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+template void LIRGeneratorARM::lowerForFPU(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs);
+template void LIRGeneratorARM::lowerForFPU(LInstructionHelper<1, 2, 1>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs);
+
+void LIRGeneratorARM::lowerForBitAndAndBranch(LBitAndAndBranch* baab,
+ MInstruction* mir,
+ MDefinition* lhs,
+ MDefinition* rhs) {
+ baab->setOperand(0, useRegisterAtStart(lhs));
+ baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
+ add(baab, mir);
+}
+
+void LIRGeneratorARM::lowerWasmBuiltinTruncateToInt32(
+ MWasmBuiltinTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);
+
+ if (opd->type() == MIRType::Double) {
+ define(new (alloc()) LWasmBuiltinTruncateDToInt32(
+ useRegister(opd), useFixedAtStart(ins->instance(), InstanceReg),
+ LDefinition::BogusTemp()),
+ ins);
+ return;
+ }
+
+ define(new (alloc()) LWasmBuiltinTruncateFToInt32(
+ useRegister(opd), useFixedAtStart(ins->instance(), InstanceReg),
+ LDefinition::BogusTemp()),
+ ins);
+}
+
+void LIRGeneratorARM::lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition,
+ LBlock* block, size_t lirIndex) {
+ MDefinition* operand = phi->getOperand(inputPosition);
+ LPhi* type = block->getPhi(lirIndex + VREG_TYPE_OFFSET);
+ LPhi* payload = block->getPhi(lirIndex + VREG_DATA_OFFSET);
+ type->setOperand(
+ inputPosition,
+ LUse(operand->virtualRegister() + VREG_TYPE_OFFSET, LUse::ANY));
+ payload->setOperand(inputPosition,
+ LUse(VirtualRegisterOfPayload(operand), LUse::ANY));
+}
+
+void LIRGeneratorARM::lowerForShift(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ ins->setOperand(0, useRegister(lhs));
+ ins->setOperand(1, useRegisterOrConstant(rhs));
+ define(ins, mir);
+}
+
+template <size_t Temps>
+void LIRGeneratorARM::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs) {
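+  // Rotates by a non-constant amount need a scratch register in the code
+  // generator; constant rotates and ordinary shifts do not.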
+ if (mir->isRotate() && !rhs->isConstant()) {
+ ins->setTemp(0, temp());
+ }
+
+ ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+ ins->setOperand(INT64_PIECES, useRegisterOrConstant(rhs));
+ defineInt64ReuseInput(ins, mir, 0);
+}
+
+template void LIRGeneratorARM::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+template void LIRGeneratorARM::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 1>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+
+void LIRGeneratorARM::lowerDivI(MDiv* div) {
+ if (div->isUnsigned()) {
+ lowerUDiv(div);
+ return;
+ }
+
+ // Division instructions are slow. Division by constant denominators can be
+ // rewritten to use other instructions.
+ if (div->rhs()->isConstant()) {
+ int32_t rhs = div->rhs()->toConstant()->toInt32();
+ // Check for division by a positive power of two, which is an easy and
+ // important case to optimize. Note that other optimizations are also
+ // possible; division by negative powers of two can be optimized in a
+ // similar manner as positive powers of two, and division by other
+ // constants can be optimized by a reciprocal multiplication technique.
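+    // For example, a division by 8 becomes an arithmetic right shift by 3,
+    // with the code generator adding the fixup needed to round negative
+    // numerators toward zero.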
+ int32_t shift = FloorLog2(rhs);
+ if (rhs > 0 && 1 << shift == rhs) {
+ LDivPowTwoI* lir =
+ new (alloc()) LDivPowTwoI(useRegisterAtStart(div->lhs()), shift);
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+ return;
+ }
+ }
+
+ if (HasIDIV()) {
+ LDivI* lir = new (alloc())
+ LDivI(useRegister(div->lhs()), useRegister(div->rhs()), temp());
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+ return;
+ }
+
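+  // No hardware divide: emit a call to the software division helper, which
+  // takes its operands in r0/r1 and produces its result in the ABI return
+  // register.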
+ LSoftDivI* lir = new (alloc()) LSoftDivI(useFixedAtStart(div->lhs(), r0),
+ useFixedAtStart(div->rhs(), r1));
+
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+
+ defineReturn(lir, div);
+}
+
+void LIRGeneratorARM::lowerNegI(MInstruction* ins, MDefinition* input) {
+ define(new (alloc()) LNegI(useRegisterAtStart(input)), ins);
+}
+
+void LIRGeneratorARM::lowerNegI64(MInstruction* ins, MDefinition* input) {
+  // Reuse the input. With define + use-at-start, the output could end up in
+  // the same register pair as the input but in reverse order. Reusing the
+  // input probably also spills less than the alternative, define + use.
+ defineInt64ReuseInput(new (alloc()) LNegI64(useInt64RegisterAtStart(input)),
+ ins, 0);
+}
+
+void LIRGenerator::visitAbs(MAbs* ins) {
+ define(allocateAbs(ins, useRegisterAtStart(ins->input())), ins);
+}
+
+void LIRGeneratorARM::lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs) {
+ LMulI* lir = new (alloc()) LMulI;
+ if (mul->fallible()) {
+ assignSnapshot(lir, mul->bailoutKind());
+ }
+ lowerForALU(lir, mul, lhs, rhs);
+}
+
+void LIRGeneratorARM::lowerModI(MMod* mod) {
+ if (mod->isUnsigned()) {
+ lowerUMod(mod);
+ return;
+ }
+
+ if (mod->rhs()->isConstant()) {
+ int32_t rhs = mod->rhs()->toConstant()->toInt32();
+ int32_t shift = FloorLog2(rhs);
+ if (rhs > 0 && 1 << shift == rhs) {
+ LModPowTwoI* lir =
+ new (alloc()) LModPowTwoI(useRegister(mod->lhs()), shift);
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+ return;
+ }
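+    // Divisors of the form 2^k - 1 (3, 7, 15, ...) can also be handled
+    // without a divide; LModMaskI is given the mask width k = shift + 1.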
+ if (shift < 31 && (1 << (shift + 1)) - 1 == rhs) {
+ MOZ_ASSERT(rhs);
+ LModMaskI* lir = new (alloc())
+ LModMaskI(useRegister(mod->lhs()), temp(), temp(), shift + 1);
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+ return;
+ }
+ }
+
+ if (HasIDIV()) {
+ LModI* lir =
+ new (alloc()) LModI(useRegister(mod->lhs()), useRegister(mod->rhs()));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+ return;
+ }
+
+ LSoftModI* lir =
+ new (alloc()) LSoftModI(useFixedAtStart(mod->lhs(), r0),
+ useFixedAtStart(mod->rhs(), r1), tempFixed(r2));
+
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+
+ defineReturn(lir, mod);
+}
+
+void LIRGeneratorARM::lowerDivI64(MDiv* div) {
+ MOZ_CRASH("We use MWasmBuiltinDivI64 instead.");
+}
+
+void LIRGeneratorARM::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) {
+ if (div->isUnsigned()) {
+ LUDivOrModI64* lir = new (alloc())
+ LUDivOrModI64(useInt64RegisterAtStart(div->lhs()),
+ useInt64RegisterAtStart(div->rhs()),
+ useFixedAtStart(div->instance(), InstanceReg));
+ defineReturn(lir, div);
+ return;
+ }
+
+ LDivOrModI64* lir = new (alloc()) LDivOrModI64(
+ useInt64RegisterAtStart(div->lhs()), useInt64RegisterAtStart(div->rhs()),
+ useFixedAtStart(div->instance(), InstanceReg));
+ defineReturn(lir, div);
+}
+
+void LIRGeneratorARM::lowerModI64(MMod* mod) {
+ MOZ_CRASH("We use MWasmBuiltinModI64 instead.");
+}
+
+void LIRGeneratorARM::lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod) {
+ if (mod->isUnsigned()) {
+ LUDivOrModI64* lir = new (alloc())
+ LUDivOrModI64(useInt64RegisterAtStart(mod->lhs()),
+ useInt64RegisterAtStart(mod->rhs()),
+ useFixedAtStart(mod->instance(), InstanceReg));
+ defineReturn(lir, mod);
+ return;
+ }
+
+ LDivOrModI64* lir = new (alloc()) LDivOrModI64(
+ useInt64RegisterAtStart(mod->lhs()), useInt64RegisterAtStart(mod->rhs()),
+ useFixedAtStart(mod->instance(), InstanceReg));
+ defineReturn(lir, mod);
+}
+
+void LIRGeneratorARM::lowerUDivI64(MDiv* div) {
+ MOZ_CRASH("We use MWasmBuiltinDivI64 instead.");
+}
+
+void LIRGeneratorARM::lowerUModI64(MMod* mod) {
+ MOZ_CRASH("We use MWasmBuiltinModI64 instead.");
+}
+
+void LIRGenerator::visitPowHalf(MPowHalf* ins) {
+ MDefinition* input = ins->input();
+ MOZ_ASSERT(input->type() == MIRType::Double);
+ LPowHalfD* lir = new (alloc()) LPowHalfD(useRegisterAtStart(input));
+ defineReuseInput(lir, ins, 0);
+}
+
+void LIRGeneratorARM::lowerWasmSelectI(MWasmSelect* select) {
+ auto* lir = new (alloc())
+ LWasmSelect(useRegisterAtStart(select->trueExpr()),
+ useAny(select->falseExpr()), useRegister(select->condExpr()));
+ defineReuseInput(lir, select, LWasmSelect::TrueExprIndex);
+}
+
+void LIRGeneratorARM::lowerWasmSelectI64(MWasmSelect* select) {
+ auto* lir = new (alloc()) LWasmSelectI64(
+ useInt64RegisterAtStart(select->trueExpr()),
+ useInt64(select->falseExpr()), useRegister(select->condExpr()));
+ defineInt64ReuseInput(lir, select, LWasmSelectI64::TrueExprIndex);
+}
+
+LTableSwitch* LIRGeneratorARM::newLTableSwitch(const LAllocation& in,
+ const LDefinition& inputCopy,
+ MTableSwitch* tableswitch) {
+ return new (alloc()) LTableSwitch(in, inputCopy, tableswitch);
+}
+
+LTableSwitchV* LIRGeneratorARM::newLTableSwitchV(MTableSwitch* tableswitch) {
+ return new (alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)), temp(),
+ tempDouble(), tableswitch);
+}
+
+void LIRGeneratorARM::lowerUrshD(MUrsh* mir) {
+ MDefinition* lhs = mir->lhs();
+ MDefinition* rhs = mir->rhs();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Int32);
+ MOZ_ASSERT(rhs->type() == MIRType::Int32);
+
+ LUrshD* lir = new (alloc())
+ LUrshD(useRegister(lhs), useRegisterOrConstant(rhs), temp());
+ define(lir, mir);
+}
+
+void LIRGeneratorARM::lowerPowOfTwoI(MPow* mir) {
+ int32_t base = mir->input()->toConstant()->toInt32();
+ MDefinition* power = mir->power();
+
+ auto* lir = new (alloc()) LPowOfTwoI(useRegister(power), base);
+ assignSnapshot(lir, mir->bailoutKind());
+ define(lir, mir);
+}
+
+void LIRGeneratorARM::lowerBigIntLsh(MBigIntLsh* ins) {
+ auto* lir = new (alloc()) LBigIntLsh(
+ useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), temp(), temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM::lowerBigIntRsh(MBigIntRsh* ins) {
+ auto* lir = new (alloc()) LBigIntRsh(
+ useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), temp(), temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM::lowerBigIntDiv(MBigIntDiv* ins) {
+ LDefinition temp1, temp2;
+ if (HasIDIV()) {
+ temp1 = temp();
+ temp2 = temp();
+ } else {
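+    // Without hardware division the code generator calls out to a software
+    // divide helper, which uses r0/r1, so reserve them as fixed temps.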
+ temp1 = tempFixed(r0);
+ temp2 = tempFixed(r1);
+ }
+ auto* lir = new (alloc()) LBigIntDiv(useRegister(ins->lhs()),
+ useRegister(ins->rhs()), temp1, temp2);
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM::lowerBigIntMod(MBigIntMod* ins) {
+ LDefinition temp1, temp2;
+ if (HasIDIV()) {
+ temp1 = temp();
+ temp2 = temp();
+ } else {
+ temp1 = tempFixed(r0);
+ temp2 = tempFixed(r1);
+ }
+ auto* lir = new (alloc()) LBigIntMod(useRegister(ins->lhs()),
+ useRegister(ins->rhs()), temp1, temp2);
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGenerator::visitWasmNeg(MWasmNeg* ins) {
+ if (ins->type() == MIRType::Int32) {
+ define(new (alloc()) LNegI(useRegisterAtStart(ins->input())), ins);
+ } else if (ins->type() == MIRType::Float32) {
+ define(new (alloc()) LNegF(useRegisterAtStart(ins->input())), ins);
+ } else {
+ MOZ_ASSERT(ins->type() == MIRType::Double);
+ define(new (alloc()) LNegD(useRegisterAtStart(ins->input())), ins);
+ }
+}
+
+void LIRGeneratorARM::lowerUDiv(MDiv* div) {
+ MDefinition* lhs = div->getOperand(0);
+ MDefinition* rhs = div->getOperand(1);
+
+ if (HasIDIV()) {
+ LUDiv* lir = new (alloc()) LUDiv;
+ lir->setOperand(0, useRegister(lhs));
+ lir->setOperand(1, useRegister(rhs));
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+ return;
+ }
+
+ LSoftUDivOrMod* lir = new (alloc())
+ LSoftUDivOrMod(useFixedAtStart(lhs, r0), useFixedAtStart(rhs, r1));
+
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+
+ defineReturn(lir, div);
+}
+
+void LIRGeneratorARM::lowerUMod(MMod* mod) {
+ MDefinition* lhs = mod->getOperand(0);
+ MDefinition* rhs = mod->getOperand(1);
+
+ if (HasIDIV()) {
+ LUMod* lir = new (alloc()) LUMod;
+ lir->setOperand(0, useRegister(lhs));
+ lir->setOperand(1, useRegister(rhs));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+ return;
+ }
+
+ LSoftUDivOrMod* lir = new (alloc())
+ LSoftUDivOrMod(useFixedAtStart(lhs, r0), useFixedAtStart(rhs, r1));
+
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+
+ defineReturn(lir, mod);
+}
+
+void LIRGenerator::visitWasmUnsignedToDouble(MWasmUnsignedToDouble* ins) {
+ MOZ_ASSERT(ins->input()->type() == MIRType::Int32);
+ LWasmUint32ToDouble* lir =
+ new (alloc()) LWasmUint32ToDouble(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmUnsignedToFloat32(MWasmUnsignedToFloat32* ins) {
+ MOZ_ASSERT(ins->input()->type() == MIRType::Int32);
+ LWasmUint32ToFloat32* lir =
+ new (alloc()) LWasmUint32ToFloat32(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmHeapBase(MWasmHeapBase* ins) {
+ auto* lir = new (alloc()) LWasmHeapBase(LAllocation());
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmLoad(MWasmLoad* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ if (ins->access().type() == Scalar::Int64 && ins->access().isAtomic()) {
+ auto* lir = new (alloc()) LWasmAtomicLoadI64(useRegisterAtStart(base));
+ defineInt64Fixed(lir, ins,
+ LInt64Allocation(LAllocation(AnyRegister(IntArgReg1)),
+ LAllocation(AnyRegister(IntArgReg0))));
+ return;
+ }
+
+ LAllocation ptr = useRegisterAtStart(base);
+
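+  // Accesses with an offset, and 64-bit accesses that are split into two
+  // word-sized loads, give the code generator a scratch copy of the base so
+  // it can form the effective address.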
+ if (ins->type() == MIRType::Int64) {
+ auto* lir = new (alloc()) LWasmLoadI64(ptr);
+ if (ins->access().offset() || ins->access().type() == Scalar::Int64) {
+ lir->setTemp(0, tempCopy(base, 0));
+ }
+ defineInt64(lir, ins);
+ return;
+ }
+
+ auto* lir = new (alloc()) LWasmLoad(ptr);
+ if (ins->access().offset()) {
+ lir->setTemp(0, tempCopy(base, 0));
+ }
+
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmStore(MWasmStore* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ if (ins->access().type() == Scalar::Int64 && ins->access().isAtomic()) {
+ auto* lir = new (alloc()) LWasmAtomicStoreI64(
+ useRegister(base),
+ useInt64Fixed(ins->value(), Register64(IntArgReg1, IntArgReg0)),
+ tempFixed(IntArgReg2), tempFixed(IntArgReg3));
+ add(lir, ins);
+ return;
+ }
+
+ LAllocation ptr = useRegisterAtStart(base);
+
+ if (ins->value()->type() == MIRType::Int64) {
+ LInt64Allocation value = useInt64RegisterAtStart(ins->value());
+ auto* lir = new (alloc()) LWasmStoreI64(ptr, value);
+ if (ins->access().offset() || ins->access().type() == Scalar::Int64) {
+ lir->setTemp(0, tempCopy(base, 0));
+ }
+ add(lir, ins);
+ return;
+ }
+
+ LAllocation value = useRegisterAtStart(ins->value());
+ auto* lir = new (alloc()) LWasmStore(ptr, value);
+
+ if (ins->access().offset()) {
+ lir->setTemp(0, tempCopy(base, 0));
+ }
+
+ add(lir, ins);
+}
+
+void LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ // For the ARM it is best to keep the 'base' in a register if a bounds check
+ // is needed.
+ LAllocation baseAlloc;
+ LAllocation limitAlloc;
+
+ if (base->isConstant() && !ins->needsBoundsCheck()) {
+    // A bounds check is only skipped for a non-negative index.
+ MOZ_ASSERT(base->toConstant()->toInt32() >= 0);
+ baseAlloc = LAllocation(base->toConstant());
+ } else {
+ baseAlloc = useRegisterAtStart(base);
+ if (ins->needsBoundsCheck()) {
+ MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
+ MOZ_ASSERT(boundsCheckLimit->type() == MIRType::Int32);
+ limitAlloc = useRegisterAtStart(boundsCheckLimit);
+ }
+ }
+
+ define(new (alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, LAllocation()),
+ ins);
+}
+
+void LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ LAllocation baseAlloc;
+ LAllocation limitAlloc;
+
+ if (base->isConstant() && !ins->needsBoundsCheck()) {
+ MOZ_ASSERT(base->toConstant()->toInt32() >= 0);
+ baseAlloc = LAllocation(base->toConstant());
+ } else {
+ baseAlloc = useRegisterAtStart(base);
+ if (ins->needsBoundsCheck()) {
+ MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
+ MOZ_ASSERT(boundsCheckLimit->type() == MIRType::Int32);
+ limitAlloc = useRegisterAtStart(boundsCheckLimit);
+ }
+ }
+
+ add(new (alloc()) LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()),
+ limitAlloc, LAllocation()),
+ ins);
+}
+
+void LIRGeneratorARM::lowerTruncateDToInt32(MTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Double);
+
+ define(new (alloc())
+ LTruncateDToInt32(useRegister(opd), LDefinition::BogusTemp()),
+ ins);
+}
+
+void LIRGeneratorARM::lowerTruncateFToInt32(MTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Float32);
+
+ define(new (alloc())
+ LTruncateFToInt32(useRegister(opd), LDefinition::BogusTemp()),
+ ins);
+}
+
+void LIRGenerator::visitAtomicExchangeTypedArrayElement(
+ MAtomicExchangeTypedArrayElement* ins) {
+ MOZ_ASSERT(HasLDSTREXBHD());
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
+ const LAllocation value = useRegister(ins->value());
+
+ if (Scalar::isBigIntType(ins->arrayType())) {
+ // The two register pairs must be distinct.
+ LInt64Definition temp1 = tempInt64Fixed(Register64(IntArgReg3, IntArgReg2));
+ LDefinition temp2 = tempFixed(IntArgReg1);
+
+ auto* lir = new (alloc()) LAtomicExchangeTypedArrayElement64(
+ elements, index, value, temp1, temp2);
+ defineFixed(lir, ins, LAllocation(AnyRegister(IntArgReg0)));
+ assignSafepoint(lir, ins);
+ return;
+ }
+
+ MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);
+
+ // If the target is a floating register then we need a temp at the
+ // CodeGenerator level for creating the result.
+
+ LDefinition tempDef = LDefinition::BogusTemp();
+ if (ins->arrayType() == Scalar::Uint32) {
+ MOZ_ASSERT(ins->type() == MIRType::Double);
+ tempDef = temp();
+ }
+
+ LAtomicExchangeTypedArrayElement* lir = new (alloc())
+ LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);
+
+ define(lir, ins);
+}
+
+void LIRGenerator::visitAtomicTypedArrayElementBinop(
+ MAtomicTypedArrayElementBinop* ins) {
+ MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
+ const LAllocation value = useRegister(ins->value());
+
+ if (Scalar::isBigIntType(ins->arrayType())) {
+ // Wasm additionally pins the value register to `FetchOpVal64`, but it's
+ // unclear why this was deemed necessary.
+ LInt64Definition temp1 = tempInt64();
+ LInt64Definition temp2 = tempInt64Fixed(FetchOpTmp64);
+
+ if (ins->isForEffect()) {
+ auto* lir = new (alloc()) LAtomicTypedArrayElementBinopForEffect64(
+ elements, index, value, temp1, temp2);
+ add(lir, ins);
+ return;
+ }
+
+ LInt64Definition temp3 = tempInt64Fixed(FetchOpOut64);
+
+ auto* lir = new (alloc()) LAtomicTypedArrayElementBinop64(
+ elements, index, value, temp1, temp2, temp3);
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+ return;
+ }
+
+ if (ins->isForEffect()) {
+ LAtomicTypedArrayElementBinopForEffect* lir = new (alloc())
+ LAtomicTypedArrayElementBinopForEffect(elements, index, value,
+ /* flagTemp= */ temp());
+ add(lir, ins);
+ return;
+ }
+
+ // For a Uint32Array with a known double result we need a temp for
+ // the intermediate output.
+ //
+ // Optimization opportunity (bug 1077317): We can do better by
+ // allowing 'value' to remain as an imm32 if it is small enough to
+ // fit in an instruction.
+
+ LDefinition flagTemp = temp();
+ LDefinition outTemp = LDefinition::BogusTemp();
+
+ if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
+ outTemp = temp();
+ }
+
+ // On arm, map flagTemp to temp1 and outTemp to temp2, at least for now.
+
+ LAtomicTypedArrayElementBinop* lir = new (alloc())
+ LAtomicTypedArrayElementBinop(elements, index, value, flagTemp, outTemp);
+ define(lir, ins);
+}
+
+void LIRGenerator::visitCompareExchangeTypedArrayElement(
+ MCompareExchangeTypedArrayElement* ins) {
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
+
+ const LAllocation newval = useRegister(ins->newval());
+ const LAllocation oldval = useRegister(ins->oldval());
+
+ if (Scalar::isBigIntType(ins->arrayType())) {
+ // The three register pairs must be distinct.
+ LInt64Definition temp1 = tempInt64Fixed(CmpXchgOld64);
+ LInt64Definition temp2 = tempInt64Fixed(CmpXchgNew64);
+ LInt64Definition temp3 = tempInt64Fixed(CmpXchgOut64);
+
+ auto* lir = new (alloc()) LCompareExchangeTypedArrayElement64(
+ elements, index, oldval, newval, temp1, temp2, temp3);
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+ return;
+ }
+
+ // If the target is a floating register then we need a temp at the
+ // CodeGenerator level for creating the result.
+ //
+ // Optimization opportunity (bug 1077317): We could do better by
+ // allowing oldval to remain an immediate, if it is small enough
+ // to fit in an instruction.
+
+ LDefinition tempDef = LDefinition::BogusTemp();
+ if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
+ tempDef = temp();
+ }
+
+ LCompareExchangeTypedArrayElement* lir =
+ new (alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval,
+ newval, tempDef);
+
+ define(lir, ins);
+}
+
+void LIRGeneratorARM::lowerAtomicLoad64(MLoadUnboxedScalar* ins) {
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->storageType());
+
+ auto* lir = new (alloc())
+ LAtomicLoad64(elements, index, temp(),
+ tempInt64Fixed(Register64(IntArgReg1, IntArgReg0)));
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM::lowerAtomicStore64(MStoreUnboxedScalar* ins) {
+ LUse elements = useRegister(ins->elements());
+ LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->writeType());
+ LAllocation value = useRegister(ins->value());
+ LInt64Definition temp1 = tempInt64Fixed(Register64(IntArgReg1, IntArgReg0));
+ LInt64Definition temp2 = tempInt64Fixed(Register64(IntArgReg3, IntArgReg2));
+
+ add(new (alloc()) LAtomicStore64(elements, index, value, temp1, temp2), ins);
+}
+
+void LIRGenerator::visitWasmCompareExchangeHeap(MWasmCompareExchangeHeap* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ if (ins->access().type() == Scalar::Int64) {
+ // The three register pairs must be distinct.
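+    // All three values stay live across the LDREXD/STREXD retry loop emitted
+    // by the code generator, hence the distinct fixed register pairs.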
+ auto* lir = new (alloc()) LWasmCompareExchangeI64(
+ useRegister(base), useInt64Fixed(ins->oldValue(), CmpXchgOld64),
+ useInt64Fixed(ins->newValue(), CmpXchgNew64));
+ defineInt64Fixed(lir, ins,
+ LInt64Allocation(LAllocation(AnyRegister(CmpXchgOutHi)),
+ LAllocation(AnyRegister(CmpXchgOutLo))));
+ return;
+ }
+
+ MOZ_ASSERT(ins->access().type() < Scalar::Float32);
+ MOZ_ASSERT(HasLDSTREXBHD(), "by HasPlatformSupport() constraints");
+
+ LWasmCompareExchangeHeap* lir = new (alloc())
+ LWasmCompareExchangeHeap(useRegister(base), useRegister(ins->oldValue()),
+ useRegister(ins->newValue()));
+
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmAtomicExchangeHeap(MWasmAtomicExchangeHeap* ins) {
+ MOZ_ASSERT(ins->base()->type() == MIRType::Int32);
+
+ if (ins->access().type() == Scalar::Int64) {
+ auto* lir = new (alloc()) LWasmAtomicExchangeI64(
+ useRegister(ins->base()), useInt64Fixed(ins->value(), XchgNew64),
+ ins->access());
+ defineInt64Fixed(lir, ins,
+ LInt64Allocation(LAllocation(AnyRegister(XchgOutHi)),
+ LAllocation(AnyRegister(XchgOutLo))));
+ return;
+ }
+
+ MOZ_ASSERT(ins->access().type() < Scalar::Float32);
+ MOZ_ASSERT(HasLDSTREXBHD(), "by HasPlatformSupport() constraints");
+
+ const LAllocation base = useRegister(ins->base());
+ const LAllocation value = useRegister(ins->value());
+ define(new (alloc()) LWasmAtomicExchangeHeap(base, value), ins);
+}
+
+void LIRGenerator::visitWasmAtomicBinopHeap(MWasmAtomicBinopHeap* ins) {
+ if (ins->access().type() == Scalar::Int64) {
+ auto* lir = new (alloc()) LWasmAtomicBinopI64(
+ useRegister(ins->base()), useInt64Fixed(ins->value(), FetchOpVal64),
+ tempFixed(FetchOpTmpLo), tempFixed(FetchOpTmpHi), ins->access(),
+ ins->operation());
+ defineInt64Fixed(lir, ins,
+ LInt64Allocation(LAllocation(AnyRegister(FetchOpOutHi)),
+ LAllocation(AnyRegister(FetchOpOutLo))));
+ return;
+ }
+
+ MOZ_ASSERT(ins->access().type() < Scalar::Float32);
+ MOZ_ASSERT(HasLDSTREXBHD(), "by HasPlatformSupport() constraints");
+
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ if (!ins->hasUses()) {
+ LWasmAtomicBinopHeapForEffect* lir =
+ new (alloc()) LWasmAtomicBinopHeapForEffect(useRegister(base),
+ useRegister(ins->value()),
+ /* flagTemp= */ temp());
+ add(lir, ins);
+ return;
+ }
+
+ LWasmAtomicBinopHeap* lir = new (alloc())
+ LWasmAtomicBinopHeap(useRegister(base), useRegister(ins->value()),
+ /* temp = */ LDefinition::BogusTemp(),
+ /* flagTemp= */ temp());
+ define(lir, ins);
+}
+
+void LIRGenerator::visitSubstr(MSubstr* ins) {
+ LSubstr* lir = new (alloc())
+ LSubstr(useRegister(ins->string()), useRegister(ins->begin()),
+ useRegister(ins->length()), temp(), temp(), tempByteOpRegister());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGenerator::visitWasmTruncateToInt64(MWasmTruncateToInt64* ins) {
+ MOZ_CRASH("We don't use MWasmTruncateToInt64 for arm");
+}
+
+void LIRGeneratorARM::lowerWasmBuiltinTruncateToInt64(
+ MWasmBuiltinTruncateToInt64* ins) {
+ MDefinition* opd = ins->input();
+ MDefinition* instance = ins->instance();
+ MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);
+
+ defineReturn(new (alloc())
+ LWasmTruncateToInt64(useRegisterAtStart(opd),
+ useFixedAtStart(instance, InstanceReg)),
+ ins);
+}
+
+void LIRGenerator::visitInt64ToFloatingPoint(MInt64ToFloatingPoint* ins) {
+ MOZ_CRASH("We use BuiltinInt64ToFloatingPoint instead.");
+}
+
+void LIRGeneratorARM::lowerBuiltinInt64ToFloatingPoint(
+ MBuiltinInt64ToFloatingPoint* ins) {
+ MOZ_ASSERT(ins->type() == MIRType::Double || ins->type() == MIRType::Float32);
+
+ auto* lir = new (alloc())
+ LInt64ToFloatingPointCall(useInt64RegisterAtStart(ins->input()),
+ useFixedAtStart(ins->instance(), InstanceReg));
+ defineReturn(lir, ins);
+}
+
+void LIRGenerator::visitCopySign(MCopySign* ins) {
+ MDefinition* lhs = ins->lhs();
+ MDefinition* rhs = ins->rhs();
+
+ MOZ_ASSERT(IsFloatingPointType(lhs->type()));
+ MOZ_ASSERT(lhs->type() == rhs->type());
+ MOZ_ASSERT(lhs->type() == ins->type());
+
+ LInstructionHelper<1, 2, 2>* lir;
+ if (lhs->type() == MIRType::Double) {
+ lir = new (alloc()) LCopySignD();
+ } else {
+ lir = new (alloc()) LCopySignF();
+ }
+
+ lir->setTemp(0, temp());
+ lir->setTemp(1, temp());
+
+ lowerForFPU(lir, ins, lhs, rhs);
+}
+
+void LIRGenerator::visitExtendInt32ToInt64(MExtendInt32ToInt64* ins) {
+ auto* lir =
+ new (alloc()) LExtendInt32ToInt64(useRegisterAtStart(ins->input()));
+ defineInt64(lir, ins);
+
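+  // Rewrite the first definition so it reuses the input register; the other
+  // half keeps the fresh register allocated by defineInt64 above.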
+ LDefinition def(LDefinition::GENERAL, LDefinition::MUST_REUSE_INPUT);
+ def.setReusedInput(0);
+ def.setVirtualRegister(ins->virtualRegister());
+
+ lir->setDef(0, def);
+}
+
+void LIRGenerator::visitSignExtendInt64(MSignExtendInt64* ins) {
+ defineInt64(new (alloc())
+ LSignExtendInt64(useInt64RegisterAtStart(ins->input())),
+ ins);
+}
+
+// On arm we specialize only the cases where the compare is {U,}Int32 and the
+// select is {U,}Int32.
+bool LIRGeneratorShared::canSpecializeWasmCompareAndSelect(
+ MCompare::CompareType compTy, MIRType insTy) {
+ return insTy == MIRType::Int32 && (compTy == MCompare::Compare_Int32 ||
+ compTy == MCompare::Compare_UInt32);
+}
+
+void LIRGeneratorShared::lowerWasmCompareAndSelect(MWasmSelect* ins,
+ MDefinition* lhs,
+ MDefinition* rhs,
+ MCompare::CompareType compTy,
+ JSOp jsop) {
+ MOZ_ASSERT(canSpecializeWasmCompareAndSelect(compTy, ins->type()));
+ auto* lir = new (alloc()) LWasmCompareAndSelect(
+ useRegister(lhs), useRegister(rhs), compTy, jsop,
+ useRegisterAtStart(ins->trueExpr()), useRegister(ins->falseExpr()));
+ defineReuseInput(lir, ins, LWasmCompareAndSelect::IfTrueExprIndex);
+}
+
+void LIRGenerator::visitWasmTernarySimd128(MWasmTernarySimd128* ins) {
+ MOZ_CRASH("ternary SIMD NYI");
+}
+
+void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
+ MOZ_CRASH("binary SIMD NYI");
+}
+
+#ifdef ENABLE_WASM_SIMD
+bool MWasmTernarySimd128::specializeBitselectConstantMaskAsShuffle(
+ int8_t shuffle[16]) {
+ return false;
+}
+bool MWasmTernarySimd128::canRelaxBitselect() { return false; }
+
+bool MWasmBinarySimd128::canPmaddubsw() { return false; }
+#endif
+
+bool MWasmBinarySimd128::specializeForConstantRhs() {
+  // There are probably many constant-RHS cases we would want to specialize
+  // here.
+ return false;
+}
+
+void LIRGenerator::visitWasmBinarySimd128WithConstant(
+ MWasmBinarySimd128WithConstant* ins) {
+ MOZ_CRASH("binary SIMD with constant NYI");
+}
+
+void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
+ MOZ_CRASH("shift SIMD NYI");
+}
+
+void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
+ MOZ_CRASH("shuffle SIMD NYI");
+}
+
+void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) {
+ MOZ_CRASH("replace-lane SIMD NYI");
+}
+
+void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) {
+ MOZ_CRASH("scalar-to-SIMD NYI");
+}
+
+void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
+ MOZ_CRASH("unary SIMD NYI");
+}
+
+void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) {
+ MOZ_CRASH("reduce-SIMD NYI");
+}
+
+void LIRGenerator::visitWasmLoadLaneSimd128(MWasmLoadLaneSimd128* ins) {
+ MOZ_CRASH("load-lane SIMD NYI");
+}
+
+void LIRGenerator::visitWasmStoreLaneSimd128(MWasmStoreLaneSimd128* ins) {
+ MOZ_CRASH("store-lane SIMD NYI");
+}
diff --git a/js/src/jit/arm/Lowering-arm.h b/js/src/jit/arm/Lowering-arm.h
new file mode 100644
index 0000000000..3f03d22941
--- /dev/null
+++ b/js/src/jit/arm/Lowering-arm.h
@@ -0,0 +1,118 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_Lowering_arm_h
+#define jit_arm_Lowering_arm_h
+
+#include "jit/shared/Lowering-shared.h"
+
+namespace js {
+namespace jit {
+
+class LIRGeneratorARM : public LIRGeneratorShared {
+ protected:
+ LIRGeneratorARM(MIRGenerator* gen, MIRGraph& graph, LIRGraph& lirGraph)
+ : LIRGeneratorShared(gen, graph, lirGraph) {}
+
+ // Returns a box allocation with type set to reg1 and payload set to reg2.
+ LBoxAllocation useBoxFixed(MDefinition* mir, Register reg1, Register reg2,
+ bool useAtStart = false);
+
+ // x86 has constraints on what registers can be formatted for 1-byte
+ // stores and loads; on ARM all registers are okay.
+ LAllocation useByteOpRegister(MDefinition* mir);
+ LAllocation useByteOpRegisterAtStart(MDefinition* mir);
+ LAllocation useByteOpRegisterOrNonDoubleConstant(MDefinition* mir);
+ LDefinition tempByteOpRegister();
+
+ inline LDefinition tempToUnbox() { return LDefinition::BogusTemp(); }
+
+ bool needTempForPostBarrier() { return false; }
+
+ void lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block,
+ size_t lirIndex);
+ void lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block,
+ size_t lirIndex);
+ void defineInt64Phi(MPhi* phi, size_t lirIndex);
+
+ void lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
+ MDefinition* lhs, MDefinition* rhs);
+ void lowerUrshD(MUrsh* mir);
+
+ void lowerPowOfTwoI(MPow* mir);
+
+ void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
+ MDefinition* input);
+ void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
+ MDefinition* lhs, MDefinition* rhs);
+
+ void lowerForALUInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES, 0>* ins,
+ MDefinition* mir, MDefinition* input);
+ void lowerForALUInt64(
+ LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+ void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs,
+ MDefinition* rhs);
+ template <size_t Temps>
+ void lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+
+ void lowerForCompareI64AndBranch(MTest* mir, MCompare* comp, JSOp op,
+ MDefinition* left, MDefinition* right,
+ MBasicBlock* ifTrue, MBasicBlock* ifFalse);
+
+ void lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
+ MDefinition* src);
+ template <size_t Temps>
+ void lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
+ MDefinition* lhs, MDefinition* rhs);
+
+ void lowerBuiltinInt64ToFloatingPoint(MBuiltinInt64ToFloatingPoint* ins);
+ void lowerWasmBuiltinTruncateToInt64(MWasmBuiltinTruncateToInt64* ins);
+ void lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
+ MDefinition* lhs, MDefinition* rhs);
+ void lowerWasmBuiltinTruncateToInt32(MWasmBuiltinTruncateToInt32* ins);
+ void lowerTruncateDToInt32(MTruncateToInt32* ins);
+ void lowerTruncateFToInt32(MTruncateToInt32* ins);
+ void lowerDivI(MDiv* div);
+ void lowerModI(MMod* mod);
+ void lowerDivI64(MDiv* div);
+ void lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div);
+ void lowerModI64(MMod* mod);
+ void lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod);
+ void lowerUDivI64(MDiv* div);
+ void lowerUModI64(MMod* mod);
+ void lowerNegI(MInstruction* ins, MDefinition* input);
+ void lowerNegI64(MInstruction* ins, MDefinition* input);
+ void lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs);
+ void lowerUDiv(MDiv* div);
+ void lowerUMod(MMod* mod);
+ void lowerWasmSelectI(MWasmSelect* select);
+ void lowerWasmSelectI64(MWasmSelect* select);
+
+ void lowerBigIntLsh(MBigIntLsh* ins);
+ void lowerBigIntRsh(MBigIntRsh* ins);
+ void lowerBigIntDiv(MBigIntDiv* ins);
+ void lowerBigIntMod(MBigIntMod* ins);
+
+ void lowerAtomicLoad64(MLoadUnboxedScalar* ins);
+ void lowerAtomicStore64(MStoreUnboxedScalar* ins);
+
+ LTableSwitch* newLTableSwitch(const LAllocation& in,
+ const LDefinition& inputCopy,
+ MTableSwitch* ins);
+ LTableSwitchV* newLTableSwitchV(MTableSwitch* ins);
+
+ void lowerPhi(MPhi* phi);
+};
+
+typedef LIRGeneratorARM LIRGeneratorSpecific;
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_Lowering_arm_h */
diff --git a/js/src/jit/arm/MacroAssembler-arm-inl.h b/js/src/jit/arm/MacroAssembler-arm-inl.h
new file mode 100644
index 0000000000..94d323207e
--- /dev/null
+++ b/js/src/jit/arm/MacroAssembler-arm-inl.h
@@ -0,0 +1,2582 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_MacroAssembler_arm_inl_h
+#define jit_arm_MacroAssembler_arm_inl_h
+
+#include "jit/arm/MacroAssembler-arm.h"
+
+namespace js {
+namespace jit {
+
+//{{{ check_macroassembler_style
+
+void MacroAssembler::move64(Register64 src, Register64 dest) {
+ move32(src.low, dest.low);
+ move32(src.high, dest.high);
+}
+
+void MacroAssembler::move64(Imm64 imm, Register64 dest) {
+ move32(Imm32(imm.value & 0xFFFFFFFFL), dest.low);
+ move32(Imm32((imm.value >> 32) & 0xFFFFFFFFL), dest.high);
+}
+
+void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) {
+ ma_vxfer(src, dest);
+}
+
+void MacroAssembler::moveGPRToFloat32(Register src, FloatRegister dest) {
+ ma_vxfer(src, dest);
+}
+
+void MacroAssembler::move8SignExtend(Register src, Register dest) {
+ as_sxtb(dest, src, 0);
+}
+
+void MacroAssembler::move16SignExtend(Register src, Register dest) {
+ as_sxth(dest, src, 0);
+}
+
+void MacroAssembler::moveDoubleToGPR64(FloatRegister src, Register64 dest) {
+ ma_vxfer(src, dest.low, dest.high);
+}
+
+void MacroAssembler::moveGPR64ToDouble(Register64 src, FloatRegister dest) {
+ ma_vxfer(src.low, src.high, dest);
+}
+
+void MacroAssembler::move64To32(Register64 src, Register dest) {
+ if (src.low != dest) {
+ move32(src.low, dest);
+ }
+}
+
+void MacroAssembler::move32To64ZeroExtend(Register src, Register64 dest) {
+ if (src != dest.low) {
+ move32(src, dest.low);
+ }
+ move32(Imm32(0), dest.high);
+}
+
+void MacroAssembler::move8To64SignExtend(Register src, Register64 dest) {
+ as_sxtb(dest.low, src, 0);
+ ma_asr(Imm32(31), dest.low, dest.high);
+}
+
+void MacroAssembler::move16To64SignExtend(Register src, Register64 dest) {
+ as_sxth(dest.low, src, 0);
+ ma_asr(Imm32(31), dest.low, dest.high);
+}
+
+void MacroAssembler::move32To64SignExtend(Register src, Register64 dest) {
+ if (src != dest.low) {
+ move32(src, dest.low);
+ }
+ ma_asr(Imm32(31), dest.low, dest.high);
+}
+
+void MacroAssembler::move32SignExtendToPtr(Register src, Register dest) {
+ move32(src, dest);
+}
+
+void MacroAssembler::move32ZeroExtendToPtr(Register src, Register dest) {
+ move32(src, dest);
+}
+
+// ===============================================================
+// Load instructions
+
+void MacroAssembler::load32SignExtendToPtr(const Address& src, Register dest) {
+ load32(src, dest);
+}
+
+void MacroAssembler::loadAbiReturnAddress(Register dest) { movePtr(lr, dest); }
+
+// ===============================================================
+// Logical instructions
+
+void MacroAssembler::not32(Register reg) { ma_mvn(reg, reg); }
+
+void MacroAssembler::notPtr(Register reg) { ma_mvn(reg, reg); }
+
+void MacroAssembler::and32(Register src, Register dest) {
+ ma_and(src, dest, SetCC);
+}
+
+void MacroAssembler::and32(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_and(imm, dest, scratch, SetCC);
+}
+
+void MacroAssembler::and32(Imm32 imm, const Address& dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(dest, scratch, scratch2);
+ ma_and(imm, scratch, scratch2);
+ ma_str(scratch, dest, scratch2);
+}
+
+void MacroAssembler::and32(const Address& src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(src, scratch, scratch2);
+ ma_and(scratch, dest, SetCC);
+}
+
+void MacroAssembler::andPtr(Register src, Register dest) { ma_and(src, dest); }
+
+void MacroAssembler::andPtr(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_and(imm, dest, scratch);
+}
+
+void MacroAssembler::and64(Imm64 imm, Register64 dest) {
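+  // ANDing with an all-ones word is a no-op, so skip those halves.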
+ if (imm.low().value != int32_t(0xFFFFFFFF)) {
+ and32(imm.low(), dest.low);
+ }
+ if (imm.hi().value != int32_t(0xFFFFFFFF)) {
+ and32(imm.hi(), dest.high);
+ }
+}
+
+void MacroAssembler::or64(Imm64 imm, Register64 dest) {
+ if (imm.low().value) {
+ or32(imm.low(), dest.low);
+ }
+ if (imm.hi().value) {
+ or32(imm.hi(), dest.high);
+ }
+}
+
+void MacroAssembler::xor64(Imm64 imm, Register64 dest) {
+ if (imm.low().value) {
+ xor32(imm.low(), dest.low);
+ }
+ if (imm.hi().value) {
+ xor32(imm.hi(), dest.high);
+ }
+}
+
+void MacroAssembler::or32(Register src, Register dest) { ma_orr(src, dest); }
+
+void MacroAssembler::or32(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_orr(imm, dest, scratch);
+}
+
+void MacroAssembler::or32(Imm32 imm, const Address& dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(dest, scratch, scratch2);
+ ma_orr(imm, scratch, scratch2);
+ ma_str(scratch, dest, scratch2);
+}
+
+void MacroAssembler::orPtr(Register src, Register dest) { ma_orr(src, dest); }
+
+void MacroAssembler::orPtr(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_orr(imm, dest, scratch);
+}
+
+void MacroAssembler::and64(Register64 src, Register64 dest) {
+ and32(src.low, dest.low);
+ and32(src.high, dest.high);
+}
+
+void MacroAssembler::or64(Register64 src, Register64 dest) {
+ or32(src.low, dest.low);
+ or32(src.high, dest.high);
+}
+
+void MacroAssembler::xor64(Register64 src, Register64 dest) {
+ ma_eor(src.low, dest.low);
+ ma_eor(src.high, dest.high);
+}
+
+void MacroAssembler::xor32(Register src, Register dest) {
+ ma_eor(src, dest, SetCC);
+}
+
+void MacroAssembler::xor32(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_eor(imm, dest, scratch, SetCC);
+}
+
+void MacroAssembler::xor32(Imm32 imm, const Address& dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(dest, scratch, scratch2);
+ ma_eor(imm, scratch, scratch2);
+ ma_str(scratch, dest, scratch2);
+}
+
+void MacroAssembler::xor32(const Address& src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(src, scratch, scratch2);
+ ma_eor(scratch, dest, SetCC);
+}
+
+void MacroAssembler::xorPtr(Register src, Register dest) { ma_eor(src, dest); }
+
+void MacroAssembler::xorPtr(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_eor(imm, dest, scratch);
+}
+
+// ===============================================================
+// Swap instructions
+
+void MacroAssembler::byteSwap16SignExtend(Register reg) { as_revsh(reg, reg); }
+
+void MacroAssembler::byteSwap16ZeroExtend(Register reg) {
+ as_rev16(reg, reg);
+ as_uxth(reg, reg, 0);
+}
+
+void MacroAssembler::byteSwap32(Register reg) { as_rev(reg, reg); }
+
+void MacroAssembler::byteSwap64(Register64 reg) {
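+  // Reverse the bytes within each 32-bit half, then exchange the halves.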
+ as_rev(reg.high, reg.high);
+ as_rev(reg.low, reg.low);
+
+ ScratchRegisterScope scratch(*this);
+ ma_mov(reg.high, scratch);
+ ma_mov(reg.low, reg.high);
+ ma_mov(scratch, reg.low);
+}
+
+// ===============================================================
+// Arithmetic functions
+
+void MacroAssembler::add32(Register src, Register dest) {
+ ma_add(src, dest, SetCC);
+}
+
+void MacroAssembler::add32(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_add(imm, dest, scratch, SetCC);
+}
+
+void MacroAssembler::add32(Imm32 imm, const Address& dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(dest, scratch, scratch2);
+ ma_add(imm, scratch, scratch2, SetCC);
+ ma_str(scratch, dest, scratch2);
+}
+
+void MacroAssembler::addPtr(Register src, Register dest) { ma_add(src, dest); }
+
+void MacroAssembler::addPtr(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_add(imm, dest, scratch);
+}
+
+void MacroAssembler::addPtr(ImmWord imm, Register dest) {
+ addPtr(Imm32(imm.value), dest);
+}
+
+void MacroAssembler::addPtr(Imm32 imm, const Address& dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(dest, scratch, scratch2);
+ ma_add(imm, scratch, scratch2);
+ ma_str(scratch, dest, scratch2);
+}
+
+void MacroAssembler::addPtr(const Address& src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(src, scratch, scratch2);
+ ma_add(scratch, dest, SetCC);
+}
+
+void MacroAssembler::add64(Register64 src, Register64 dest) {
+ ma_add(src.low, dest.low, SetCC);
+ ma_adc(src.high, dest.high);
+}
+
+void MacroAssembler::add64(Imm32 imm, Register64 dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_add(imm, dest.low, scratch, SetCC);
+ as_adc(dest.high, dest.high, Imm8(0), LeaveCC);
+}
+
+void MacroAssembler::add64(Imm64 imm, Register64 dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_add(imm.low(), dest.low, scratch, SetCC);
+ ma_adc(imm.hi(), dest.high, scratch, LeaveCC);
+}
+
+CodeOffset MacroAssembler::sub32FromStackPtrWithPatch(Register dest) {
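+  // Emit a patchable 'mov scratch, #imm' (filled in later by
+  // patchSub32FromStackPtr), then compute dest = sp - scratch.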
+ ScratchRegisterScope scratch(*this);
+ CodeOffset offs = CodeOffset(currentOffset());
+ ma_movPatchable(Imm32(0), scratch, Always);
+ ma_sub(getStackPointer(), scratch, dest);
+ return offs;
+}
+
+void MacroAssembler::patchSub32FromStackPtr(CodeOffset offset, Imm32 imm) {
+ ScratchRegisterScope scratch(*this);
+ BufferInstructionIterator iter(BufferOffset(offset.offset()), &m_buffer);
+ iter.maybeSkipAutomaticInstructions();
+ ma_mov_patch(imm, scratch, Always, HasMOVWT() ? L_MOVWT : L_LDR, iter);
+}
+
+void MacroAssembler::addDouble(FloatRegister src, FloatRegister dest) {
+ ma_vadd(dest, src, dest);
+}
+
+void MacroAssembler::addFloat32(FloatRegister src, FloatRegister dest) {
+ ma_vadd_f32(dest, src, dest);
+}
+
+void MacroAssembler::sub32(Register src, Register dest) {
+ ma_sub(src, dest, SetCC);
+}
+
+void MacroAssembler::sub32(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_sub(imm, dest, scratch, SetCC);
+}
+
+void MacroAssembler::sub32(const Address& src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(src, scratch, scratch2);
+ ma_sub(scratch, dest, SetCC);
+}
+
+void MacroAssembler::subPtr(Register src, Register dest) { ma_sub(src, dest); }
+
+void MacroAssembler::subPtr(Register src, const Address& dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(dest, scratch, scratch2);
+ ma_sub(src, scratch);
+ ma_str(scratch, dest, scratch2);
+}
+
+void MacroAssembler::subPtr(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_sub(imm, dest, scratch);
+}
+
+void MacroAssembler::subPtr(const Address& addr, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(addr, scratch, scratch2);
+ ma_sub(scratch, dest);
+}
+
+void MacroAssembler::sub64(Register64 src, Register64 dest) {
+ ma_sub(src.low, dest.low, SetCC);
+ ma_sbc(src.high, dest.high, LeaveCC);
+}
+
+void MacroAssembler::sub64(Imm64 imm, Register64 dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_sub(imm.low(), dest.low, scratch, SetCC);
+ ma_sbc(imm.hi(), dest.high, scratch, LeaveCC);
+}
+
+void MacroAssembler::subDouble(FloatRegister src, FloatRegister dest) {
+ ma_vsub(dest, src, dest);
+}
+
+void MacroAssembler::subFloat32(FloatRegister src, FloatRegister dest) {
+ ma_vsub_f32(dest, src, dest);
+}
+
+void MacroAssembler::mul32(Register rhs, Register srcDest) {
+ as_mul(srcDest, srcDest, rhs);
+}
+
+void MacroAssembler::mul32(Imm32 imm, Register srcDest) {
+ ScratchRegisterScope scratch(*this);
+ move32(imm, scratch);
+ mul32(scratch, srcDest);
+}
+
+void MacroAssembler::mulHighUnsigned32(Imm32 imm, Register src, Register dest) {
+ ScratchRegisterScope scratch(*this);
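+  // Compute the full 64-bit product and keep only its high 32 bits in dest;
+  // the low half is discarded via the scratch register.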
+ ma_umull(src, imm, dest, scratch, scratch);
+}
+
+void MacroAssembler::mulPtr(Register rhs, Register srcDest) {
+ as_mul(srcDest, srcDest, rhs);
+}
+
+void MacroAssembler::mul64(Imm64 imm, const Register64& dest) {
+ // LOW32 = LOW(LOW(dest) * LOW(imm));
+ // HIGH32 = LOW(HIGH(dest) * LOW(imm)) [multiply imm into upper bits]
+ // + LOW(LOW(dest) * HIGH(imm)) [multiply dest into upper bits]
+ // + HIGH(LOW(dest) * LOW(imm)) [carry]
+
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ // HIGH(dest) = LOW(HIGH(dest) * LOW(imm));
+ ma_mov(Imm32(imm.value & 0xFFFFFFFFL), scratch);
+ as_mul(dest.high, dest.high, scratch);
+
+ // high:low = LOW(dest) * LOW(imm);
+ as_umull(scratch2, scratch, dest.low, scratch);
+
+ // HIGH(dest) += high;
+ as_add(dest.high, dest.high, O2Reg(scratch2));
+
+ // HIGH(dest) += LOW(LOW(dest) * HIGH(imm));
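+  // Only HIGH(imm) == 5 is supported here: LOW(dest) * 5 is computed as
+  // LOW(dest) + (LOW(dest) << 2).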
+ if (((imm.value >> 32) & 0xFFFFFFFFL) == 5) {
+ as_add(scratch2, dest.low, lsl(dest.low, 2));
+ } else {
+ MOZ_CRASH("Not supported imm");
+ }
+ as_add(dest.high, dest.high, O2Reg(scratch2));
+
+ // LOW(dest) = low;
+ ma_mov(scratch, dest.low);
+}
+
+void MacroAssembler::mul64(Imm64 imm, const Register64& dest,
+ const Register temp) {
+ // LOW32 = LOW(LOW(dest) * LOW(src)); (1)
+ // HIGH32 = LOW(HIGH(dest) * LOW(src)) [multiply src into upper bits] (2)
+ // + LOW(LOW(dest) * HIGH(src)) [multiply dest into upper bits] (3)
+ // + HIGH(LOW(dest) * LOW(src)) [carry] (4)
+
+ MOZ_ASSERT(temp != dest.high && temp != dest.low);
+
+ // Compute mul64
+ ScratchRegisterScope scratch(*this);
+ ma_mul(dest.high, imm.low(), dest.high, scratch); // (2)
+ ma_mul(dest.low, imm.hi(), temp, scratch); // (3)
+ ma_add(dest.high, temp, temp);
+ ma_umull(dest.low, imm.low(), dest.high, dest.low, scratch); // (4) + (1)
+ ma_add(temp, dest.high, dest.high);
+}
+
+void MacroAssembler::mul64(const Register64& src, const Register64& dest,
+ const Register temp) {
+ // LOW32 = LOW(LOW(dest) * LOW(src)); (1)
+ // HIGH32 = LOW(HIGH(dest) * LOW(src)) [multiply src into upper bits] (2)
+ // + LOW(LOW(dest) * HIGH(src)) [multiply dest into upper bits] (3)
+ // + HIGH(LOW(dest) * LOW(src)) [carry] (4)
+
+ MOZ_ASSERT(dest != src);
+ MOZ_ASSERT(dest.low != src.high && dest.high != src.low);
+
+ // Compute mul64
+ ma_mul(dest.high, src.low, dest.high); // (2)
+ ma_mul(src.high, dest.low, temp); // (3)
+ ma_add(dest.high, temp, temp);
+ ma_umull(dest.low, src.low, dest.high, dest.low); // (4) + (1)
+ ma_add(temp, dest.high, dest.high);
+}
+
+void MacroAssembler::mulBy3(Register src, Register dest) {
+ as_add(dest, src, lsl(src, 1));
+}
+
+void MacroAssembler::mulFloat32(FloatRegister src, FloatRegister dest) {
+ ma_vmul_f32(dest, src, dest);
+}
+
+void MacroAssembler::mulDouble(FloatRegister src, FloatRegister dest) {
+ ma_vmul(dest, src, dest);
+}
+
+void MacroAssembler::mulDoublePtr(ImmPtr imm, Register temp,
+ FloatRegister dest) {
+ ScratchRegisterScope scratch(*this);
+ ScratchDoubleScope scratchDouble(*this);
+
+ movePtr(imm, scratch);
+ ma_vldr(Operand(Address(scratch, 0)).toVFPAddr(), scratchDouble);
+ mulDouble(scratchDouble, dest);
+}
+
+void MacroAssembler::quotient32(Register rhs, Register srcDest,
+ bool isUnsigned) {
+ MOZ_ASSERT(HasIDIV());
+ if (isUnsigned) {
+ ma_udiv(srcDest, rhs, srcDest);
+ } else {
+ ma_sdiv(srcDest, rhs, srcDest);
+ }
+}
+
+void MacroAssembler::remainder32(Register rhs, Register srcDest,
+ bool isUnsigned) {
+ MOZ_ASSERT(HasIDIV());
+
+ ScratchRegisterScope scratch(*this);
+ if (isUnsigned) {
+ ma_umod(srcDest, rhs, srcDest, scratch);
+ } else {
+ ma_smod(srcDest, rhs, srcDest, scratch);
+ }
+}
+
+void MacroAssembler::divFloat32(FloatRegister src, FloatRegister dest) {
+ ma_vdiv_f32(dest, src, dest);
+}
+
+void MacroAssembler::divDouble(FloatRegister src, FloatRegister dest) {
+ ma_vdiv(dest, src, dest);
+}
+
+void MacroAssembler::inc64(AbsoluteAddress dest) {
+ ScratchRegisterScope scratch(*this);
+
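+  // Spill r0/r1, load the 64-bit value at dest into them, add 1 with carry,
+  // store it back, and restore r0/r1.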
+ ma_strd(r0, r1, EDtrAddr(sp, EDtrOffImm(-8)), PreIndex);
+
+ ma_mov(Imm32((int32_t)dest.addr), scratch);
+ ma_ldrd(EDtrAddr(scratch, EDtrOffImm(0)), r0, r1);
+
+ as_add(r0, r0, Imm8(1), SetCC);
+ as_adc(r1, r1, Imm8(0), LeaveCC);
+
+ ma_strd(r0, r1, EDtrAddr(scratch, EDtrOffImm(0)));
+ ma_ldrd(EDtrAddr(sp, EDtrOffImm(8)), r0, r1, PostIndex);
+}
+
+void MacroAssembler::neg32(Register reg) { ma_neg(reg, reg, SetCC); }
+
+void MacroAssembler::neg64(Register64 reg) {
+ as_rsb(reg.low, reg.low, Imm8(0), SetCC);
+ as_rsc(reg.high, reg.high, Imm8(0));
+}
+
+void MacroAssembler::negPtr(Register reg) { neg32(reg); }
+
+void MacroAssembler::negateDouble(FloatRegister reg) { ma_vneg(reg, reg); }
+
+void MacroAssembler::negateFloat(FloatRegister reg) { ma_vneg_f32(reg, reg); }
+
+void MacroAssembler::abs32(Register src, Register dest) {
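+  // Compare src against zero, negate it into dest when it is negative, and
+  // otherwise copy it (when dest and src differ).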
+ as_cmp(src, Imm8(0));
+ as_rsb(dest, src, Imm8(0), LeaveCC, LessThan);
+ if (dest != src) {
+ as_mov(dest, O2Reg(src), LeaveCC, GreaterThanOrEqual);
+ }
+}
+
+void MacroAssembler::absFloat32(FloatRegister src, FloatRegister dest) {
+ ma_vabs_f32(src, dest);
+}
+
+void MacroAssembler::absDouble(FloatRegister src, FloatRegister dest) {
+ ma_vabs(src, dest);
+}
+
+void MacroAssembler::sqrtFloat32(FloatRegister src, FloatRegister dest) {
+ ma_vsqrt_f32(src, dest);
+}
+
+void MacroAssembler::sqrtDouble(FloatRegister src, FloatRegister dest) {
+ ma_vsqrt(src, dest);
+}
+
+void MacroAssembler::minFloat32(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ minMaxFloat32(srcDest, other, handleNaN, false);
+}
+
+void MacroAssembler::minDouble(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ minMaxDouble(srcDest, other, handleNaN, false);
+}
+
+void MacroAssembler::maxFloat32(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ minMaxFloat32(srcDest, other, handleNaN, true);
+}
+
+void MacroAssembler::maxDouble(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ minMaxDouble(srcDest, other, handleNaN, true);
+}
+
+// ===============================================================
+// Shift functions
+
+void MacroAssembler::lshiftPtr(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ ma_lsl(imm, dest, dest);
+}
+
+void MacroAssembler::lshiftPtr(Register src, Register dest) {
+ ma_lsl(src, dest, dest);
+}
+
+void MacroAssembler::lshift64(Imm32 imm, Register64 dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ if (imm.value == 0) {
+ return;
+ }
+
+ if (imm.value < 32) {
+ as_mov(dest.high, lsl(dest.high, imm.value));
+ as_orr(dest.high, dest.high, lsr(dest.low, 32 - imm.value));
+ as_mov(dest.low, lsl(dest.low, imm.value));
+ } else {
+ as_mov(dest.high, lsl(dest.low, imm.value - 32));
+ ma_mov(Imm32(0), dest.low);
+ }
+}
+
+void MacroAssembler::lshift64(Register unmaskedShift, Register64 dest) {
+  // dest.high = dest.high << shift | dest.low << (shift - 32)
+  //                                | dest.low >> (32 - shift)
+  // Note: one of the two dest.low shifts will always yield zero due to the
+  // negative shift amount.
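+  // For example, with shift == 8 the 'dest.low << (shift - 32)' term uses an
+  // amount of -24; only the low byte of the amount register is consulted
+  // (here 232), and any amount of 32 or more produces zero, so that term
+  // drops out.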
+
+ ScratchRegisterScope shift(*this);
+ as_and(shift, unmaskedShift, Imm8(0x3f));
+ as_mov(dest.high, lsl(dest.high, shift));
+ as_sub(shift, shift, Imm8(32));
+ as_orr(dest.high, dest.high, lsl(dest.low, shift));
+ ma_neg(shift, shift);
+ as_orr(dest.high, dest.high, lsr(dest.low, shift));
+ as_and(shift, unmaskedShift, Imm8(0x3f));
+ as_mov(dest.low, lsl(dest.low, shift));
+}
+
+void MacroAssembler::lshift32(Register src, Register dest) {
+ ma_lsl(src, dest, dest);
+}
+
+void MacroAssembler::flexibleLshift32(Register src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ as_and(scratch, src, Imm8(0x1F));
+ lshift32(scratch, dest);
+}
+
+void MacroAssembler::lshift32(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ lshiftPtr(imm, dest);
+}
+
+void MacroAssembler::rshiftPtr(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ if (imm.value) {
+ ma_lsr(imm, dest, dest);
+ }
+}
+
+void MacroAssembler::rshiftPtr(Register src, Register dest) {
+ ma_lsr(src, dest, dest);
+}
+
+void MacroAssembler::rshift32(Register src, Register dest) {
+ ma_lsr(src, dest, dest);
+}
+
+void MacroAssembler::flexibleRshift32(Register src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ as_and(scratch, src, Imm8(0x1F));
+ rshift32(scratch, dest);
+}
+
+void MacroAssembler::rshift32(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ rshiftPtr(imm, dest);
+}
+
+void MacroAssembler::rshiftPtrArithmetic(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ if (imm.value) {
+ ma_asr(imm, dest, dest);
+ }
+}
+
+void MacroAssembler::rshift64Arithmetic(Imm32 imm, Register64 dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ if (!imm.value) {
+ return;
+ }
+
+ if (imm.value < 32) {
+ as_mov(dest.low, lsr(dest.low, imm.value));
+ as_orr(dest.low, dest.low, lsl(dest.high, 32 - imm.value));
+ as_mov(dest.high, asr(dest.high, imm.value));
+ } else if (imm.value == 32) {
+ as_mov(dest.low, O2Reg(dest.high));
+ as_mov(dest.high, asr(dest.high, 31));
+ } else {
+ as_mov(dest.low, asr(dest.high, imm.value - 32));
+ as_mov(dest.high, asr(dest.high, 31));
+ }
+}
+
+void MacroAssembler::rshift64Arithmetic(Register unmaskedShift,
+ Register64 dest) {
+ Label proceed;
+
+  // dest.low = dest.low >>> shift | dest.high << (32 - shift)
+  // if (shift - 32 >= 0)
+  //   dest.low |= dest.high >> (shift - 32)
+  // Note: a negative shift amount yields zero as the result, except for the
+  // arithmetic (signed) right shift, so we test the amount and only perform
+  // that shift when it is not negative.
+ ScratchRegisterScope shift(*this);
+
+ as_and(shift, unmaskedShift, Imm8(0x3f));
+ as_mov(dest.low, lsr(dest.low, shift));
+ as_rsb(shift, shift, Imm8(32));
+ as_orr(dest.low, dest.low, lsl(dest.high, shift));
+ ma_neg(shift, shift, SetCC);
+ ma_b(&proceed, Signed);
+
+ as_orr(dest.low, dest.low, asr(dest.high, shift));
+
+ bind(&proceed);
+ as_and(shift, unmaskedShift, Imm8(0x3f));
+ as_mov(dest.high, asr(dest.high, shift));
+}
+
+void MacroAssembler::rshift32Arithmetic(Register src, Register dest) {
+ ma_asr(src, dest, dest);
+}
+
+void MacroAssembler::rshift32Arithmetic(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ rshiftPtrArithmetic(imm, dest);
+}
+
+void MacroAssembler::flexibleRshift32Arithmetic(Register src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ as_and(scratch, src, Imm8(0x1F));
+ rshift32Arithmetic(scratch, dest);
+}
+
+void MacroAssembler::rshift64(Imm32 imm, Register64 dest) {
+  MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ if (!imm.value) {
+ return;
+ }
+
+ if (imm.value < 32) {
+ as_mov(dest.low, lsr(dest.low, imm.value));
+ as_orr(dest.low, dest.low, lsl(dest.high, 32 - imm.value));
+ as_mov(dest.high, lsr(dest.high, imm.value));
+ } else if (imm.value == 32) {
+ ma_mov(dest.high, dest.low);
+ ma_mov(Imm32(0), dest.high);
+ } else {
+ ma_lsr(Imm32(imm.value - 32), dest.high, dest.low);
+ ma_mov(Imm32(0), dest.high);
+ }
+}
+
+void MacroAssembler::rshift64(Register unmaskedShift, Register64 dest) {
+  // dest.low = dest.low >> shift | dest.high >> (shift - 32)
+  //                              | dest.high << (32 - shift)
+  // Note: one of the two dest.high shifts will always yield zero due to the
+  // negative shift amount.
+
+ ScratchRegisterScope shift(*this);
+ as_and(shift, unmaskedShift, Imm8(0x3f));
+ as_mov(dest.low, lsr(dest.low, shift));
+ as_sub(shift, shift, Imm8(32));
+ as_orr(dest.low, dest.low, lsr(dest.high, shift));
+ ma_neg(shift, shift);
+ as_orr(dest.low, dest.low, lsl(dest.high, shift));
+ as_and(shift, unmaskedShift, Imm8(0x3f));
+ as_mov(dest.high, lsr(dest.high, shift));
+}
+
+// ===============================================================
+// Rotate functions
+void MacroAssembler::rotateLeft(Imm32 count, Register input, Register dest) {
+ if (count.value) {
+ ma_rol(count, input, dest);
+ } else {
+ ma_mov(input, dest);
+ }
+}
+
+void MacroAssembler::rotateLeft(Register count, Register input, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_rol(count, input, dest, scratch);
+}
+
+void MacroAssembler::rotateLeft64(Imm32 count, Register64 input,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(temp == InvalidReg);
+ MOZ_ASSERT(input.low != dest.high && input.high != dest.low);
+
+ int32_t amount = count.value & 0x3f;
+ if (amount > 32) {
+ rotateRight64(Imm32(64 - amount), input, dest, temp);
+ } else {
+ ScratchRegisterScope scratch(*this);
+ if (amount == 0) {
+ ma_mov(input.low, dest.low);
+ ma_mov(input.high, dest.high);
+ } else if (amount == 32) {
+ ma_mov(input.low, scratch);
+ ma_mov(input.high, dest.low);
+ ma_mov(scratch, dest.high);
+ } else {
+ MOZ_ASSERT(0 < amount && amount < 32);
+ ma_mov(dest.high, scratch);
+ as_mov(dest.high, lsl(dest.high, amount));
+ as_orr(dest.high, dest.high, lsr(dest.low, 32 - amount));
+ as_mov(dest.low, lsl(dest.low, amount));
+ as_orr(dest.low, dest.low, lsr(scratch, 32 - amount));
+ }
+ }
+}
+
+void MacroAssembler::rotateLeft64(Register shift, Register64 src,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(shift != temp);
+ MOZ_ASSERT(src == dest);
+ MOZ_ASSERT(temp != src.low && temp != src.high);
+ MOZ_ASSERT(shift != src.low && shift != src.high);
+ MOZ_ASSERT(temp != InvalidReg);
+
+ ScratchRegisterScope shift_value(*this);
+ Label high, done;
+
+ ma_mov(src.high, temp);
+ as_and(shift_value, shift, Imm8(0x3f));
+ as_cmp(shift_value, Imm8(32));
+ ma_b(&high, GreaterThanOrEqual);
+
+ // high = high << shift | low >> 32 - shift
+ // low = low << shift | high >> 32 - shift
+ as_mov(dest.high, lsl(src.high, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.high, dest.high, lsr(src.low, shift_value));
+
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_mov(dest.low, lsl(src.low, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.low, dest.low, lsr(temp, shift_value));
+
+ ma_b(&done);
+
+ // A 32 - 64 shift is a 0 - 32 shift in the other direction.
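+  // (E.g. a rotate left by 40 is a rotate right by 24.)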
+ bind(&high);
+ as_rsb(shift_value, shift_value, Imm8(64));
+
+ as_mov(dest.high, lsr(src.high, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.high, dest.high, lsl(src.low, shift_value));
+
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_mov(dest.low, lsr(src.low, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.low, dest.low, lsl(temp, shift_value));
+
+ bind(&done);
+}
+
+void MacroAssembler::rotateRight(Imm32 count, Register input, Register dest) {
+ if (count.value) {
+ ma_ror(count, input, dest);
+ } else {
+ ma_mov(input, dest);
+ }
+}
+
+void MacroAssembler::rotateRight(Register count, Register input,
+ Register dest) {
+ ma_ror(count, input, dest);
+}
+
+void MacroAssembler::rotateRight64(Imm32 count, Register64 input,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(temp == InvalidReg);
+ MOZ_ASSERT(input.low != dest.high && input.high != dest.low);
+
+ int32_t amount = count.value & 0x3f;
+ if (amount > 32) {
+ rotateLeft64(Imm32(64 - amount), input, dest, temp);
+ } else {
+ ScratchRegisterScope scratch(*this);
+ if (amount == 0) {
+ ma_mov(input.low, dest.low);
+ ma_mov(input.high, dest.high);
+ } else if (amount == 32) {
+ ma_mov(input.low, scratch);
+ ma_mov(input.high, dest.low);
+ ma_mov(scratch, dest.high);
+ } else {
+ MOZ_ASSERT(0 < amount && amount < 32);
+ ma_mov(dest.high, scratch);
+ as_mov(dest.high, lsr(dest.high, amount));
+ as_orr(dest.high, dest.high, lsl(dest.low, 32 - amount));
+ as_mov(dest.low, lsr(dest.low, amount));
+ as_orr(dest.low, dest.low, lsl(scratch, 32 - amount));
+ }
+ }
+}
+
+void MacroAssembler::rotateRight64(Register shift, Register64 src,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(shift != temp);
+ MOZ_ASSERT(src == dest);
+ MOZ_ASSERT(temp != src.low && temp != src.high);
+ MOZ_ASSERT(shift != src.low && shift != src.high);
+ MOZ_ASSERT(temp != InvalidReg);
+
+ ScratchRegisterScope shift_value(*this);
+ Label high, done;
+
+ ma_mov(src.high, temp);
+ as_and(shift_value, shift, Imm8(0x3f));
+ as_cmp(shift_value, Imm8(32));
+ ma_b(&high, GreaterThanOrEqual);
+
+ // high = high >> shift | low << 32 - shift
+ // low = low >> shift | high << 32 - shift
+ as_mov(dest.high, lsr(src.high, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.high, dest.high, lsl(src.low, shift_value));
+
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_mov(dest.low, lsr(src.low, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.low, dest.low, lsl(temp, shift_value));
+
+ ma_b(&done);
+
+ // A 32 - 64 shift is a 0 - 32 shift in the other direction.
+ bind(&high);
+ as_rsb(shift_value, shift_value, Imm8(64));
+
+ as_mov(dest.high, lsl(src.high, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.high, dest.high, lsr(src.low, shift_value));
+
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_mov(dest.low, lsl(src.low, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.low, dest.low, lsr(temp, shift_value));
+
+ bind(&done);
+}
+
+// ===============================================================
+// Condition functions
+
+void MacroAssembler::cmp8Set(Condition cond, Address lhs, Imm32 rhs,
+ Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ // Inlined calls to load8{Zero,Sign}Extend() and cmp32Set() to acquire
+ // exclusive access to scratch registers.
+
+ bool isSigned;
+ Imm32 imm(0);
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ isSigned = false;
+ imm = Imm32(uint8_t(rhs.value));
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ isSigned = true;
+ imm = Imm32(int8_t(rhs.value));
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+
+ ma_dataTransferN(IsLoad, 8, isSigned, lhs.base, Imm32(lhs.offset), scratch,
+ scratch2);
+ ma_cmp(scratch, imm, scratch2);
+ emitSet(cond, dest);
+}
+
+void MacroAssembler::cmp16Set(Condition cond, Address lhs, Imm32 rhs,
+ Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ // Inlined calls to load16{Zero,Sign}Extend() and cmp32Set() to acquire
+ // exclusive access to scratch registers.
+
+ bool isSigned;
+ Imm32 imm(0);
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ isSigned = false;
+ imm = Imm32(uint16_t(rhs.value));
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ isSigned = true;
+ imm = Imm32(int16_t(rhs.value));
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+
+ ma_dataTransferN(IsLoad, 16, isSigned, lhs.base, Imm32(lhs.offset), scratch,
+ scratch2);
+ ma_cmp(scratch, imm, scratch2);
+ emitSet(cond, dest);
+}
+
+template <typename T1, typename T2>
+void MacroAssembler::cmp32Set(Condition cond, T1 lhs, T2 rhs, Register dest) {
+ cmp32(lhs, rhs);
+ emitSet(cond, dest);
+}
+
+void MacroAssembler::cmp64Set(Condition cond, Address lhs, Imm64 rhs,
+ Register dest) {
+ Label success, done;
+
+ branch64(cond, lhs, rhs, &success);
+ move32(Imm32(0), dest);
+ jump(&done);
+ bind(&success);
+ move32(Imm32(1), dest);
+ bind(&done);
+}
+
+template <typename T1, typename T2>
+void MacroAssembler::cmpPtrSet(Condition cond, T1 lhs, T2 rhs, Register dest) {
+ cmpPtr(lhs, rhs);
+ emitSet(cond, dest);
+}
+
+// ===============================================================
+// Bit counting functions
+
+void MacroAssembler::clz32(Register src, Register dest, bool knownNotZero) {
+ ma_clz(src, dest);
+}
+
+void MacroAssembler::clz64(Register64 src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+
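+  // If the high word is non-zero, its leading-zero count (< 32) is the
+  // answer; otherwise the answer is 32 + clz(src.low). The comparison
+  // against 32 below selects between the two cases.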
+ ma_clz(src.high, scratch);
+ as_cmp(scratch, Imm8(32));
+ ma_mov(scratch, dest, LeaveCC, NotEqual);
+ ma_clz(src.low, dest, Equal);
+ as_add(dest, dest, Imm8(32), LeaveCC, Equal);
+}
+
+void MacroAssembler::ctz32(Register src, Register dest, bool knownNotZero) {
+ ScratchRegisterScope scratch(*this);
+ ma_ctz(src, dest, scratch);
+}
+
+void MacroAssembler::ctz64(Register64 src, Register dest) {
+ Label done, high;
+ as_cmp(src.low, Imm8(0));
+ ma_b(&high, Equal);
+
+ ctz32(src.low, dest, /* knownNotZero = */ true);
+ ma_b(&done);
+
+ bind(&high);
+ ctz32(src.high, dest, /* knownNotZero = */ false);
+ as_add(dest, dest, Imm8(32));
+
+ bind(&done);
+}
+
+void MacroAssembler::popcnt32(Register input, Register output, Register tmp) {
+ // Equivalent to GCC output of mozilla::CountPopulation32()
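+  // This is the standard SWAR reduction, computed in place:
+  //   x -= (x >> 1) & 0x55555555;                      // 2-bit sums
+  //   x = (x & 0x33333333) + ((x >> 2) & 0x33333333);  // 4-bit sums
+  //   x = (x + (x >> 4)) & 0x0F0F0F0F;                 // 8-bit sums
+  //   x += x << 8; x += x << 16; x >>= 24;             // add the four bytes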
+
+ ScratchRegisterScope scratch(*this);
+
+ if (input != output) {
+ ma_mov(input, output);
+ }
+ as_mov(tmp, asr(output, 1));
+ ma_and(Imm32(0x55555555), tmp, scratch);
+ ma_sub(output, tmp, output);
+ as_mov(tmp, asr(output, 2));
+ ma_mov(Imm32(0x33333333), scratch);
+ ma_and(scratch, output);
+ ma_and(scratch, tmp);
+ ma_add(output, tmp, output);
+ as_add(output, output, lsr(output, 4));
+ ma_and(Imm32(0xF0F0F0F), output, scratch);
+ as_add(output, output, lsl(output, 8));
+ as_add(output, output, lsl(output, 16));
+ as_mov(output, asr(output, 24));
+}
+
+void MacroAssembler::popcnt64(Register64 src, Register64 dest, Register tmp) {
+ MOZ_ASSERT(dest.low != tmp);
+ MOZ_ASSERT(dest.high != tmp);
+ MOZ_ASSERT(dest.low != dest.high);
+ // The source and destination can overlap. Therefore make sure we don't
+ // clobber the source before we have the data.
+ if (dest.low != src.high) {
+ popcnt32(src.low, dest.low, tmp);
+ popcnt32(src.high, dest.high, tmp);
+ } else {
+ MOZ_ASSERT(dest.high != src.high);
+ popcnt32(src.low, dest.high, tmp);
+ popcnt32(src.high, dest.low, tmp);
+ }
+ ma_add(dest.high, dest.low);
+ ma_mov(Imm32(0), dest.high);
+}
+
+// ===============================================================
+// Branch functions
+
+void MacroAssembler::branch8(Condition cond, const Address& lhs, Imm32 rhs,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ // Inlined calls to load8{Zero,Sign}Extend() and branch32() to acquire
+ // exclusive access to scratch registers.
+
+ bool isSigned;
+ Imm32 imm(0);
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ isSigned = false;
+ imm = Imm32(uint8_t(rhs.value));
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ isSigned = true;
+ imm = Imm32(int8_t(rhs.value));
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+
+ ma_dataTransferN(IsLoad, 8, isSigned, lhs.base, Imm32(lhs.offset), scratch,
+ scratch2);
+ ma_cmp(scratch, imm, scratch2);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch8(Condition cond, const BaseIndex& lhs, Register rhs,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ // Inlined calls to load8{Zero,Sign}Extend() and branch32() to acquire
+ // exclusive access to scratch registers.
+
+ bool isSigned;
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ isSigned = false;
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ isSigned = true;
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+
+ if (isSigned) {
+ Register index = lhs.index;
+
+ // ARMv7 does not have LSL on an index register with an extended load.
+ if (lhs.scale != TimesOne) {
+ ma_lsl(Imm32::ShiftOf(lhs.scale), index, scratch);
+ index = scratch;
+ }
+
+ if (lhs.offset != 0) {
+ if (index != scratch) {
+ ma_mov(index, scratch);
+ index = scratch;
+ }
+ ma_add(Imm32(lhs.offset), index, scratch2);
+ }
+ ma_ldrsb(EDtrAddr(lhs.base, EDtrOffReg(index)), scratch);
+ } else {
+ Register base = lhs.base;
+ uint32_t scale = Imm32::ShiftOf(lhs.scale).value;
+
+ if (lhs.offset == 0) {
+ ma_ldrb(DTRAddr(base, DtrRegImmShift(lhs.index, LSL, scale)), scratch);
+ } else {
+ ma_add(base, Imm32(lhs.offset), scratch, scratch2);
+ ma_ldrb(DTRAddr(scratch, DtrRegImmShift(lhs.index, LSL, scale)), scratch);
+ }
+ }
+
+ ma_cmp(scratch, rhs);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch16(Condition cond, const Address& lhs, Imm32 rhs,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ // Inlined calls to load16{Zero,Sign}Extend() and branch32() to acquire
+ // exclusive access to scratch registers.
+
+ bool isSigned;
+ Imm32 imm(0);
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ isSigned = false;
+ imm = Imm32(uint16_t(rhs.value));
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ isSigned = true;
+ imm = Imm32(int16_t(rhs.value));
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+
+ ma_dataTransferN(IsLoad, 16, isSigned, lhs.base, Imm32(lhs.offset), scratch,
+ scratch2);
+ ma_cmp(scratch, imm, scratch2);
+ ma_b(label, cond);
+}
+
+template <class L>
+void MacroAssembler::branch32(Condition cond, Register lhs, Register rhs,
+ L label) {
+ ma_cmp(lhs, rhs);
+ ma_b(label, cond);
+}
+
+template <class L>
+void MacroAssembler::branch32(Condition cond, Register lhs, Imm32 rhs,
+ L label) {
+ ScratchRegisterScope scratch(*this);
+
+ ma_cmp(lhs, rhs, scratch);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch32(Condition cond, const Address& lhs, Register rhs,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, rhs);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch32(Condition cond, const Address& lhs, Imm32 rhs,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, rhs, scratch2);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs,
+ Register rhs, Label* label) {
+ ScratchRegisterScope scratch(*this);
+
+ // Load into scratch.
+ movePtr(ImmWord(uintptr_t(lhs.addr)), scratch);
+ ma_ldr(DTRAddr(scratch, DtrOffImm(0)), scratch);
+
+ ma_cmp(scratch, rhs);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs,
+ Imm32 rhs, Label* label) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ // Load into scratch.
+ movePtr(ImmWord(uintptr_t(lhs.addr)), scratch);
+ ma_ldr(DTRAddr(scratch, DtrOffImm(0)), scratch);
+
+ ma_cmp(scratch, rhs, scratch2);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch32(Condition cond, const BaseIndex& lhs, Imm32 rhs,
+ Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ {
+ ScratchRegisterScope scratch(*this);
+
+ Register base = lhs.base;
+ uint32_t scale = Imm32::ShiftOf(lhs.scale).value;
+
+ // Load lhs into scratch2.
+ if (lhs.offset != 0) {
+ ma_add(base, Imm32(lhs.offset), scratch, scratch2);
+ ma_ldr(DTRAddr(scratch, DtrRegImmShift(lhs.index, LSL, scale)), scratch2);
+ } else {
+ ma_ldr(DTRAddr(base, DtrRegImmShift(lhs.index, LSL, scale)), scratch2);
+ }
+ }
+ branch32(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branch32(Condition cond, const BaseIndex& lhs,
+ Register rhs, Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ {
+ ScratchRegisterScope scratch(*this);
+
+ Register base = lhs.base;
+ uint32_t scale = Imm32::ShiftOf(lhs.scale).value;
+
+ // Load lhs into scratch2.
+ if (lhs.offset != 0) {
+ ma_add(base, Imm32(lhs.offset), scratch, scratch2);
+ ma_ldr(DTRAddr(scratch, DtrRegImmShift(lhs.index, LSL, scale)), scratch2);
+ } else {
+ ma_ldr(DTRAddr(base, DtrRegImmShift(lhs.index, LSL, scale)), scratch2);
+ }
+ }
+ branch32(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branch32(Condition cond, wasm::SymbolicAddress lhs,
+ Imm32 rhs, Label* label) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ movePtr(lhs, scratch);
+ ma_ldr(DTRAddr(scratch, DtrOffImm(0)), scratch);
+
+ ma_cmp(scratch, rhs, scratch2);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch64(Condition cond, const Address& lhs, Imm64 val,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal,
+ "other condition codes not supported");
+
+ Label done;
+
+ if (cond == Assembler::Equal) {
+ branch32(Assembler::NotEqual, lhs, val.firstHalf(), &done);
+ } else {
+ branch32(Assembler::NotEqual, lhs, val.firstHalf(), label);
+ }
+ branch32(cond, Address(lhs.base, lhs.offset + sizeof(uint32_t)),
+ val.secondHalf(), label);
+
+ bind(&done);
+}
+
+void MacroAssembler::branch64(Condition cond, const Address& lhs,
+ Register64 rhs, Label* label) {
+ MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal,
+ "other condition codes not supported");
+
+ Label done;
+
+ if (cond == Assembler::Equal) {
+ branch32(Assembler::NotEqual, lhs, rhs.low, &done);
+ } else {
+ branch32(Assembler::NotEqual, lhs, rhs.low, label);
+ }
+ branch32(cond, Address(lhs.base, lhs.offset + sizeof(uint32_t)), rhs.high,
+ label);
+
+ bind(&done);
+}
+
+void MacroAssembler::branch64(Condition cond, const Address& lhs,
+ const Address& rhs, Register scratch,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal,
+ "other condition codes not supported");
+ MOZ_ASSERT(lhs.base != scratch);
+ MOZ_ASSERT(rhs.base != scratch);
+
+ Label done;
+
+ load32(rhs, scratch);
+ if (cond == Assembler::Equal) {
+ branch32(Assembler::NotEqual, lhs, scratch, &done);
+ } else {
+ branch32(Assembler::NotEqual, lhs, scratch, label);
+ }
+
+ load32(Address(rhs.base, rhs.offset + sizeof(uint32_t)), scratch);
+ branch32(cond, Address(lhs.base, lhs.offset + sizeof(uint32_t)), scratch,
+ label);
+
+ bind(&done);
+}
+
+void MacroAssembler::branch64(Condition cond, Register64 lhs, Imm64 val,
+ Label* success, Label* fail) {
+ bool fallthrough = false;
+ Label fallthroughLabel;
+
+ if (!fail) {
+ fail = &fallthroughLabel;
+ fallthrough = true;
+ }
+
+ switch (cond) {
+ case Assembler::Equal:
+ branch32(Assembler::NotEqual, lhs.low, val.low(), fail);
+ branch32(Assembler::Equal, lhs.high, val.hi(), success);
+ if (!fallthrough) {
+ jump(fail);
+ }
+ break;
+ case Assembler::NotEqual:
+ branch32(Assembler::NotEqual, lhs.low, val.low(), success);
+ branch32(Assembler::NotEqual, lhs.high, val.hi(), success);
+ if (!fallthrough) {
+ jump(fail);
+ }
+ break;
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual: {
+ Assembler::Condition cond1 = Assembler::ConditionWithoutEqual(cond);
+ Assembler::Condition cond2 =
+ Assembler::ConditionWithoutEqual(Assembler::InvertCondition(cond));
+ Assembler::Condition cond3 = Assembler::UnsignedCondition(cond);
+
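+      // Compare the high words first: if they differ, that alone decides the
+      // result; only when they are equal does the unsigned comparison of the
+      // low words decide it.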
+ cmp32(lhs.high, val.hi());
+ ma_b(success, cond1);
+ ma_b(fail, cond2);
+ cmp32(lhs.low, val.low());
+ ma_b(success, cond3);
+ if (!fallthrough) {
+ jump(fail);
+ }
+ break;
+ }
+ default:
+ MOZ_CRASH("Condition code not supported");
+ break;
+ }
+
+ if (fallthrough) {
+ bind(fail);
+ }
+}
+
+void MacroAssembler::branch64(Condition cond, Register64 lhs, Register64 rhs,
+ Label* success, Label* fail) {
+ bool fallthrough = false;
+ Label fallthroughLabel;
+
+ if (!fail) {
+ fail = &fallthroughLabel;
+ fallthrough = true;
+ }
+
+ switch (cond) {
+ case Assembler::Equal:
+ branch32(Assembler::NotEqual, lhs.low, rhs.low, fail);
+ branch32(Assembler::Equal, lhs.high, rhs.high, success);
+ if (!fallthrough) {
+ jump(fail);
+ }
+ break;
+ case Assembler::NotEqual:
+ branch32(Assembler::NotEqual, lhs.low, rhs.low, success);
+ branch32(Assembler::NotEqual, lhs.high, rhs.high, success);
+ if (!fallthrough) {
+ jump(fail);
+ }
+ break;
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual: {
+ Assembler::Condition cond1 = Assembler::ConditionWithoutEqual(cond);
+ Assembler::Condition cond2 =
+ Assembler::ConditionWithoutEqual(Assembler::InvertCondition(cond));
+ Assembler::Condition cond3 = Assembler::UnsignedCondition(cond);
+
+ cmp32(lhs.high, rhs.high);
+ ma_b(success, cond1);
+ ma_b(fail, cond2);
+ cmp32(lhs.low, rhs.low);
+ ma_b(success, cond3);
+ if (!fallthrough) {
+ jump(fail);
+ }
+ break;
+ }
+ default:
+ MOZ_CRASH("Condition code not supported");
+ break;
+ }
+
+ if (fallthrough) {
+ bind(fail);
+ }
+}
+
+template <class L>
+void MacroAssembler::branchPtr(Condition cond, Register lhs, Register rhs,
+ L label) {
+ branch32(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, Imm32 rhs,
+ Label* label) {
+ branch32(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmPtr rhs,
+ Label* label) {
+ branchPtr(cond, lhs, ImmWord(uintptr_t(rhs.value)), label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmGCPtr rhs,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ movePtr(rhs, scratch);
+ branchPtr(cond, lhs, scratch, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmWord rhs,
+ Label* label) {
+ branch32(cond, lhs, Imm32(rhs.value), label);
+}
+
+template <class L>
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, Register rhs,
+ L label) {
+ branch32(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmPtr rhs,
+ Label* label) {
+ branchPtr(cond, lhs, ImmWord(uintptr_t(rhs.value)), label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmGCPtr rhs,
+ Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ loadPtr(lhs, scratch2);
+ branchPtr(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmWord rhs,
+ Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ loadPtr(lhs, scratch2);
+ branchPtr(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs,
+ Register rhs, Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ loadPtr(lhs, scratch2);
+ branchPtr(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs,
+ ImmWord rhs, Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ loadPtr(lhs, scratch2);
+ branchPtr(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, wasm::SymbolicAddress lhs,
+ Register rhs, Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ loadPtr(lhs, scratch2);
+ branchPtr(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs,
+ ImmWord rhs, Label* label) {
+ branch32(cond, lhs, Imm32(rhs.value), label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs,
+ Register rhs, Label* label) {
+ branch32(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchPrivatePtr(Condition cond, const Address& lhs,
+ Register rhs, Label* label) {
+ branchPtr(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchFloat(DoubleCondition cond, FloatRegister lhs,
+ FloatRegister rhs, Label* label) {
+ compareFloat(lhs, rhs);
+
+ if (cond == DoubleNotEqual) {
+ // Force the unordered cases not to jump.
+ Label unordered;
+ ma_b(&unordered, VFP_Unordered);
+ ma_b(label, VFP_NotEqualOrUnordered);
+ bind(&unordered);
+ return;
+ }
+
+ if (cond == DoubleEqualOrUnordered) {
+ ma_b(label, VFP_Unordered);
+ ma_b(label, VFP_Equal);
+ return;
+ }
+
+ ma_b(label, ConditionFromDoubleCondition(cond));
+}
+
+void MacroAssembler::branchTruncateFloat32MaybeModUint32(FloatRegister src,
+ Register dest,
+ Label* fail) {
+ branchTruncateFloat32ToInt32(src, dest, fail);
+}
+
+void MacroAssembler::branchTruncateFloat32ToInt32(FloatRegister src,
+ Register dest, Label* fail) {
+ ScratchFloat32Scope scratchFloat32(*this);
+ ScratchRegisterScope scratch(*this);
+
+ ma_vcvt_F32_I32(src, scratchFloat32.sintOverlay());
+ ma_vxfer(scratchFloat32, dest);
+ ma_cmp(dest, Imm32(0x7fffffff), scratch);
+ ma_cmp(dest, Imm32(0x80000000), scratch, Assembler::NotEqual);
+ ma_b(fail, Assembler::Equal);
+}
+
+void MacroAssembler::branchDouble(DoubleCondition cond, FloatRegister lhs,
+ FloatRegister rhs, Label* label) {
+ compareDouble(lhs, rhs);
+
+ if (cond == DoubleNotEqual) {
+ // Force the unordered cases not to jump.
+ Label unordered;
+ ma_b(&unordered, VFP_Unordered);
+ ma_b(label, VFP_NotEqualOrUnordered);
+ bind(&unordered);
+ return;
+ }
+
+ if (cond == DoubleEqualOrUnordered) {
+ ma_b(label, VFP_Unordered);
+ ma_b(label, VFP_Equal);
+ return;
+ }
+
+ ma_b(label, ConditionFromDoubleCondition(cond));
+}
+
+void MacroAssembler::branchTruncateDoubleMaybeModUint32(FloatRegister src,
+ Register dest,
+ Label* fail) {
+ branchTruncateDoubleToInt32(src, dest, fail);
+}
+
+// There are two options for implementing branchTruncateDoubleToInt32:
+//
+// 1. Convert the floating point value to an integer, if it did not fit, then it
+// was clamped to INT_MIN/INT_MAX, and we can test it. NOTE: if the value
+// really was supposed to be INT_MAX / INT_MIN then it will be wrong.
+//
+// 2. Convert the floating point value to an integer, if it did not fit, then it
+// set one or two bits in the fpcsr. Check those.
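+//
+// The code below uses option 1: a clamped conversion yields exactly
+// INT32_MIN (0x80000000) or INT32_MAX (0x7fffffff), so compare against both
+// values and fail if either matches.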
+void MacroAssembler::branchTruncateDoubleToInt32(FloatRegister src,
+ Register dest, Label* fail) {
+ ScratchDoubleScope scratchDouble(*this);
+ FloatRegister scratchSIntReg = scratchDouble.sintOverlay();
+ ScratchRegisterScope scratch(*this);
+
+ ma_vcvt_F64_I32(src, scratchSIntReg);
+ ma_vxfer(scratchSIntReg, dest);
+ ma_cmp(dest, Imm32(0x7fffffff), scratch);
+ ma_cmp(dest, Imm32(0x80000000), scratch, Assembler::NotEqual);
+ ma_b(fail, Assembler::Equal);
+}
+
+template <typename T>
+void MacroAssembler::branchAdd32(Condition cond, T src, Register dest,
+ Label* label) {
+ add32(src, dest);
+ as_b(label, cond);
+}
+
+template <typename T>
+void MacroAssembler::branchSub32(Condition cond, T src, Register dest,
+ Label* label) {
+ sub32(src, dest);
+ j(cond, label);
+}
+
+template <typename T>
+void MacroAssembler::branchMul32(Condition cond, T src, Register dest,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::Overflow);
+ ScratchRegisterScope scratch(*this);
+ Assembler::Condition overflow_cond =
+ ma_check_mul(src, dest, dest, scratch, cond);
+ j(overflow_cond, label);
+}
+
+template <typename T>
+void MacroAssembler::branchRshift32(Condition cond, T src, Register dest,
+ Label* label) {
+ MOZ_ASSERT(cond == Zero || cond == NonZero);
+ rshift32(src, dest);
+ branch32(cond == Zero ? Equal : NotEqual, dest, Imm32(0), label);
+}
+
+void MacroAssembler::branchNeg32(Condition cond, Register reg, Label* label) {
+ MOZ_ASSERT(cond == Overflow);
+ neg32(reg);
+ j(cond, label);
+}
+
+void MacroAssembler::branchAdd64(Condition cond, Imm64 imm, Register64 dest,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ ma_add(imm.low(), dest.low, scratch, SetCC);
+ ma_adc(imm.hi(), dest.high, scratch, SetCC);
+ j(cond, label);
+}
+
+template <typename T>
+void MacroAssembler::branchAddPtr(Condition cond, T src, Register dest,
+ Label* label) {
+ branchAdd32(cond, src, dest, label);
+}
+
+template <typename T>
+void MacroAssembler::branchSubPtr(Condition cond, T src, Register dest,
+ Label* label) {
+ branchSub32(cond, src, dest, label);
+}
+
+void MacroAssembler::branchMulPtr(Condition cond, Register src, Register dest,
+ Label* label) {
+ branchMul32(cond, src, dest, label);
+}
+
+void MacroAssembler::decBranchPtr(Condition cond, Register lhs, Imm32 rhs,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ ma_sub(rhs, lhs, scratch, SetCC);
+ as_b(label, cond);
+}
+
+template <class L>
+void MacroAssembler::branchTest32(Condition cond, Register lhs, Register rhs,
+ L label) {
+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
+ cond == NotSigned);
+ // x86 likes test foo, foo rather than cmp foo, #0.
+ // Convert the former into the latter.
+ if (lhs == rhs && (cond == Zero || cond == NonZero)) {
+ as_cmp(lhs, Imm8(0));
+ } else {
+ ma_tst(lhs, rhs);
+ }
+ ma_b(label, cond);
+}
+
+template <class L>
+void MacroAssembler::branchTest32(Condition cond, Register lhs, Imm32 rhs,
+ L label) {
+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
+ cond == NotSigned);
+ ScratchRegisterScope scratch(*this);
+ ma_tst(lhs, rhs, scratch);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branchTest32(Condition cond, const Address& lhs, Imm32 rhs,
+ Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ load32(lhs, scratch2);
+ branchTest32(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branchTest32(Condition cond, const AbsoluteAddress& lhs,
+ Imm32 rhs, Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ load32(lhs, scratch2);
+ branchTest32(cond, scratch2, rhs, label);
+}
+
+template <class L>
+void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Register rhs,
+ L label) {
+ branchTest32(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Imm32 rhs,
+ Label* label) {
+ branchTest32(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchTestPtr(Condition cond, const Address& lhs,
+ Imm32 rhs, Label* label) {
+ branchTest32(cond, lhs, rhs, label);
+}
+
+template <class L>
+void MacroAssembler::branchTest64(Condition cond, Register64 lhs,
+ Register64 rhs, Register temp, L label) {
+ if (cond == Assembler::Zero || cond == Assembler::NonZero) {
+ ScratchRegisterScope scratch(*this);
+
+ MOZ_ASSERT(lhs.low == rhs.low);
+ MOZ_ASSERT(lhs.high == rhs.high);
+ ma_orr(lhs.low, lhs.high, scratch);
+ branchTestPtr(cond, scratch, scratch, label);
+ } else if (cond == Assembler::Signed || cond == Assembler::NotSigned) {
+ branchTest32(cond, lhs.high, rhs.high, label);
+ } else {
+ MOZ_CRASH("Unsupported condition");
+ }
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond, Register tag,
+ Label* label) {
+ branchTestUndefinedImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond, const Address& address,
+ Label* label) {
+ branchTestUndefinedImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond,
+ const BaseIndex& address,
+ Label* label) {
+ branchTestUndefinedImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestUndefinedImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestUndefinedImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testUndefined(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, Register tag,
+ Label* label) {
+ branchTestInt32Impl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, const Address& address,
+ Label* label) {
+ branchTestInt32Impl(cond, address, label);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestInt32Impl(cond, address, label);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestInt32Impl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestInt32Impl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testInt32(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestInt32Truthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testInt32Truthy(truthy, value);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, Register tag,
+ Label* label) {
+ branchTestDoubleImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, const Address& address,
+ Label* label) {
+ branchTestDoubleImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestDoubleImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestDoubleImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestDoubleImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testDouble(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestDoubleTruthy(bool truthy, FloatRegister reg,
+ Label* label) {
+ Condition c = testDoubleTruthy(truthy, reg);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestNumber(Condition cond, Register tag,
+ Label* label) {
+ branchTestNumberImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestNumber(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestNumberImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestNumberImpl(Condition cond, const T& t,
+ Label* label) {
+ cond = testNumber(cond, t);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond, Register tag,
+ Label* label) {
+ branchTestBooleanImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond, const Address& address,
+ Label* label) {
+ branchTestBooleanImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestBooleanImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestBooleanImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestBooleanImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testBoolean(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestBooleanTruthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testBooleanTruthy(truthy, value);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestString(Condition cond, Register tag,
+ Label* label) {
+ branchTestStringImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestString(Condition cond, const Address& address,
+ Label* label) {
+ branchTestStringImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestString(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestStringImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestString(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestStringImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestStringImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testString(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestStringTruthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testStringTruthy(truthy, value);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, Register tag,
+ Label* label) {
+ branchTestSymbolImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, const Address& address,
+ Label* label) {
+ branchTestSymbolImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestSymbolImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestSymbolImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestSymbolImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testSymbol(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, Register tag,
+ Label* label) {
+ branchTestBigIntImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, const Address& address,
+ Label* label) {
+ branchTestBigIntImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestBigIntImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestBigIntImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestBigIntImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testBigInt(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestBigIntTruthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testBigIntTruthy(truthy, value);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, Register tag,
+ Label* label) {
+ branchTestNullImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, const Address& address,
+ Label* label) {
+ branchTestNullImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestNullImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestNullImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestNullImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testNull(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, Register tag,
+ Label* label) {
+ branchTestObjectImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, const Address& address,
+ Label* label) {
+ branchTestObjectImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestObjectImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestObjectImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestObjectImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testObject(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestGCThing(Condition cond, const Address& address,
+ Label* label) {
+ branchTestGCThingImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestGCThing(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestGCThingImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestGCThing(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestGCThingImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestGCThingImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testGCThing(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestPrimitive(Condition cond, Register tag,
+ Label* label) {
+ branchTestPrimitiveImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestPrimitive(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestPrimitiveImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestPrimitiveImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testPrimitive(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, Register tag,
+ Label* label) {
+ branchTestMagicImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, const Address& address,
+ Label* label) {
+ branchTestMagicImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestMagicImpl(cond, address, label);
+}
+
+template <class L>
+void MacroAssembler::branchTestMagic(Condition cond, const ValueOperand& value,
+ L label) {
+ branchTestMagicImpl(cond, value, label);
+}
+
+template <typename T, class L>
+void MacroAssembler::branchTestMagicImpl(Condition cond, const T& t, L label) {
+ cond = testMagic(cond, t);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, const Address& valaddr,
+ JSWhyMagic why, Label* label) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+
+ Label notMagic;
+ if (cond == Assembler::Equal) {
+ branchTestMagic(Assembler::NotEqual, valaddr, &notMagic);
+ } else {
+ branchTestMagic(Assembler::NotEqual, valaddr, label);
+ }
+
+ branch32(cond, ToPayload(valaddr), Imm32(why), label);
+ bind(&notMagic);
+}
+
+void MacroAssembler::branchTestValue(Condition cond, const BaseIndex& lhs,
+ const ValueOperand& rhs, Label* label) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+
+ Label notSameValue;
+ if (cond == Assembler::Equal) {
+ branch32(Assembler::NotEqual, ToType(lhs), rhs.typeReg(), &notSameValue);
+ } else {
+ branch32(Assembler::NotEqual, ToType(lhs), rhs.typeReg(), label);
+ }
+
+ branch32(cond, ToPayload(lhs), rhs.payloadReg(), label);
+ bind(&notSameValue);
+}
+
+template <typename T>
+void MacroAssembler::testNumberSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testNumber(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testBooleanSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testBoolean(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testStringSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testString(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testSymbolSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testSymbol(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testBigIntSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testBigInt(cond, src);
+ emitSet(cond, dest);
+}
+
+void MacroAssembler::branchToComputedAddress(const BaseIndex& addr) {
+ MOZ_ASSERT(
+ addr.offset == 0,
+ "NYI: offsets from pc should be shifted by the number of instructions.");
+
+ Register base = addr.base;
+ uint32_t scale = Imm32::ShiftOf(addr.scale).value;
+
+ ma_ldr(DTRAddr(base, DtrRegImmShift(addr.index, LSL, scale)), pc);
+
+ if (base == pc) {
+    // When loading from pc, the pc is shifted to the next instruction; we
+    // add one extra instruction to accommodate this shifted offset.
+ breakpoint();
+ }
+}
+
+void MacroAssembler::cmp32Move32(Condition cond, Register lhs, Register rhs,
+ Register src, Register dest) {
+ cmp32(lhs, rhs);
+ ma_mov(src, dest, LeaveCC, cond);
+}
+
+void MacroAssembler::cmp32MovePtr(Condition cond, Register lhs, Imm32 rhs,
+ Register src, Register dest) {
+ cmp32(lhs, rhs);
+ ma_mov(src, dest, LeaveCC, cond);
+}
+
+void MacroAssembler::cmp32Move32(Condition cond, Register lhs,
+ const Address& rhs, Register src,
+ Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+ ma_ldr(rhs, scratch, scratch2);
+ cmp32Move32(cond, lhs, scratch, src, dest);
+}
+
+void MacroAssembler::cmpPtrMovePtr(Condition cond, Register lhs, Register rhs,
+ Register src, Register dest) {
+ cmp32Move32(cond, lhs, rhs, src, dest);
+}
+
+void MacroAssembler::cmpPtrMovePtr(Condition cond, Register lhs,
+ const Address& rhs, Register src,
+ Register dest) {
+ cmp32Move32(cond, lhs, rhs, src, dest);
+}
+
+void MacroAssembler::cmp32Load32(Condition cond, Register lhs,
+ const Address& rhs, const Address& src,
+ Register dest) {
+ // This is never used, but must be present to facilitate linking on arm.
+ MOZ_CRASH("No known use cases");
+}
+
+void MacroAssembler::cmp32Load32(Condition cond, Register lhs, Register rhs,
+ const Address& src, Register dest) {
+ // This is never used, but must be present to facilitate linking on arm.
+ MOZ_CRASH("No known use cases");
+}
+
+void MacroAssembler::cmp32LoadPtr(Condition cond, const Address& lhs, Imm32 rhs,
+ const Address& src, Register dest) {
+ cmp32(lhs, rhs);
+ ScratchRegisterScope scratch(*this);
+ ma_ldr(src, dest, scratch, Offset, cond);
+}
+
+void MacroAssembler::test32LoadPtr(Condition cond, const Address& addr,
+ Imm32 mask, const Address& src,
+ Register dest) {
+ MOZ_ASSERT(cond == Assembler::Zero || cond == Assembler::NonZero);
+ test32(addr, mask);
+ ScratchRegisterScope scratch(*this);
+ ma_ldr(src, dest, scratch, Offset, cond);
+}
+
+void MacroAssembler::test32MovePtr(Condition cond, const Address& addr,
+ Imm32 mask, Register src, Register dest) {
+ MOZ_ASSERT(cond == Assembler::Zero || cond == Assembler::NonZero);
+ test32(addr, mask);
+ ma_mov(src, dest, LeaveCC, cond);
+}
+
+void MacroAssembler::spectreMovePtr(Condition cond, Register src,
+ Register dest) {
+ ma_mov(src, dest, LeaveCC, cond);
+}
+
+void MacroAssembler::spectreZeroRegister(Condition cond, Register,
+ Register dest) {
+ ma_mov(Imm32(0), dest, cond);
+}
+
+void MacroAssembler::spectreBoundsCheck32(Register index, Register length,
+ Register maybeScratch,
+ Label* failure) {
+ MOZ_ASSERT(length != maybeScratch);
+ MOZ_ASSERT(index != maybeScratch);
+
+ branch32(Assembler::BelowOrEqual, length, index, failure);
+
+ if (JitOptions.spectreIndexMasking) {
+ ma_mov(Imm32(0), index, Assembler::BelowOrEqual);
+ }
+}
+
+void MacroAssembler::spectreBoundsCheck32(Register index, const Address& length,
+ Register maybeScratch,
+ Label* failure) {
+ MOZ_ASSERT(index != length.base);
+ MOZ_ASSERT(length.base != maybeScratch);
+ MOZ_ASSERT(index != maybeScratch);
+
+ branch32(Assembler::BelowOrEqual, length, index, failure);
+
+ if (JitOptions.spectreIndexMasking) {
+ ma_mov(Imm32(0), index, Assembler::BelowOrEqual);
+ }
+}
+
+void MacroAssembler::spectreBoundsCheckPtr(Register index, Register length,
+ Register maybeScratch,
+ Label* failure) {
+ spectreBoundsCheck32(index, length, maybeScratch, failure);
+}
+
+void MacroAssembler::spectreBoundsCheckPtr(Register index,
+ const Address& length,
+ Register maybeScratch,
+ Label* failure) {
+ spectreBoundsCheck32(index, length, maybeScratch, failure);
+}
+
+// ========================================================================
+// Memory access primitives.
+void MacroAssembler::storeUncanonicalizedDouble(FloatRegister src,
+ const Address& addr) {
+ ScratchRegisterScope scratch(*this);
+ ma_vstr(src, addr, scratch);
+}
+void MacroAssembler::storeUncanonicalizedDouble(FloatRegister src,
+ const BaseIndex& addr) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+ uint32_t scale = Imm32::ShiftOf(addr.scale).value;
+ ma_vstr(src, addr.base, addr.index, scratch, scratch2, scale, addr.offset);
+}
+
+void MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src,
+ const Address& addr) {
+ ScratchRegisterScope scratch(*this);
+ ma_vstr(src.asSingle(), addr, scratch);
+}
+void MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src,
+ const BaseIndex& addr) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+ uint32_t scale = Imm32::ShiftOf(addr.scale).value;
+ ma_vstr(src.asSingle(), addr.base, addr.index, scratch, scratch2, scale,
+ addr.offset);
+}
+
+void MacroAssembler::memoryBarrier(MemoryBarrierBits barrier) {
+ // On ARMv6 the optional argument (BarrierST, etc) is ignored.
+ if (barrier == (MembarStoreStore | MembarSynchronizing)) {
+ ma_dsb(BarrierST);
+ } else if (barrier & MembarSynchronizing) {
+ ma_dsb();
+ } else if (barrier == MembarStoreStore) {
+ ma_dmb(BarrierST);
+ } else if (barrier) {
+ ma_dmb();
+ }
+}
+
+// ===============================================================
+// Clamping functions.
+
+void MacroAssembler::clampIntToUint8(Register reg) {
+  // Look at (reg >> 8): if it is 0, then reg doesn't need to be clamped; if
+  // it is < 0, then we want to clamp to 0; otherwise, we want to clamp to 255.
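+  // E.g. 300 >> 8 == 1 (non-zero, non-negative) -> clamp to 255;
+  //      -5 >> 8 == -1 (negative)               -> clamp to 0;
+  //      100 >> 8 == 0                          -> leave reg unchanged.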
+ ScratchRegisterScope scratch(*this);
+ as_mov(scratch, asr(reg, 8), SetCC);
+ ma_mov(Imm32(0xff), reg, NotEqual);
+ ma_mov(Imm32(0), reg, Signed);
+}
+
+template <typename T>
+void MacroAssemblerARMCompat::fallibleUnboxPtrImpl(const T& src, Register dest,
+ JSValueType type,
+ Label* fail) {
+ switch (type) {
+ case JSVAL_TYPE_OBJECT:
+ asMasm().branchTestObject(Assembler::NotEqual, src, fail);
+ break;
+ case JSVAL_TYPE_STRING:
+ asMasm().branchTestString(Assembler::NotEqual, src, fail);
+ break;
+ case JSVAL_TYPE_SYMBOL:
+ asMasm().branchTestSymbol(Assembler::NotEqual, src, fail);
+ break;
+ case JSVAL_TYPE_BIGINT:
+ asMasm().branchTestBigInt(Assembler::NotEqual, src, fail);
+ break;
+ default:
+ MOZ_CRASH("Unexpected type");
+ }
+ unboxNonDouble(src, dest, type);
+}
+
+void MacroAssembler::fallibleUnboxPtr(const ValueOperand& src, Register dest,
+ JSValueType type, Label* fail) {
+ fallibleUnboxPtrImpl(src, dest, type, fail);
+}
+
+void MacroAssembler::fallibleUnboxPtr(const Address& src, Register dest,
+ JSValueType type, Label* fail) {
+ fallibleUnboxPtrImpl(src, dest, type, fail);
+}
+
+void MacroAssembler::fallibleUnboxPtr(const BaseIndex& src, Register dest,
+ JSValueType type, Label* fail) {
+ fallibleUnboxPtrImpl(src, dest, type, fail);
+}
+
+//}}} check_macroassembler_style
+// ===============================================================
+
+void MacroAssemblerARMCompat::incrementInt32Value(const Address& addr) {
+ asMasm().add32(Imm32(1), ToPayload(addr));
+}
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_MacroAssembler_arm_inl_h */
diff --git a/js/src/jit/arm/MacroAssembler-arm.cpp b/js/src/jit/arm/MacroAssembler-arm.cpp
new file mode 100644
index 0000000000..da358c5ec9
--- /dev/null
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -0,0 +1,6382 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm/MacroAssembler-arm.h"
+
+#include "mozilla/Casting.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
+
+#include "jsmath.h"
+
+#include "jit/arm/Simulator-arm.h"
+#include "jit/AtomicOp.h"
+#include "jit/AtomicOperations.h"
+#include "jit/Bailouts.h"
+#include "jit/BaselineFrame.h"
+#include "jit/JitFrames.h"
+#include "jit/JitRuntime.h"
+#include "jit/MacroAssembler.h"
+#include "jit/MoveEmitter.h"
+#include "js/ScalarType.h" // js::Scalar::Type
+#include "util/Memory.h"
+#include "vm/BigIntType.h"
+#include "vm/JitActivation.h" // js::jit::JitActivation
+#include "vm/JSContext.h"
+#include "vm/StringType.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+using namespace js;
+using namespace jit;
+
+using mozilla::Abs;
+using mozilla::BitwiseCast;
+using mozilla::DebugOnly;
+using mozilla::IsPositiveZero;
+using mozilla::Maybe;
+
+bool isValueDTRDCandidate(ValueOperand& val) {
+ // In order to be used for a DTRD memory function, the two target registers
+ // need to be a) Adjacent, with the tag larger than the payload, and b)
+ // Aligned to a multiple of two.
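+  // E.g. the pair (payload=r2, type=r3) qualifies, but (payload=r1, type=r2)
+  // does not (odd payload register), and neither does (payload=r4, type=r3)
+  // (tag register is not the next register above the payload).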
+ if ((val.typeReg().code() != (val.payloadReg().code() + 1))) {
+ return false;
+ }
+ if ((val.payloadReg().code() & 1) != 0) {
+ return false;
+ }
+ return true;
+}
+
+void MacroAssemblerARM::convertBoolToInt32(Register source, Register dest) {
+ // Note that C++ bool is only 1 byte, so zero extend it to clear the
+ // higher-order bits.
+ as_and(dest, source, Imm8(0xff));
+}
+
+void MacroAssemblerARM::convertInt32ToDouble(Register src,
+ FloatRegister dest_) {
+ // Direct conversions aren't possible.
+ VFPRegister dest = VFPRegister(dest_);
+ as_vxfer(src, InvalidReg, dest.sintOverlay(), CoreToFloat);
+ as_vcvt(dest, dest.sintOverlay());
+}
+
+void MacroAssemblerARM::convertInt32ToDouble(const Address& src,
+ FloatRegister dest) {
+ ScratchDoubleScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_vldr(src, scratch, scratch2);
+ as_vcvt(dest, VFPRegister(scratch).sintOverlay());
+}
+
+void MacroAssemblerARM::convertInt32ToDouble(const BaseIndex& src,
+ FloatRegister dest) {
+ Register base = src.base;
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (src.offset != 0) {
+ ma_add(base, Imm32(src.offset), scratch, scratch2);
+ base = scratch;
+ }
+ ma_ldr(DTRAddr(base, DtrRegImmShift(src.index, LSL, scale)), scratch);
+ convertInt32ToDouble(scratch, dest);
+}
+
+void MacroAssemblerARM::convertUInt32ToDouble(Register src,
+ FloatRegister dest_) {
+ // Direct conversions aren't possible.
+ VFPRegister dest = VFPRegister(dest_);
+ as_vxfer(src, InvalidReg, dest.uintOverlay(), CoreToFloat);
+ as_vcvt(dest, dest.uintOverlay());
+}
+
+static const double TO_DOUBLE_HIGH_SCALE = 0x100000000;
+
+void MacroAssemblerARM::convertUInt32ToFloat32(Register src,
+ FloatRegister dest_) {
+ // Direct conversions aren't possible.
+ VFPRegister dest = VFPRegister(dest_);
+ as_vxfer(src, InvalidReg, dest.uintOverlay(), CoreToFloat);
+ as_vcvt(VFPRegister(dest).singleOverlay(), dest.uintOverlay());
+}
+
+void MacroAssemblerARM::convertDoubleToFloat32(FloatRegister src,
+ FloatRegister dest,
+ Condition c) {
+ as_vcvt(VFPRegister(dest).singleOverlay(), VFPRegister(src), false, c);
+}
+
+// Checks whether a double is representable as a 32-bit integer. If so, the
+// integer is written to the output register. Otherwise, we jump to the given
+// failure label (in practice, a bailout). This function overwrites the
+// scratch float register.
+void MacroAssemblerARM::convertDoubleToInt32(FloatRegister src, Register dest,
+ Label* fail,
+ bool negativeZeroCheck) {
+  // Convert the floating point value to an integer; if it did not fit, then
+  // converting it *back* to a float will give a different value, which we can
+  // test for.
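+  // E.g. 2147483648.0 converts to INT32_MAX (clamped); converting INT32_MAX
+  // back gives 2147483647.0, which compares unequal, so we take the failure
+  // path. Likewise 5.5 -> 5 -> 5.0 != 5.5.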
+ ScratchDoubleScope scratchDouble(asMasm());
+ ScratchRegisterScope scratch(asMasm());
+
+ FloatRegister scratchSIntReg = scratchDouble.sintOverlay();
+
+ ma_vcvt_F64_I32(src, scratchSIntReg);
+ // Move the value into the dest register.
+ ma_vxfer(scratchSIntReg, dest);
+ ma_vcvt_I32_F64(scratchSIntReg, scratchDouble);
+ ma_vcmp(src, scratchDouble);
+ as_vmrs(pc);
+ ma_b(fail, Assembler::VFP_NotEqualOrUnordered);
+
+ if (negativeZeroCheck) {
+ as_cmp(dest, Imm8(0));
+    // Test and bail for -0.0 when the integer result is 0: move the top word
+    // of the double into the output reg; if it is non-zero, then the original
+    // value was -0.0.
+ as_vxfer(dest, InvalidReg, src, FloatToCore, Assembler::Equal, 1);
+ ma_cmp(dest, Imm32(0x80000000), scratch, Assembler::Equal);
+ ma_b(fail, Assembler::Equal);
+ }
+}
+
+// Checks whether a float32 is representable as a 32-bit integer. If so, the
+// integer is written to the output register. Otherwise, a bailout is taken to
+// the given snapshot. This function overwrites the scratch float register.
+void MacroAssemblerARM::convertFloat32ToInt32(FloatRegister src, Register dest,
+ Label* fail,
+ bool negativeZeroCheck) {
+  // Converting the floating point value to an integer and then converting it
+  // back to a float32 would not detect clamping on its own: float-to-int32
+  // conversions clamp (e.g. float(INT32_MAX + 1) gets converted to INT32_MAX
+  // and then back to float(INT32_MAX + 1), so the round trip compares equal).
+  // If this ever happens, we just bail out: see the explicit comparisons
+  // against the clamped values below.
+ ScratchFloat32Scope scratchFloat(asMasm());
+ ScratchRegisterScope scratch(asMasm());
+
+ FloatRegister ScratchSIntReg = scratchFloat.sintOverlay();
+ ma_vcvt_F32_I32(src, ScratchSIntReg);
+
+ // Store the result
+ ma_vxfer(ScratchSIntReg, dest);
+
+ ma_vcvt_I32_F32(ScratchSIntReg, scratchFloat);
+ ma_vcmp(src, scratchFloat);
+ as_vmrs(pc);
+ ma_b(fail, Assembler::VFP_NotEqualOrUnordered);
+
+ // Bail out in the clamped cases.
+ ma_cmp(dest, Imm32(0x7fffffff), scratch);
+ ma_cmp(dest, Imm32(0x80000000), scratch, Assembler::NotEqual);
+ ma_b(fail, Assembler::Equal);
+
+ if (negativeZeroCheck) {
+ as_cmp(dest, Imm8(0));
+ // Test and bail for -0.0, when integer result is 0. Move the float into
+ // the output reg, and if it is non-zero then the original value was
+ // -0.0
+ as_vxfer(dest, InvalidReg, VFPRegister(src).singleOverlay(), FloatToCore,
+ Assembler::Equal, 0);
+ ma_cmp(dest, Imm32(0x80000000), scratch, Assembler::Equal);
+ ma_b(fail, Assembler::Equal);
+ }
+}
+
+void MacroAssemblerARM::convertFloat32ToDouble(FloatRegister src,
+ FloatRegister dest) {
+ MOZ_ASSERT(dest.isDouble());
+ MOZ_ASSERT(src.isSingle());
+ as_vcvt(VFPRegister(dest), VFPRegister(src).singleOverlay());
+}
+
+void MacroAssemblerARM::convertInt32ToFloat32(Register src,
+ FloatRegister dest) {
+ // Direct conversions aren't possible.
+ as_vxfer(src, InvalidReg, dest.sintOverlay(), CoreToFloat);
+ as_vcvt(dest.singleOverlay(), dest.sintOverlay());
+}
+
+void MacroAssemblerARM::convertInt32ToFloat32(const Address& src,
+ FloatRegister dest) {
+ ScratchFloat32Scope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_vldr(src, scratch, scratch2);
+ as_vcvt(dest, VFPRegister(scratch).sintOverlay());
+}
+
+bool MacroAssemblerARM::alu_dbl(Register src1, Imm32 imm, Register dest,
+ ALUOp op, SBit s, Condition c) {
+ if ((s == SetCC && !condsAreSafe(op)) || !can_dbl(op)) {
+ return false;
+ }
+
+ ALUOp interop = getDestVariant(op);
+ Imm8::TwoImm8mData both = Imm8::EncodeTwoImms(imm.value);
+ if (both.fst().invalid()) {
+ return false;
+ }
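+
+  // For example, 0x00ff00ff has no single imm8m encoding but can be covered
+  // by two (0x00ff0000 + 0x000000ff), so, assuming EncodeTwoImms picks that
+  // split, |add r0, r1, #0x00ff00ff| becomes
+  // |add r0, r1, #0x00ff0000; add r0, r0, #0x000000ff|.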
+
+  // For the most part, there is no good reason to set the condition codes for
+  // the first instruction. We could do better things if the second instruction
+  // doesn't have a dest, such as checking for overflow: do the first
+  // operation, then skip the second operation if the first one overflowed.
+  // This preserves the overflow condition code. Unfortunately, it is horribly
+  // brittle.
+ as_alu(dest, src1, Operand2(both.fst()), interop, LeaveCC, c);
+ as_alu(dest, dest, Operand2(both.snd()), op, s, c);
+ return true;
+}
+
+void MacroAssemblerARM::ma_alu(Register src1, Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, ALUOp op, SBit s,
+ Condition c) {
+ // ma_mov should be used for moves.
+ MOZ_ASSERT(op != OpMov);
+ MOZ_ASSERT(op != OpMvn);
+ MOZ_ASSERT(src1 != scratch);
+
+ // As it turns out, if you ask for a compare-like instruction you *probably*
+ // want it to set condition codes.
+ MOZ_ASSERT_IF(dest == InvalidReg, s == SetCC);
+
+ // The operator gives us the ability to determine how this can be used.
+ Imm8 imm8 = Imm8(imm.value);
+ // One instruction: If we can encode it using an imm8m, then do so.
+ if (!imm8.invalid()) {
+ as_alu(dest, src1, imm8, op, s, c);
+ return;
+ }
+
+ // One instruction, negated:
+ Imm32 negImm = imm;
+ Register negDest;
+ ALUOp negOp = ALUNeg(op, dest, scratch, &negImm, &negDest);
+ Imm8 negImm8 = Imm8(negImm.value);
+ // 'add r1, r2, -15' can be replaced with 'sub r1, r2, 15'.
+ // The dest can be replaced (InvalidReg => scratch).
+ // This is useful if we wish to negate tst. tst has an invalid (aka not
+ // used) dest, but its negation bic requires a dest.
+ if (negOp != OpInvalid && !negImm8.invalid()) {
+ as_alu(negDest, src1, negImm8, negOp, s, c);
+ return;
+ }
+
+  // Start by attempting to generate a two-instruction form. Some things
+  // cannot be made into two-inst forms correctly; namely, adds dest, src,
+  // 0xffff. Since we want the condition codes (and don't know which ones
+  // will be checked), we must assume that the overflow flag will be checked:
+  // add{,s} dest, src, 0xff00; add{,s} dest, dest, 0xff is not guaranteed to
+  // set the overflow flag the same way as the (theoretical) one-instruction
+  // variant.
+ if (alu_dbl(src1, imm, dest, op, s, c)) {
+ return;
+ }
+
+ // And try with its negative.
+ if (negOp != OpInvalid && alu_dbl(src1, negImm, negDest, negOp, s, c)) {
+ return;
+ }
+
+ ma_mov(imm, scratch, c);
+ as_alu(dest, src1, O2Reg(scratch), op, s, c);
+}
+
+void MacroAssemblerARM::ma_alu(Register src1, Operand op2, Register dest,
+ ALUOp op, SBit s, Assembler::Condition c) {
+ MOZ_ASSERT(op2.tag() == Operand::Tag::OP2);
+ as_alu(dest, src1, op2.toOp2(), op, s, c);
+}
+
+void MacroAssemblerARM::ma_alu(Register src1, Operand2 op2, Register dest,
+ ALUOp op, SBit s, Condition c) {
+ as_alu(dest, src1, op2, op, s, c);
+}
+
+void MacroAssemblerARM::ma_nop() { as_nop(); }
+
+BufferOffset MacroAssemblerARM::ma_movPatchable(Imm32 imm_, Register dest,
+ Assembler::Condition c) {
+ int32_t imm = imm_.value;
+ if (HasMOVWT()) {
+ BufferOffset offset = as_movw(dest, Imm16(imm & 0xffff), c);
+ as_movt(dest, Imm16(imm >> 16 & 0xffff), c);
+ return offset;
+ } else {
+ return as_Imm32Pool(dest, imm, c);
+ }
+}
+
+BufferOffset MacroAssemblerARM::ma_movPatchable(ImmPtr imm, Register dest,
+ Assembler::Condition c) {
+ return ma_movPatchable(Imm32(int32_t(imm.value)), dest, c);
+}
+
+/* static */
+template <class Iter>
+void MacroAssemblerARM::ma_mov_patch(Imm32 imm32, Register dest,
+ Assembler::Condition c, RelocStyle rs,
+ Iter iter) {
+ // The current instruction must be an actual instruction,
+ // not automatically-inserted boilerplate.
+ MOZ_ASSERT(iter.cur());
+ MOZ_ASSERT(iter.cur() == iter.maybeSkipAutomaticInstructions());
+
+ int32_t imm = imm32.value;
+ switch (rs) {
+ case L_MOVWT:
+ Assembler::as_movw_patch(dest, Imm16(imm & 0xffff), c, iter.cur());
+ Assembler::as_movt_patch(dest, Imm16(imm >> 16 & 0xffff), c, iter.next());
+ break;
+ case L_LDR:
+ Assembler::WritePoolEntry(iter.cur(), c, imm);
+ break;
+ }
+}
+
+template void MacroAssemblerARM::ma_mov_patch(Imm32 imm32, Register dest,
+ Assembler::Condition c,
+ RelocStyle rs,
+ InstructionIterator iter);
+template void MacroAssemblerARM::ma_mov_patch(Imm32 imm32, Register dest,
+ Assembler::Condition c,
+ RelocStyle rs,
+ BufferInstructionIterator iter);
+
+void MacroAssemblerARM::ma_mov(Register src, Register dest, SBit s,
+ Assembler::Condition c) {
+ if (s == SetCC || dest != src) {
+ as_mov(dest, O2Reg(src), s, c);
+ }
+}
+
+void MacroAssemblerARM::ma_mov(Imm32 imm, Register dest,
+ Assembler::Condition c) {
+ // Try mov with Imm8 operand.
+ Imm8 imm8 = Imm8(imm.value);
+ if (!imm8.invalid()) {
+ as_alu(dest, InvalidReg, imm8, OpMov, LeaveCC, c);
+ return;
+ }
+
+ // Try mvn with Imm8 operand.
+ Imm8 negImm8 = Imm8(~imm.value);
+ if (!negImm8.invalid()) {
+ as_alu(dest, InvalidReg, negImm8, OpMvn, LeaveCC, c);
+ return;
+ }
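+  // (For example, 0xffffff00 has no imm8m encoding, but its complement
+  // 0x000000ff does, so it is handled above as |mvn dest, #0xff|.)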
+
+ // Try movw/movt.
+ if (HasMOVWT()) {
+    // ARMv7 supports movw/movt. movw zero-extends its 16-bit argument,
+    // so we can set the register this way. movt leaves the bottom 16
+    // bits intact, so we always need a movw.
+ as_movw(dest, Imm16(imm.value & 0xffff), c);
+ if (uint32_t(imm.value) >> 16) {
+ as_movt(dest, Imm16(uint32_t(imm.value) >> 16), c);
+ }
+ return;
+ }
+
+ // If we don't have movw/movt, we need a load.
+ as_Imm32Pool(dest, imm.value, c);
+}
+
+void MacroAssemblerARM::ma_mov(ImmWord imm, Register dest,
+ Assembler::Condition c) {
+ ma_mov(Imm32(imm.value), dest, c);
+}
+
+void MacroAssemblerARM::ma_mov(ImmGCPtr ptr, Register dest) {
+ BufferOffset offset =
+ ma_movPatchable(Imm32(uintptr_t(ptr.value)), dest, Always);
+ writeDataRelocation(offset, ptr);
+}
+
+// Shifts (just a move with a shifting op2)
+void MacroAssemblerARM::ma_lsl(Imm32 shift, Register src, Register dst) {
+ as_mov(dst, lsl(src, shift.value));
+}
+
+void MacroAssemblerARM::ma_lsr(Imm32 shift, Register src, Register dst) {
+ as_mov(dst, lsr(src, shift.value));
+}
+
+void MacroAssemblerARM::ma_asr(Imm32 shift, Register src, Register dst) {
+ as_mov(dst, asr(src, shift.value));
+}
+
+void MacroAssemblerARM::ma_ror(Imm32 shift, Register src, Register dst) {
+ as_mov(dst, ror(src, shift.value));
+}
+
+void MacroAssemblerARM::ma_rol(Imm32 shift, Register src, Register dst) {
+ as_mov(dst, rol(src, shift.value));
+}
+
+// Shifts (just a move with a shifting op2)
+void MacroAssemblerARM::ma_lsl(Register shift, Register src, Register dst) {
+ as_mov(dst, lsl(src, shift));
+}
+
+void MacroAssemblerARM::ma_lsr(Register shift, Register src, Register dst) {
+ as_mov(dst, lsr(src, shift));
+}
+
+void MacroAssemblerARM::ma_asr(Register shift, Register src, Register dst) {
+ as_mov(dst, asr(src, shift));
+}
+
+void MacroAssemblerARM::ma_ror(Register shift, Register src, Register dst) {
+ as_mov(dst, ror(src, shift));
+}
+
+void MacroAssemblerARM::ma_rol(Register shift, Register src, Register dst,
+ AutoRegisterScope& scratch) {
+ as_rsb(scratch, shift, Imm8(32));
+ as_mov(dst, ror(src, scratch));
+}
+
+// Move not (dest <- ~src)
+void MacroAssemblerARM::ma_mvn(Register src1, Register dest, SBit s,
+ Assembler::Condition c) {
+ as_alu(dest, InvalidReg, O2Reg(src1), OpMvn, s, c);
+}
+
+// Negate (dest <- -src), src is a register, rather than a general op2.
+void MacroAssemblerARM::ma_neg(Register src1, Register dest, SBit s,
+ Assembler::Condition c) {
+ as_rsb(dest, src1, Imm8(0), s, c);
+}
+
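+// 64-bit negation: dest = 0 - src. rsb negates the low word and sets the
+// carry (ARM's "no borrow") flag; rsc then subtracts the high word together
+// with the propagated borrow.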
+void MacroAssemblerARM::ma_neg(Register64 src, Register64 dest) {
+ as_rsb(dest.low, src.low, Imm8(0), SetCC);
+ as_rsc(dest.high, src.high, Imm8(0));
+}
+
+// And.
+void MacroAssemblerARM::ma_and(Register src, Register dest, SBit s,
+ Assembler::Condition c) {
+  ma_and(dest, src, dest, s, c);
+}
+
+void MacroAssemblerARM::ma_and(Register src1, Register src2, Register dest,
+ SBit s, Assembler::Condition c) {
+ as_and(dest, src1, O2Reg(src2), s, c);
+}
+
+void MacroAssemblerARM::ma_and(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Assembler::Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpAnd, s, c);
+}
+
+void MacroAssemblerARM::ma_and(Imm32 imm, Register src1, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Assembler::Condition c) {
+ ma_alu(src1, imm, dest, scratch, OpAnd, s, c);
+}
+
+// Bit clear (dest <- dest & ~imm) or (dest <- src1 & ~src2).
+void MacroAssemblerARM::ma_bic(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Assembler::Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpBic, s, c);
+}
+
+// Exclusive or.
+void MacroAssemblerARM::ma_eor(Register src, Register dest, SBit s,
+ Assembler::Condition c) {
+ ma_eor(dest, src, dest, s, c);
+}
+
+void MacroAssemblerARM::ma_eor(Register src1, Register src2, Register dest,
+ SBit s, Assembler::Condition c) {
+ as_eor(dest, src1, O2Reg(src2), s, c);
+}
+
+void MacroAssemblerARM::ma_eor(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Assembler::Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpEor, s, c);
+}
+
+void MacroAssemblerARM::ma_eor(Imm32 imm, Register src1, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Assembler::Condition c) {
+ ma_alu(src1, imm, dest, scratch, OpEor, s, c);
+}
+
+// Or.
+void MacroAssemblerARM::ma_orr(Register src, Register dest, SBit s,
+ Assembler::Condition c) {
+ ma_orr(dest, src, dest, s, c);
+}
+
+void MacroAssemblerARM::ma_orr(Register src1, Register src2, Register dest,
+ SBit s, Assembler::Condition c) {
+ as_orr(dest, src1, O2Reg(src2), s, c);
+}
+
+void MacroAssemblerARM::ma_orr(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Assembler::Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpOrr, s, c);
+}
+
+void MacroAssemblerARM::ma_orr(Imm32 imm, Register src1, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Assembler::Condition c) {
+ ma_alu(src1, imm, dest, scratch, OpOrr, s, c);
+}
+
+// Arithmetic-based ops.
+// Add with carry.
+void MacroAssemblerARM::ma_adc(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpAdc, s, c);
+}
+
+void MacroAssemblerARM::ma_adc(Register src, Register dest, SBit s,
+ Condition c) {
+ as_alu(dest, dest, O2Reg(src), OpAdc, s, c);
+}
+
+void MacroAssemblerARM::ma_adc(Register src1, Register src2, Register dest,
+ SBit s, Condition c) {
+ as_alu(dest, src1, O2Reg(src2), OpAdc, s, c);
+}
+
+void MacroAssemblerARM::ma_adc(Register src1, Imm32 op, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(src1, op, dest, scratch, OpAdc, s, c);
+}
+
+// Add.
+void MacroAssemblerARM::ma_add(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpAdd, s, c);
+}
+
+void MacroAssemblerARM::ma_add(Register src1, Register dest, SBit s,
+ Condition c) {
+ ma_alu(dest, O2Reg(src1), dest, OpAdd, s, c);
+}
+
+void MacroAssemblerARM::ma_add(Register src1, Register src2, Register dest,
+ SBit s, Condition c) {
+ as_alu(dest, src1, O2Reg(src2), OpAdd, s, c);
+}
+
+void MacroAssemblerARM::ma_add(Register src1, Operand op, Register dest, SBit s,
+ Condition c) {
+ ma_alu(src1, op, dest, OpAdd, s, c);
+}
+
+void MacroAssemblerARM::ma_add(Register src1, Imm32 op, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(src1, op, dest, scratch, OpAdd, s, c);
+}
+
+// Subtract with carry.
+void MacroAssemblerARM::ma_sbc(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpSbc, s, c);
+}
+
+void MacroAssemblerARM::ma_sbc(Register src1, Register dest, SBit s,
+ Condition c) {
+ as_alu(dest, dest, O2Reg(src1), OpSbc, s, c);
+}
+
+void MacroAssemblerARM::ma_sbc(Register src1, Register src2, Register dest,
+ SBit s, Condition c) {
+ as_alu(dest, src1, O2Reg(src2), OpSbc, s, c);
+}
+
+// Subtract.
+void MacroAssemblerARM::ma_sub(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpSub, s, c);
+}
+
+void MacroAssemblerARM::ma_sub(Register src1, Register dest, SBit s,
+ Condition c) {
+ ma_alu(dest, Operand(src1), dest, OpSub, s, c);
+}
+
+void MacroAssemblerARM::ma_sub(Register src1, Register src2, Register dest,
+ SBit s, Condition c) {
+ ma_alu(src1, Operand(src2), dest, OpSub, s, c);
+}
+
+void MacroAssemblerARM::ma_sub(Register src1, Operand op, Register dest, SBit s,
+ Condition c) {
+ ma_alu(src1, op, dest, OpSub, s, c);
+}
+
+void MacroAssemblerARM::ma_sub(Register src1, Imm32 op, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(src1, op, dest, scratch, OpSub, s, c);
+}
+
+// Reverse subtract.
+void MacroAssemblerARM::ma_rsb(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpRsb, s, c);
+}
+
+void MacroAssemblerARM::ma_rsb(Register src1, Register dest, SBit s,
+ Condition c) {
+ as_alu(dest, src1, O2Reg(dest), OpRsb, s, c);
+}
+
+void MacroAssemblerARM::ma_rsb(Register src1, Register src2, Register dest,
+ SBit s, Condition c) {
+ as_alu(dest, src1, O2Reg(src2), OpRsb, s, c);
+}
+
+void MacroAssemblerARM::ma_rsb(Register src1, Imm32 op2, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(src1, op2, dest, scratch, OpRsb, s, c);
+}
+
+// Reverse subtract with carry.
+void MacroAssemblerARM::ma_rsc(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpRsc, s, c);
+}
+
+void MacroAssemblerARM::ma_rsc(Register src1, Register dest, SBit s,
+ Condition c) {
+ as_alu(dest, dest, O2Reg(src1), OpRsc, s, c);
+}
+
+void MacroAssemblerARM::ma_rsc(Register src1, Register src2, Register dest,
+ SBit s, Condition c) {
+ as_alu(dest, src1, O2Reg(src2), OpRsc, s, c);
+}
+
+// Compares/tests.
+// Compare negative (sets condition codes as src1 + src2 would).
+void MacroAssemblerARM::ma_cmn(Register src1, Imm32 imm,
+ AutoRegisterScope& scratch, Condition c) {
+ ma_alu(src1, imm, InvalidReg, scratch, OpCmn, SetCC, c);
+}
+
+void MacroAssemblerARM::ma_cmn(Register src1, Register src2, Condition c) {
+ as_alu(InvalidReg, src2, O2Reg(src1), OpCmn, SetCC, c);
+}
+
+void MacroAssemblerARM::ma_cmn(Register src1, Operand op, Condition c) {
+ MOZ_CRASH("Feature NYI");
+}
+
+// Compare (src1 - src2).
+void MacroAssemblerARM::ma_cmp(Register src1, Imm32 imm,
+ AutoRegisterScope& scratch, Condition c) {
+ ma_alu(src1, imm, InvalidReg, scratch, OpCmp, SetCC, c);
+}
+
+void MacroAssemblerARM::ma_cmp(Register src1, ImmTag tag, Condition c) {
+ // ImmTag comparisons can always be done without use of a scratch register.
+ Imm8 negtag = Imm8(-tag.value);
+ MOZ_ASSERT(!negtag.invalid());
+ as_cmn(src1, negtag, c);
+}
+
+void MacroAssemblerARM::ma_cmp(Register src1, ImmWord ptr,
+ AutoRegisterScope& scratch, Condition c) {
+ ma_cmp(src1, Imm32(ptr.value), scratch, c);
+}
+
+void MacroAssemblerARM::ma_cmp(Register src1, ImmGCPtr ptr,
+ AutoRegisterScope& scratch, Condition c) {
+ ma_mov(ptr, scratch);
+ ma_cmp(src1, scratch, c);
+}
+
+void MacroAssemblerARM::ma_cmp(Register src1, Operand op,
+ AutoRegisterScope& scratch,
+ AutoRegisterScope& scratch2, Condition c) {
+ switch (op.tag()) {
+ case Operand::Tag::OP2:
+ as_cmp(src1, op.toOp2(), c);
+ break;
+ case Operand::Tag::MEM:
+ ma_ldr(op.toAddress(), scratch, scratch2);
+ as_cmp(src1, O2Reg(scratch), c);
+ break;
+ default:
+ MOZ_CRASH("trying to compare FP and integer registers");
+ }
+}
+
+void MacroAssemblerARM::ma_cmp(Register src1, Register src2, Condition c) {
+ as_cmp(src1, O2Reg(src2), c);
+}
+
+// Test for equality, (src1 ^ src2).
+void MacroAssemblerARM::ma_teq(Register src1, Imm32 imm,
+ AutoRegisterScope& scratch, Condition c) {
+ ma_alu(src1, imm, InvalidReg, scratch, OpTeq, SetCC, c);
+}
+
+void MacroAssemblerARM::ma_teq(Register src1, Register src2, Condition c) {
+ as_tst(src1, O2Reg(src2), c);
+}
+
+void MacroAssemblerARM::ma_teq(Register src1, Operand op, Condition c) {
+ as_teq(src1, op.toOp2(), c);
+}
+
+// Test (src1 & src2).
+void MacroAssemblerARM::ma_tst(Register src1, Imm32 imm,
+ AutoRegisterScope& scratch, Condition c) {
+ ma_alu(src1, imm, InvalidReg, scratch, OpTst, SetCC, c);
+}
+
+void MacroAssemblerARM::ma_tst(Register src1, Register src2, Condition c) {
+ as_tst(src1, O2Reg(src2), c);
+}
+
+void MacroAssemblerARM::ma_tst(Register src1, Operand op, Condition c) {
+ as_tst(src1, op.toOp2(), c);
+}
+
+void MacroAssemblerARM::ma_mul(Register src1, Register src2, Register dest) {
+ as_mul(dest, src1, src2);
+}
+
+void MacroAssemblerARM::ma_mul(Register src1, Imm32 imm, Register dest,
+ AutoRegisterScope& scratch) {
+ ma_mov(imm, scratch);
+ as_mul(dest, src1, scratch);
+}
+
+Assembler::Condition MacroAssemblerARM::ma_check_mul(Register src1,
+ Register src2,
+ Register dest,
+ AutoRegisterScope& scratch,
+ Condition cond) {
+ // TODO: this operation is illegal on armv6 and earlier
+ // if src2 == scratch or src2 == dest.
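+  //
+  // For the Overflow case below: smull produces the full 64-bit product, and
+  // the 32-bit multiply overflowed exactly when the high word is not the
+  // sign-extension of the low word, hence the |cmp high, asr(low, 31)|.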
+ if (cond == Equal || cond == NotEqual) {
+ as_smull(scratch, dest, src1, src2, SetCC);
+ return cond;
+ }
+
+ if (cond == Overflow) {
+ as_smull(scratch, dest, src1, src2);
+ as_cmp(scratch, asr(dest, 31));
+ return NotEqual;
+ }
+
+ MOZ_CRASH("Condition NYI");
+}
+
+Assembler::Condition MacroAssemblerARM::ma_check_mul(Register src1, Imm32 imm,
+ Register dest,
+ AutoRegisterScope& scratch,
+ Condition cond) {
+ ma_mov(imm, scratch);
+
+ if (cond == Equal || cond == NotEqual) {
+ as_smull(scratch, dest, scratch, src1, SetCC);
+ return cond;
+ }
+
+ if (cond == Overflow) {
+ as_smull(scratch, dest, scratch, src1);
+ as_cmp(scratch, asr(dest, 31));
+ return NotEqual;
+ }
+
+ MOZ_CRASH("Condition NYI");
+}
+
+void MacroAssemblerARM::ma_umull(Register src1, Imm32 imm, Register destHigh,
+ Register destLow, AutoRegisterScope& scratch) {
+ ma_mov(imm, scratch);
+ as_umull(destHigh, destLow, src1, scratch);
+}
+
+void MacroAssemblerARM::ma_umull(Register src1, Register src2,
+ Register destHigh, Register destLow) {
+ as_umull(destHigh, destLow, src1, src2);
+}
+
+void MacroAssemblerARM::ma_mod_mask(Register src, Register dest, Register hold,
+ Register tmp, AutoRegisterScope& scratch,
+ AutoRegisterScope& scratch2,
+ int32_t shift) {
+  // We wish to compute x % ((1<<y) - 1) for a known constant, y.
+  //
+  // 1. Let b = (1<<y) and C = (1<<y)-1, then think of the 32-bit dividend as
+  // a number in base b, namely c_0*1 + c_1*b + c_2*b^2 + ... + c_n*b^n.
+ //
+ // 2. Since both addition and multiplication commute with modulus:
+ // x % C == (c_0 + c_1*b + ... + c_n*b^n) % C ==
+ // (c_0 % C) + (c_1%C) * (b % C) + (c_2 % C) * (b^2 % C)...
+ //
+  // 3. Since b == C + 1, b % C == 1, and b^n % C == 1, the whole thing
+  // simplifies to: (c_0 + c_1 + c_2 + ... + c_n) % C
+ //
+ // Each c_n can easily be computed by a shift/bitextract, and the modulus
+ // can be maintained by simply subtracting by C whenever the number gets
+ // over C.
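+  //
+  // Worked example with y == 2 (so b == 4, C == 3) and x == 13: the base-4
+  // digits of 13 are 1 and 3 (13 == 3*4 + 1), and (1 + 3) % 3 == 1 == 13 % 3.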
+ int32_t mask = (1 << shift) - 1;
+ Label head;
+
+  // Register 'hold' holds -1 if the value was negative, 1 otherwise.
+  // 'tmp' holds the remaining bits that have not been processed yet.
+  // 'scratch' serves as a temporary location to store extracted bits into,
+  // as well as holding the trial subtraction as a temp value.
+  // 'dest' is the accumulator (and holds the final result).
+  //
+  // Move the whole value into tmp, setting the condition codes so we can muck
+  // with them later.
+ as_mov(tmp, O2Reg(src), SetCC);
+ // Zero out the dest.
+ ma_mov(Imm32(0), dest);
+ // Set the hold appropriately.
+ ma_mov(Imm32(1), hold);
+ ma_mov(Imm32(-1), hold, Signed);
+ as_rsb(tmp, tmp, Imm8(0), SetCC, Signed);
+
+ // Begin the main loop.
+ bind(&head);
+ {
+ // Extract the bottom bits.
+ ma_and(Imm32(mask), tmp, scratch, scratch2);
+ // Add those bits to the accumulator.
+ ma_add(scratch, dest, dest);
+    // Do a trial subtraction; this is the same operation as cmp, but we keep
+    // the result.
+ ma_sub(dest, Imm32(mask), scratch, scratch2, SetCC);
+ // If (sum - C) > 0, store sum - C back into sum, thus performing a modulus.
+ ma_mov(scratch, dest, LeaveCC, NotSigned);
+ // Get rid of the bits that we extracted before, and set the condition
+ // codes.
+ as_mov(tmp, lsr(tmp, shift), SetCC);
+ // If the shift produced zero, finish, otherwise, continue in the loop.
+ ma_b(&head, NonZero);
+ }
+
+ // Check the hold to see if we need to negate the result. Hold can only be
+ // 1 or -1, so this will never set the 0 flag.
+ as_cmp(hold, Imm8(0));
+  // If the hold was non-zero, negate the result to be in line with what JS
+  // wants; this will set the condition codes if we try to negate.
+ as_rsb(dest, dest, Imm8(0), SetCC, Signed);
+ // Since the Zero flag is not set by the compare, we can *only* set the Zero
+ // flag in the rsb, so Zero is set iff we negated zero (e.g. the result of
+ // the computation was -0.0).
+}
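+
+// Roughly equivalent C for ma_mod_mask above (an illustrative sketch that
+// ignores the INT32_MIN edge case):
+//   int32_t C = (1 << shift) - 1;
+//   int32_t r = x < 0 ? -x : x, acc = 0;
+//   while (r != 0) {
+//     acc += r & C;
+//     if (acc >= C) acc -= C;   // trial subtraction
+//     r >>= shift;              // discard the bits already processed
+//   }
+//   return x < 0 ? -acc : acc;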
+
+void MacroAssemblerARM::ma_smod(Register num, Register div, Register dest,
+ AutoRegisterScope& scratch) {
+ as_sdiv(scratch, num, div);
+ as_mls(dest, num, scratch, div);
+}
+
+void MacroAssemblerARM::ma_umod(Register num, Register div, Register dest,
+ AutoRegisterScope& scratch) {
+ as_udiv(scratch, num, div);
+ as_mls(dest, num, scratch, div);
+}
+
+// Division
+void MacroAssemblerARM::ma_sdiv(Register num, Register div, Register dest,
+ Condition cond) {
+ as_sdiv(dest, num, div, cond);
+}
+
+void MacroAssemblerARM::ma_udiv(Register num, Register div, Register dest,
+ Condition cond) {
+ as_udiv(dest, num, div, cond);
+}
+
+// Miscellaneous instructions.
+void MacroAssemblerARM::ma_clz(Register src, Register dest, Condition cond) {
+ as_clz(dest, src, cond);
+}
+
+void MacroAssemblerARM::ma_ctz(Register src, Register dest,
+ AutoRegisterScope& scratch) {
+ // int c = __clz(a & -a);
+ // return a ? 31 - c : c;
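+  // For example, src == 8 (0b1000): src & -src == 8, clz(8) == 28, and
+  // 31 - 28 == 3 == ctz(8). For src == 0, the conditional rsb below is
+  // skipped (Z is still set from the first rsb), leaving dest == clz(0) == 32.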
+ as_rsb(scratch, src, Imm8(0), SetCC);
+ as_and(dest, src, O2Reg(scratch), LeaveCC);
+ as_clz(dest, dest);
+ as_rsb(dest, dest, Imm8(0x1F), LeaveCC, Assembler::NotEqual);
+}
+
+// Memory.
+// Shortcut for when we know we're transferring 32 bits of data.
+void MacroAssemblerARM::ma_dtr(LoadStore ls, Register rn, Imm32 offset,
+ Register rt, AutoRegisterScope& scratch,
+ Index mode, Assembler::Condition cc) {
+ ma_dataTransferN(ls, 32, true, rn, offset, rt, scratch, mode, cc);
+}
+
+void MacroAssemblerARM::ma_dtr(LoadStore ls, Register rt, const Address& addr,
+ AutoRegisterScope& scratch, Index mode,
+ Condition cc) {
+ ma_dataTransferN(ls, 32, true, addr.base, Imm32(addr.offset), rt, scratch,
+ mode, cc);
+}
+
+void MacroAssemblerARM::ma_str(Register rt, DTRAddr addr, Index mode,
+ Condition cc) {
+ as_dtr(IsStore, 32, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_str(Register rt, const Address& addr,
+ AutoRegisterScope& scratch, Index mode,
+ Condition cc) {
+ ma_dtr(IsStore, rt, addr, scratch, mode, cc);
+}
+
+void MacroAssemblerARM::ma_strd(Register rt, DebugOnly<Register> rt2,
+ EDtrAddr addr, Index mode, Condition cc) {
+ MOZ_ASSERT((rt.code() & 1) == 0);
+ MOZ_ASSERT(rt2.value.code() == rt.code() + 1);
+ as_extdtr(IsStore, 64, true, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_ldr(DTRAddr addr, Register rt, Index mode,
+ Condition cc) {
+ as_dtr(IsLoad, 32, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_ldr(const Address& addr, Register rt,
+ AutoRegisterScope& scratch, Index mode,
+ Condition cc) {
+ ma_dtr(IsLoad, rt, addr, scratch, mode, cc);
+}
+
+void MacroAssemblerARM::ma_ldrb(DTRAddr addr, Register rt, Index mode,
+ Condition cc) {
+ as_dtr(IsLoad, 8, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_ldrsh(EDtrAddr addr, Register rt, Index mode,
+ Condition cc) {
+ as_extdtr(IsLoad, 16, true, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_ldrh(EDtrAddr addr, Register rt, Index mode,
+ Condition cc) {
+ as_extdtr(IsLoad, 16, false, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_ldrsb(EDtrAddr addr, Register rt, Index mode,
+ Condition cc) {
+ as_extdtr(IsLoad, 8, true, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_ldrd(EDtrAddr addr, Register rt,
+ DebugOnly<Register> rt2, Index mode,
+ Condition cc) {
+ MOZ_ASSERT((rt.code() & 1) == 0);
+ MOZ_ASSERT(rt2.value.code() == rt.code() + 1);
+ MOZ_ASSERT(addr.maybeOffsetRegister() !=
+ rt); // Undefined behavior if rm == rt/rt2.
+ MOZ_ASSERT(addr.maybeOffsetRegister() != rt2);
+ as_extdtr(IsLoad, 64, true, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_strh(Register rt, EDtrAddr addr, Index mode,
+ Condition cc) {
+ as_extdtr(IsStore, 16, false, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_strb(Register rt, DTRAddr addr, Index mode,
+ Condition cc) {
+ as_dtr(IsStore, 8, mode, rt, addr, cc);
+}
+
+// Specialty for moving N bits of data, where n == 8,16,32,64.
+BufferOffset MacroAssemblerARM::ma_dataTransferN(
+ LoadStore ls, int size, bool IsSigned, Register rn, Register rm,
+ Register rt, AutoRegisterScope& scratch, Index mode,
+ Assembler::Condition cc, Scale scale) {
+ MOZ_ASSERT(size == 8 || size == 16 || size == 32 || size == 64);
+
+ if (size == 32 || (size == 8 && !IsSigned)) {
+ return as_dtr(ls, size, mode, rt,
+ DTRAddr(rn, DtrRegImmShift(rm, LSL, scale)), cc);
+ }
+
+ if (scale != TimesOne) {
+ ma_lsl(Imm32(scale), rm, scratch);
+ rm = scratch;
+ }
+
+ return as_extdtr(ls, size, IsSigned, mode, rt, EDtrAddr(rn, EDtrOffReg(rm)),
+ cc);
+}
+
+// No scratch register is required if scale is TimesOne.
+BufferOffset MacroAssemblerARM::ma_dataTransferN(LoadStore ls, int size,
+ bool IsSigned, Register rn,
+ Register rm, Register rt,
+ Index mode,
+ Assembler::Condition cc) {
+ MOZ_ASSERT(size == 8 || size == 16 || size == 32 || size == 64);
+ if (size == 32 || (size == 8 && !IsSigned)) {
+ return as_dtr(ls, size, mode, rt,
+ DTRAddr(rn, DtrRegImmShift(rm, LSL, TimesOne)), cc);
+ }
+ return as_extdtr(ls, size, IsSigned, mode, rt, EDtrAddr(rn, EDtrOffReg(rm)),
+ cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_dataTransferN(LoadStore ls, int size,
+ bool IsSigned, Register rn,
+ Imm32 offset, Register rt,
+ AutoRegisterScope& scratch,
+ Index mode,
+ Assembler::Condition cc) {
+ MOZ_ASSERT(!(ls == IsLoad && mode == PostIndex && rt == pc),
+ "Large-offset PostIndex loading into PC requires special logic: "
+ "see ma_popn_pc().");
+
+ int off = offset.value;
+
+ // We can encode this as a standard ldr.
+ if (size == 32 || (size == 8 && !IsSigned)) {
+ if (off < 4096 && off > -4096) {
+      // This encodes as a single instruction; emulating mode's behavior
+      // in a multi-instruction sequence is not necessary.
+ return as_dtr(ls, size, mode, rt, DTRAddr(rn, DtrOffImm(off)), cc);
+ }
+
+    // We cannot encode this offset in a single ldr. For mode == Offset,
+    // try to encode it as |add scratch, base, imm; ldr dest, [scratch,
+    // +offset]|. This does not work for mode == PreIndex or mode == PostIndex.
+    // PreIndex is simple: just do the add into the base register first,
+    // then do a PreIndex'ed load. PostIndexed loads can be tricky.
+    // Normally, doing the load with an index of 0 and then doing an add would
+    // work, but if the destination is the PC, you don't get to execute the
+    // instruction after the branch, which will lead to the base register
+    // not being updated correctly. Explicitly handle this case, without
+    // doing anything fancy, then handle all of the other cases.
+
+ // mode == Offset
+ // add scratch, base, offset_hi
+ // ldr dest, [scratch, +offset_lo]
+ //
+ // mode == PreIndex
+ // add base, base, offset_hi
+ // ldr dest, [base, +offset_lo]!
+
+ int bottom = off & 0xfff;
+ int neg_bottom = 0x1000 - bottom;
+
+ MOZ_ASSERT(rn != scratch);
+ MOZ_ASSERT(mode != PostIndex);
+
+ // At this point, both off - bottom and off + neg_bottom will be
+ // reasonable-ish quantities.
+ //
+ // Note a neg_bottom of 0x1000 can not be encoded as an immediate
+ // negative offset in the instruction and this occurs when bottom is
+ // zero, so this case is guarded against below.
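+    //
+    // For example, off == 0x1234: bottom == 0x234, and off - bottom == 0x1000
+    // is encodable as an imm8m, so we emit |add scratch, rn, #0x1000| and
+    // then do the transfer with immediate offset 0x234.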
+ if (off < 0) {
+ Operand2 sub_off = Imm8(-(off - bottom)); // sub_off = bottom - off
+ if (!sub_off.invalid()) {
+ // - sub_off = off - bottom
+ as_sub(scratch, rn, sub_off, LeaveCC, cc);
+ return as_dtr(ls, size, Offset, rt, DTRAddr(scratch, DtrOffImm(bottom)),
+ cc);
+ }
+
+ // sub_off = -neg_bottom - off
+ sub_off = Imm8(-(off + neg_bottom));
+ if (!sub_off.invalid() && bottom != 0) {
+ // Guarded against by: bottom != 0
+ MOZ_ASSERT(neg_bottom < 0x1000);
+ // - sub_off = neg_bottom + off
+ as_sub(scratch, rn, sub_off, LeaveCC, cc);
+ return as_dtr(ls, size, Offset, rt,
+ DTRAddr(scratch, DtrOffImm(-neg_bottom)), cc);
+ }
+ } else {
+ // sub_off = off - bottom
+ Operand2 sub_off = Imm8(off - bottom);
+ if (!sub_off.invalid()) {
+ // sub_off = off - bottom
+ as_add(scratch, rn, sub_off, LeaveCC, cc);
+ return as_dtr(ls, size, Offset, rt, DTRAddr(scratch, DtrOffImm(bottom)),
+ cc);
+ }
+
+ // sub_off = neg_bottom + off
+ sub_off = Imm8(off + neg_bottom);
+ if (!sub_off.invalid() && bottom != 0) {
+ // Guarded against by: bottom != 0
+ MOZ_ASSERT(neg_bottom < 0x1000);
+ // sub_off = neg_bottom + off
+ as_add(scratch, rn, sub_off, LeaveCC, cc);
+ return as_dtr(ls, size, Offset, rt,
+ DTRAddr(scratch, DtrOffImm(-neg_bottom)), cc);
+ }
+ }
+
+ ma_mov(offset, scratch);
+ return as_dtr(ls, size, mode, rt,
+ DTRAddr(rn, DtrRegImmShift(scratch, LSL, 0)));
+ } else {
+ // Should attempt to use the extended load/store instructions.
+ if (off < 256 && off > -256) {
+ return as_extdtr(ls, size, IsSigned, mode, rt,
+ EDtrAddr(rn, EDtrOffImm(off)), cc);
+ }
+
+ // We cannot encode this offset in a single extldr. Try to encode it as
+ // an add scratch, base, imm; extldr dest, [scratch, +offset].
+ int bottom = off & 0xff;
+ int neg_bottom = 0x100 - bottom;
+ // At this point, both off - bottom and off + neg_bottom will be
+ // reasonable-ish quantities.
+ //
+ // Note a neg_bottom of 0x100 can not be encoded as an immediate
+ // negative offset in the instruction and this occurs when bottom is
+ // zero, so this case is guarded against below.
+ if (off < 0) {
+ // sub_off = bottom - off
+ Operand2 sub_off = Imm8(-(off - bottom));
+ if (!sub_off.invalid()) {
+ // - sub_off = off - bottom
+ as_sub(scratch, rn, sub_off, LeaveCC, cc);
+ return as_extdtr(ls, size, IsSigned, Offset, rt,
+ EDtrAddr(scratch, EDtrOffImm(bottom)), cc);
+ }
+ // sub_off = -neg_bottom - off
+ sub_off = Imm8(-(off + neg_bottom));
+ if (!sub_off.invalid() && bottom != 0) {
+ // Guarded against by: bottom != 0
+ MOZ_ASSERT(neg_bottom < 0x100);
+ // - sub_off = neg_bottom + off
+ as_sub(scratch, rn, sub_off, LeaveCC, cc);
+ return as_extdtr(ls, size, IsSigned, Offset, rt,
+ EDtrAddr(scratch, EDtrOffImm(-neg_bottom)), cc);
+ }
+ } else {
+ // sub_off = off - bottom
+ Operand2 sub_off = Imm8(off - bottom);
+ if (!sub_off.invalid()) {
+ // sub_off = off - bottom
+ as_add(scratch, rn, sub_off, LeaveCC, cc);
+ return as_extdtr(ls, size, IsSigned, Offset, rt,
+ EDtrAddr(scratch, EDtrOffImm(bottom)), cc);
+ }
+ // sub_off = neg_bottom + off
+ sub_off = Imm8(off + neg_bottom);
+ if (!sub_off.invalid() && bottom != 0) {
+ // Guarded against by: bottom != 0
+ MOZ_ASSERT(neg_bottom < 0x100);
+ // sub_off = neg_bottom + off
+ as_add(scratch, rn, sub_off, LeaveCC, cc);
+ return as_extdtr(ls, size, IsSigned, Offset, rt,
+ EDtrAddr(scratch, EDtrOffImm(-neg_bottom)), cc);
+ }
+ }
+ ma_mov(offset, scratch);
+ return as_extdtr(ls, size, IsSigned, mode, rt,
+ EDtrAddr(rn, EDtrOffReg(scratch)), cc);
+ }
+}
+
+void MacroAssemblerARM::ma_pop(Register r) {
+ as_dtr(IsLoad, 32, PostIndex, r, DTRAddr(sp, DtrOffImm(4)));
+}
+
+void MacroAssemblerARM::ma_popn_pc(Imm32 n, AutoRegisterScope& scratch,
+ AutoRegisterScope& scratch2) {
+ // pc <- [sp]; sp += n
+ int32_t nv = n.value;
+
+ if (nv < 4096 && nv >= -4096) {
+ as_dtr(IsLoad, 32, PostIndex, pc, DTRAddr(sp, DtrOffImm(nv)));
+ } else {
+ ma_mov(sp, scratch);
+ ma_add(Imm32(n), sp, scratch2);
+ as_dtr(IsLoad, 32, Offset, pc, DTRAddr(scratch, DtrOffImm(0)));
+ }
+}
+
+void MacroAssemblerARM::ma_push(Register r) {
+ MOZ_ASSERT(r != sp, "Use ma_push_sp().");
+ as_dtr(IsStore, 32, PreIndex, r, DTRAddr(sp, DtrOffImm(-4)));
+}
+
+void MacroAssemblerARM::ma_push_sp(Register r, AutoRegisterScope& scratch) {
+ // Pushing sp is not well-defined: use two instructions.
+ MOZ_ASSERT(r == sp);
+ ma_mov(sp, scratch);
+ as_dtr(IsStore, 32, PreIndex, scratch, DTRAddr(sp, DtrOffImm(-4)));
+}
+
+void MacroAssemblerARM::ma_vpop(VFPRegister r) {
+ startFloatTransferM(IsLoad, sp, IA, WriteBack);
+ transferFloatReg(r);
+ finishFloatTransfer();
+}
+
+void MacroAssemblerARM::ma_vpush(VFPRegister r) {
+ startFloatTransferM(IsStore, sp, DB, WriteBack);
+ transferFloatReg(r);
+ finishFloatTransfer();
+}
+
+// Barriers
+void MacroAssemblerARM::ma_dmb(BarrierOption option) {
+ if (HasDMBDSBISB()) {
+ as_dmb(option);
+ } else {
+ as_dmb_trap();
+ }
+}
+
+void MacroAssemblerARM::ma_dsb(BarrierOption option) {
+ if (HasDMBDSBISB()) {
+ as_dsb(option);
+ } else {
+ as_dsb_trap();
+ }
+}
+
+// Branches when done from within arm-specific code.
+BufferOffset MacroAssemblerARM::ma_b(Label* dest, Assembler::Condition c) {
+ return as_b(dest, c);
+}
+
+void MacroAssemblerARM::ma_bx(Register dest, Assembler::Condition c) {
+ as_bx(dest, c);
+}
+
+void MacroAssemblerARM::ma_b(void* target, Assembler::Condition c) {
+ // An immediate pool is used for easier patching.
+ as_Imm32Pool(pc, uint32_t(target), c);
+}
+
+// This is almost NEVER necessary: we'll basically never be calling a label,
+// except possibly in the crazy bailout-table case.
+void MacroAssemblerARM::ma_bl(Label* dest, Assembler::Condition c) {
+ as_bl(dest, c);
+}
+
+void MacroAssemblerARM::ma_blx(Register reg, Assembler::Condition c) {
+ as_blx(reg, c);
+}
+
+// VFP/ALU
+void MacroAssemblerARM::ma_vadd(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vadd(VFPRegister(dst), VFPRegister(src1), VFPRegister(src2));
+}
+
+void MacroAssemblerARM::ma_vadd_f32(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vadd(VFPRegister(dst).singleOverlay(), VFPRegister(src1).singleOverlay(),
+ VFPRegister(src2).singleOverlay());
+}
+
+void MacroAssemblerARM::ma_vsub(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vsub(VFPRegister(dst), VFPRegister(src1), VFPRegister(src2));
+}
+
+void MacroAssemblerARM::ma_vsub_f32(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vsub(VFPRegister(dst).singleOverlay(), VFPRegister(src1).singleOverlay(),
+ VFPRegister(src2).singleOverlay());
+}
+
+void MacroAssemblerARM::ma_vmul(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vmul(VFPRegister(dst), VFPRegister(src1), VFPRegister(src2));
+}
+
+void MacroAssemblerARM::ma_vmul_f32(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vmul(VFPRegister(dst).singleOverlay(), VFPRegister(src1).singleOverlay(),
+ VFPRegister(src2).singleOverlay());
+}
+
+void MacroAssemblerARM::ma_vdiv(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vdiv(VFPRegister(dst), VFPRegister(src1), VFPRegister(src2));
+}
+
+void MacroAssemblerARM::ma_vdiv_f32(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vdiv(VFPRegister(dst).singleOverlay(), VFPRegister(src1).singleOverlay(),
+ VFPRegister(src2).singleOverlay());
+}
+
+void MacroAssemblerARM::ma_vmov(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vmov(dest, src, cc);
+}
+
+void MacroAssemblerARM::ma_vmov_f32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vmov(VFPRegister(dest).singleOverlay(), VFPRegister(src).singleOverlay(),
+ cc);
+}
+
+void MacroAssemblerARM::ma_vneg(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vneg(dest, src, cc);
+}
+
+void MacroAssemblerARM::ma_vneg_f32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vneg(VFPRegister(dest).singleOverlay(), VFPRegister(src).singleOverlay(),
+ cc);
+}
+
+void MacroAssemblerARM::ma_vabs(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vabs(dest, src, cc);
+}
+
+void MacroAssemblerARM::ma_vabs_f32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vabs(VFPRegister(dest).singleOverlay(), VFPRegister(src).singleOverlay(),
+ cc);
+}
+
+void MacroAssemblerARM::ma_vsqrt(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vsqrt(dest, src, cc);
+}
+
+void MacroAssemblerARM::ma_vsqrt_f32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vsqrt(VFPRegister(dest).singleOverlay(), VFPRegister(src).singleOverlay(),
+ cc);
+}
+
+static inline uint32_t DoubleHighWord(double d) {
+ return static_cast<uint32_t>(BitwiseCast<uint64_t>(d) >> 32);
+}
+
+static inline uint32_t DoubleLowWord(double d) {
+ return static_cast<uint32_t>(BitwiseCast<uint64_t>(d)) & uint32_t(0xffffffff);
+}
+
+void MacroAssemblerARM::ma_vimm(double value, FloatRegister dest,
+ Condition cc) {
+ if (HasVFPv3()) {
+ if (DoubleLowWord(value) == 0) {
+ if (DoubleHighWord(value) == 0) {
+ // To zero a register, load 1.0, then execute dN <- dN - dN
+ as_vimm(dest, VFPImm::One, cc);
+ as_vsub(dest, dest, dest, cc);
+ return;
+ }
+
+ VFPImm enc(DoubleHighWord(value));
+ if (enc.isValid()) {
+ as_vimm(dest, enc, cc);
+ return;
+ }
+ }
+ }
+ // Fall back to putting the value in a pool.
+ as_FImm64Pool(dest, value, cc);
+}
+
+void MacroAssemblerARM::ma_vimm_f32(float value, FloatRegister dest,
+ Condition cc) {
+ VFPRegister vd = VFPRegister(dest).singleOverlay();
+ if (HasVFPv3()) {
+ if (IsPositiveZero(value)) {
+ // To zero a register, load 1.0, then execute sN <- sN - sN.
+ as_vimm(vd, VFPImm::One, cc);
+ as_vsub(vd, vd, vd, cc);
+ return;
+ }
+
+ // Note that the vimm immediate float32 instruction encoding differs
+ // from the vimm immediate double encoding, but this difference matches
+ // the difference in the floating point formats, so it is possible to
+ // convert the float32 to a double and then use the double encoding
+    // paths. It is still necessary to first check that the double low
+    // word is zero, because some float32 numbers set these bits and this
+    // cannot be ignored.
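+    // For example, 0.5f widens to the double 0.5 (bits 0x3FE0000000000000):
+    // the low word is zero and the high word 0x3FE00000 is a valid VFP
+    // immediate encoding, so the as_vimm path below applies.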
+ double doubleValue(value);
+ if (DoubleLowWord(doubleValue) == 0) {
+ VFPImm enc(DoubleHighWord(doubleValue));
+ if (enc.isValid()) {
+ as_vimm(vd, enc, cc);
+ return;
+ }
+ }
+ }
+
+ // Fall back to putting the value in a pool.
+ as_FImm32Pool(vd, value, cc);
+}
+
+void MacroAssemblerARM::ma_vcmp(FloatRegister src1, FloatRegister src2,
+ Condition cc) {
+ as_vcmp(VFPRegister(src1), VFPRegister(src2), cc);
+}
+
+void MacroAssemblerARM::ma_vcmp_f32(FloatRegister src1, FloatRegister src2,
+ Condition cc) {
+ as_vcmp(VFPRegister(src1).singleOverlay(), VFPRegister(src2).singleOverlay(),
+ cc);
+}
+
+void MacroAssemblerARM::ma_vcmpz(FloatRegister src1, Condition cc) {
+ as_vcmpz(VFPRegister(src1), cc);
+}
+
+void MacroAssemblerARM::ma_vcmpz_f32(FloatRegister src1, Condition cc) {
+ as_vcmpz(VFPRegister(src1).singleOverlay(), cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_F64_I32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isDouble());
+ MOZ_ASSERT(dest.isSInt());
+ as_vcvt(dest, src, false, cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_F64_U32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isDouble());
+ MOZ_ASSERT(dest.isUInt());
+ as_vcvt(dest, src, false, cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_I32_F64(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isSInt());
+ MOZ_ASSERT(dest.isDouble());
+ as_vcvt(dest, src, false, cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_U32_F64(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isUInt());
+ MOZ_ASSERT(dest.isDouble());
+ as_vcvt(dest, src, false, cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_F32_I32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isSingle());
+ MOZ_ASSERT(dest.isSInt());
+ as_vcvt(VFPRegister(dest).sintOverlay(), VFPRegister(src).singleOverlay(),
+ false, cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_F32_U32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isSingle());
+ MOZ_ASSERT(dest.isUInt());
+ as_vcvt(VFPRegister(dest).uintOverlay(), VFPRegister(src).singleOverlay(),
+ false, cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_I32_F32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isSInt());
+ MOZ_ASSERT(dest.isSingle());
+ as_vcvt(VFPRegister(dest).singleOverlay(), VFPRegister(src).sintOverlay(),
+ false, cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_U32_F32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isUInt());
+ MOZ_ASSERT(dest.isSingle());
+ as_vcvt(VFPRegister(dest).singleOverlay(), VFPRegister(src).uintOverlay(),
+ false, cc);
+}
+
+void MacroAssemblerARM::ma_vxfer(FloatRegister src, Register dest,
+ Condition cc) {
+ as_vxfer(dest, InvalidReg, VFPRegister(src).singleOverlay(), FloatToCore, cc);
+}
+
+void MacroAssemblerARM::ma_vxfer(FloatRegister src, Register dest1,
+ Register dest2, Condition cc) {
+ as_vxfer(dest1, dest2, VFPRegister(src), FloatToCore, cc);
+}
+
+void MacroAssemblerARM::ma_vxfer(Register src, FloatRegister dest,
+ Condition cc) {
+ as_vxfer(src, InvalidReg, VFPRegister(dest).singleOverlay(), CoreToFloat, cc);
+}
+
+void MacroAssemblerARM::ma_vxfer(Register src1, Register src2,
+ FloatRegister dest, Condition cc) {
+ as_vxfer(src1, src2, VFPRegister(dest), CoreToFloat, cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_vdtr(LoadStore ls, const Address& addr,
+ VFPRegister rt,
+ AutoRegisterScope& scratch,
+ Condition cc) {
+ int off = addr.offset;
+ MOZ_ASSERT((off & 3) == 0);
+ Register base = addr.base;
+ if (off > -1024 && off < 1024) {
+ return as_vdtr(ls, rt, Operand(addr).toVFPAddr(), cc);
+ }
+
+  // We cannot encode this offset in a single ldr. Try to encode it as an
+ // add scratch, base, imm; ldr dest, [scratch, +offset].
+ int bottom = off & (0xff << 2);
+ int neg_bottom = (0x100 << 2) - bottom;
+ // At this point, both off - bottom and off + neg_bottom will be
+ // reasonable-ish quantities.
+ //
+ // Note a neg_bottom of 0x400 can not be encoded as an immediate negative
+ // offset in the instruction and this occurs when bottom is zero, so this
+ // case is guarded against below.
+ if (off < 0) {
+ // sub_off = bottom - off
+ Operand2 sub_off = Imm8(-(off - bottom));
+ if (!sub_off.invalid()) {
+ // - sub_off = off - bottom
+ as_sub(scratch, base, sub_off, LeaveCC, cc);
+ return as_vdtr(ls, rt, VFPAddr(scratch, VFPOffImm(bottom)), cc);
+ }
+ // sub_off = -neg_bottom - off
+ sub_off = Imm8(-(off + neg_bottom));
+ if (!sub_off.invalid() && bottom != 0) {
+ // Guarded against by: bottom != 0
+ MOZ_ASSERT(neg_bottom < 0x400);
+ // - sub_off = neg_bottom + off
+ as_sub(scratch, base, sub_off, LeaveCC, cc);
+ return as_vdtr(ls, rt, VFPAddr(scratch, VFPOffImm(-neg_bottom)), cc);
+ }
+ } else {
+ // sub_off = off - bottom
+ Operand2 sub_off = Imm8(off - bottom);
+ if (!sub_off.invalid()) {
+ // sub_off = off - bottom
+ as_add(scratch, base, sub_off, LeaveCC, cc);
+ return as_vdtr(ls, rt, VFPAddr(scratch, VFPOffImm(bottom)), cc);
+ }
+ // sub_off = neg_bottom + off
+ sub_off = Imm8(off + neg_bottom);
+ if (!sub_off.invalid() && bottom != 0) {
+ // Guarded against by: bottom != 0
+ MOZ_ASSERT(neg_bottom < 0x400);
+ // sub_off = neg_bottom + off
+ as_add(scratch, base, sub_off, LeaveCC, cc);
+ return as_vdtr(ls, rt, VFPAddr(scratch, VFPOffImm(-neg_bottom)), cc);
+ }
+ }
+
+ // Safe to use scratch as dest, since ma_add() overwrites dest at the end
+ // and can't use it as internal scratch since it may also == base.
+ ma_add(base, Imm32(off), scratch, scratch, LeaveCC, cc);
+ return as_vdtr(ls, rt, VFPAddr(scratch, VFPOffImm(0)), cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_vldr(VFPAddr addr, VFPRegister dest,
+ Condition cc) {
+ return as_vdtr(IsLoad, dest, addr, cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_vldr(const Address& addr, VFPRegister dest,
+ AutoRegisterScope& scratch,
+ Condition cc) {
+ return ma_vdtr(IsLoad, addr, dest, scratch, cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_vldr(VFPRegister src, Register base,
+ Register index,
+ AutoRegisterScope& scratch,
+ int32_t shift, Condition cc) {
+ as_add(scratch, base, lsl(index, shift), LeaveCC, cc);
+ return as_vdtr(IsLoad, src, Operand(Address(scratch, 0)).toVFPAddr(), cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_vstr(VFPRegister src, VFPAddr addr,
+ Condition cc) {
+ return as_vdtr(IsStore, src, addr, cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_vstr(VFPRegister src, const Address& addr,
+ AutoRegisterScope& scratch,
+ Condition cc) {
+ return ma_vdtr(IsStore, addr, src, scratch, cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_vstr(
+ VFPRegister src, Register base, Register index, AutoRegisterScope& scratch,
+ AutoRegisterScope& scratch2, int32_t shift, int32_t offset, Condition cc) {
+ as_add(scratch, base, lsl(index, shift), LeaveCC, cc);
+ return ma_vstr(src, Address(scratch, offset), scratch2, cc);
+}
+
+// Without an offset, no second scratch register is necessary.
+BufferOffset MacroAssemblerARM::ma_vstr(VFPRegister src, Register base,
+ Register index,
+ AutoRegisterScope& scratch,
+ int32_t shift, Condition cc) {
+ as_add(scratch, base, lsl(index, shift), LeaveCC, cc);
+ return as_vdtr(IsStore, src, Operand(Address(scratch, 0)).toVFPAddr(), cc);
+}
+
+bool MacroAssemblerARMCompat::buildOOLFakeExitFrame(void* fakeReturnAddr) {
+ asMasm().PushFrameDescriptor(FrameType::IonJS); // descriptor_
+ asMasm().Push(ImmPtr(fakeReturnAddr));
+ asMasm().Push(FramePointer);
+ return true;
+}
+
+void MacroAssemblerARMCompat::move32(Imm32 imm, Register dest) {
+ ma_mov(imm, dest);
+}
+
+void MacroAssemblerARMCompat::move32(Register src, Register dest) {
+ ma_mov(src, dest);
+}
+
+void MacroAssemblerARMCompat::movePtr(Register src, Register dest) {
+ ma_mov(src, dest);
+}
+
+void MacroAssemblerARMCompat::movePtr(ImmWord imm, Register dest) {
+ ma_mov(Imm32(imm.value), dest);
+}
+
+void MacroAssemblerARMCompat::movePtr(ImmGCPtr imm, Register dest) {
+ ma_mov(imm, dest);
+}
+
+void MacroAssemblerARMCompat::movePtr(ImmPtr imm, Register dest) {
+ movePtr(ImmWord(uintptr_t(imm.value)), dest);
+}
+
+void MacroAssemblerARMCompat::movePtr(wasm::SymbolicAddress imm,
+ Register dest) {
+ append(wasm::SymbolicAccess(CodeOffset(currentOffset()), imm));
+ ma_movPatchable(Imm32(-1), dest, Always);
+}
+
+void MacroAssemblerARMCompat::load8ZeroExtend(const Address& address,
+ Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_dataTransferN(IsLoad, 8, false, address.base, Imm32(address.offset), dest,
+ scratch);
+}
+
+void MacroAssemblerARMCompat::load8ZeroExtend(const BaseIndex& src,
+ Register dest) {
+ Register base = src.base;
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (src.offset == 0) {
+ ma_ldrb(DTRAddr(base, DtrRegImmShift(src.index, LSL, scale)), dest);
+ } else {
+ ma_add(base, Imm32(src.offset), scratch, scratch2);
+ ma_ldrb(DTRAddr(scratch, DtrRegImmShift(src.index, LSL, scale)), dest);
+ }
+}
+
+void MacroAssemblerARMCompat::load8SignExtend(const Address& address,
+ Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_dataTransferN(IsLoad, 8, true, address.base, Imm32(address.offset), dest,
+ scratch);
+}
+
+void MacroAssemblerARMCompat::load8SignExtend(const BaseIndex& src,
+ Register dest) {
+ Register index = src.index;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ // ARMv7 does not have LSL on an index register with an extended load.
+ if (src.scale != TimesOne) {
+ ma_lsl(Imm32::ShiftOf(src.scale), index, scratch);
+ index = scratch;
+ }
+
+ if (src.offset != 0) {
+ if (index != scratch) {
+ ma_mov(index, scratch);
+ index = scratch;
+ }
+ ma_add(Imm32(src.offset), index, scratch2);
+ }
+ ma_ldrsb(EDtrAddr(src.base, EDtrOffReg(index)), dest);
+}
+
+void MacroAssemblerARMCompat::load16ZeroExtend(const Address& address,
+ Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_dataTransferN(IsLoad, 16, false, address.base, Imm32(address.offset), dest,
+ scratch);
+}
+
+void MacroAssemblerARMCompat::load16ZeroExtend(const BaseIndex& src,
+ Register dest) {
+ Register index = src.index;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ // ARMv7 does not have LSL on an index register with an extended load.
+ if (src.scale != TimesOne) {
+ ma_lsl(Imm32::ShiftOf(src.scale), index, scratch);
+ index = scratch;
+ }
+
+ if (src.offset != 0) {
+ if (index != scratch) {
+ ma_mov(index, scratch);
+ index = scratch;
+ }
+ ma_add(Imm32(src.offset), index, scratch2);
+ }
+ ma_ldrh(EDtrAddr(src.base, EDtrOffReg(index)), dest);
+}
+
+void MacroAssemblerARMCompat::load16SignExtend(const Address& address,
+ Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_dataTransferN(IsLoad, 16, true, address.base, Imm32(address.offset), dest,
+ scratch);
+}
+
+void MacroAssemblerARMCompat::load16SignExtend(const BaseIndex& src,
+ Register dest) {
+ Register index = src.index;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ // We don't have LSL on index register yet.
+ if (src.scale != TimesOne) {
+ ma_lsl(Imm32::ShiftOf(src.scale), index, scratch);
+ index = scratch;
+ }
+
+ if (src.offset != 0) {
+ if (index != scratch) {
+ ma_mov(index, scratch);
+ index = scratch;
+ }
+ ma_add(Imm32(src.offset), index, scratch2);
+ }
+ ma_ldrsh(EDtrAddr(src.base, EDtrOffReg(index)), dest);
+}
+
+void MacroAssemblerARMCompat::load32(const Address& address, Register dest) {
+ loadPtr(address, dest);
+}
+
+void MacroAssemblerARMCompat::load32(const BaseIndex& address, Register dest) {
+ loadPtr(address, dest);
+}
+
+void MacroAssemblerARMCompat::load32(AbsoluteAddress address, Register dest) {
+ loadPtr(address, dest);
+}
+
+void MacroAssemblerARMCompat::loadPtr(const Address& address, Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(address, dest, scratch);
+}
+
+void MacroAssemblerARMCompat::loadPtr(const BaseIndex& src, Register dest) {
+ Register base = src.base;
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (src.offset != 0) {
+ ma_add(base, Imm32(src.offset), scratch, scratch2);
+ ma_ldr(DTRAddr(scratch, DtrRegImmShift(src.index, LSL, scale)), dest);
+ } else {
+ ma_ldr(DTRAddr(base, DtrRegImmShift(src.index, LSL, scale)), dest);
+ }
+}
+
+void MacroAssemblerARMCompat::loadPtr(AbsoluteAddress address, Register dest) {
+ MOZ_ASSERT(dest != pc); // Use dest as a scratch register.
+ movePtr(ImmWord(uintptr_t(address.addr)), dest);
+ loadPtr(Address(dest, 0), dest);
+}
+
+void MacroAssemblerARMCompat::loadPtr(wasm::SymbolicAddress address,
+ Register dest) {
+ MOZ_ASSERT(dest != pc); // Use dest as a scratch register.
+ movePtr(address, dest);
+ loadPtr(Address(dest, 0), dest);
+}
+
+void MacroAssemblerARMCompat::loadPrivate(const Address& address,
+ Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(ToPayload(address), dest, scratch);
+}
+
+void MacroAssemblerARMCompat::loadDouble(const Address& address,
+ FloatRegister dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_vldr(address, dest, scratch);
+}
+
+void MacroAssemblerARMCompat::loadDouble(const BaseIndex& src,
+ FloatRegister dest) {
+ // VFP instructions don't even support register Base + register Index modes,
+ // so just add the index, then handle the offset like normal.
+ Register base = src.base;
+ Register index = src.index;
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+ int32_t offset = src.offset;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ as_add(scratch, base, lsl(index, scale));
+ ma_vldr(Address(scratch, offset), dest, scratch2);
+}
+
+void MacroAssemblerARMCompat::loadFloatAsDouble(const Address& address,
+ FloatRegister dest) {
+ ScratchRegisterScope scratch(asMasm());
+
+ VFPRegister rt = dest;
+ ma_vldr(address, rt.singleOverlay(), scratch);
+ as_vcvt(rt, rt.singleOverlay());
+}
+
+void MacroAssemblerARMCompat::loadFloatAsDouble(const BaseIndex& src,
+ FloatRegister dest) {
+ // VFP instructions don't even support register Base + register Index modes,
+ // so just add the index, then handle the offset like normal.
+ Register base = src.base;
+ Register index = src.index;
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+ int32_t offset = src.offset;
+ VFPRegister rt = dest;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ as_add(scratch, base, lsl(index, scale));
+ ma_vldr(Address(scratch, offset), rt.singleOverlay(), scratch2);
+ as_vcvt(rt, rt.singleOverlay());
+}
+
+void MacroAssemblerARMCompat::loadFloat32(const Address& address,
+ FloatRegister dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_vldr(address, VFPRegister(dest).singleOverlay(), scratch);
+}
+
+void MacroAssemblerARMCompat::loadFloat32(const BaseIndex& src,
+ FloatRegister dest) {
+ // VFP instructions don't even support register Base + register Index modes,
+ // so just add the index, then handle the offset like normal.
+ Register base = src.base;
+ Register index = src.index;
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+ int32_t offset = src.offset;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ as_add(scratch, base, lsl(index, scale));
+ ma_vldr(Address(scratch, offset), VFPRegister(dest).singleOverlay(),
+ scratch2);
+}
+
+void MacroAssemblerARMCompat::store8(Imm32 imm, const Address& address) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_mov(imm, scratch2);
+ store8(scratch2, address);
+}
+
+void MacroAssemblerARMCompat::store8(Register src, const Address& address) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_dataTransferN(IsStore, 8, false, address.base, Imm32(address.offset), src,
+ scratch);
+}
+
+void MacroAssemblerARMCompat::store8(Imm32 imm, const BaseIndex& dest) {
+ Register base = dest.base;
+ uint32_t scale = Imm32::ShiftOf(dest.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (dest.offset != 0) {
+ ma_add(base, Imm32(dest.offset), scratch, scratch2);
+ ma_mov(imm, scratch2);
+ ma_strb(scratch2, DTRAddr(scratch, DtrRegImmShift(dest.index, LSL, scale)));
+ } else {
+ ma_mov(imm, scratch2);
+ ma_strb(scratch2, DTRAddr(base, DtrRegImmShift(dest.index, LSL, scale)));
+ }
+}
+
+void MacroAssemblerARMCompat::store8(Register src, const BaseIndex& dest) {
+ Register base = dest.base;
+ uint32_t scale = Imm32::ShiftOf(dest.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (dest.offset != 0) {
+ ma_add(base, Imm32(dest.offset), scratch, scratch2);
+ ma_strb(src, DTRAddr(scratch, DtrRegImmShift(dest.index, LSL, scale)));
+ } else {
+ ma_strb(src, DTRAddr(base, DtrRegImmShift(dest.index, LSL, scale)));
+ }
+}
+
+void MacroAssemblerARMCompat::store16(Imm32 imm, const Address& address) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_mov(imm, scratch2);
+ store16(scratch2, address);
+}
+
+void MacroAssemblerARMCompat::store16(Register src, const Address& address) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_dataTransferN(IsStore, 16, false, address.base, Imm32(address.offset), src,
+ scratch);
+}
+
+void MacroAssemblerARMCompat::store16(Imm32 imm, const BaseIndex& dest) {
+ Register index = dest.index;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+  // The halfword store has no shifted-index addressing mode, so apply the
+  // scale to the index manually.
+ if (dest.scale != TimesOne) {
+ ma_lsl(Imm32::ShiftOf(dest.scale), index, scratch);
+ index = scratch;
+ }
+
+ if (dest.offset != 0) {
+ ma_add(index, Imm32(dest.offset), scratch, scratch2);
+ index = scratch;
+ }
+
+ ma_mov(imm, scratch2);
+ ma_strh(scratch2, EDtrAddr(dest.base, EDtrOffReg(index)));
+}
+
+void MacroAssemblerARMCompat::store16(Register src, const BaseIndex& address) {
+ Register index = address.index;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+  // The halfword store has no shifted-index addressing mode, so apply the
+  // scale to the index manually.
+ if (address.scale != TimesOne) {
+ ma_lsl(Imm32::ShiftOf(address.scale), index, scratch);
+ index = scratch;
+ }
+
+ if (address.offset != 0) {
+ ma_add(index, Imm32(address.offset), scratch, scratch2);
+ index = scratch;
+ }
+ ma_strh(src, EDtrAddr(address.base, EDtrOffReg(index)));
+}
+
+void MacroAssemblerARMCompat::store32(Register src, AbsoluteAddress address) {
+ storePtr(src, address);
+}
+
+void MacroAssemblerARMCompat::store32(Register src, const Address& address) {
+ storePtr(src, address);
+}
+
+void MacroAssemblerARMCompat::store32(Imm32 src, const Address& address) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ move32(src, scratch);
+ ma_str(scratch, address, scratch2);
+}
+
+void MacroAssemblerARMCompat::store32(Imm32 imm, const BaseIndex& dest) {
+ Register base = dest.base;
+ uint32_t scale = Imm32::ShiftOf(dest.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (dest.offset != 0) {
+ ma_add(base, Imm32(dest.offset), scratch, scratch2);
+ ma_mov(imm, scratch2);
+ ma_str(scratch2, DTRAddr(scratch, DtrRegImmShift(dest.index, LSL, scale)));
+ } else {
+ ma_mov(imm, scratch);
+ ma_str(scratch, DTRAddr(base, DtrRegImmShift(dest.index, LSL, scale)));
+ }
+}
+
+void MacroAssemblerARMCompat::store32(Register src, const BaseIndex& dest) {
+ Register base = dest.base;
+ uint32_t scale = Imm32::ShiftOf(dest.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (dest.offset != 0) {
+ ma_add(base, Imm32(dest.offset), scratch, scratch2);
+ ma_str(src, DTRAddr(scratch, DtrRegImmShift(dest.index, LSL, scale)));
+ } else {
+ ma_str(src, DTRAddr(base, DtrRegImmShift(dest.index, LSL, scale)));
+ }
+}
+
+void MacroAssemblerARMCompat::storePtr(ImmWord imm, const Address& address) {
+ store32(Imm32(imm.value), address);
+}
+
+void MacroAssemblerARMCompat::storePtr(ImmWord imm, const BaseIndex& address) {
+ store32(Imm32(imm.value), address);
+}
+
+void MacroAssemblerARMCompat::storePtr(ImmPtr imm, const Address& address) {
+ store32(Imm32(uintptr_t(imm.value)), address);
+}
+
+void MacroAssemblerARMCompat::storePtr(ImmPtr imm, const BaseIndex& address) {
+ store32(Imm32(uintptr_t(imm.value)), address);
+}
+
+void MacroAssemblerARMCompat::storePtr(ImmGCPtr imm, const Address& address) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_mov(imm, scratch);
+ ma_str(scratch, address, scratch2);
+}
+
+void MacroAssemblerARMCompat::storePtr(ImmGCPtr imm, const BaseIndex& address) {
+ Register base = address.base;
+ uint32_t scale = Imm32::ShiftOf(address.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (address.offset != 0) {
+ ma_add(base, Imm32(address.offset), scratch, scratch2);
+ ma_mov(imm, scratch2);
+ ma_str(scratch2,
+ DTRAddr(scratch, DtrRegImmShift(address.index, LSL, scale)));
+ } else {
+ ma_mov(imm, scratch);
+ ma_str(scratch, DTRAddr(base, DtrRegImmShift(address.index, LSL, scale)));
+ }
+}
+
+void MacroAssemblerARMCompat::storePtr(Register src, const Address& address) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_str(src, address, scratch2);
+}
+
+void MacroAssemblerARMCompat::storePtr(Register src, const BaseIndex& address) {
+ store32(src, address);
+}
+
+void MacroAssemblerARMCompat::storePtr(Register src, AbsoluteAddress dest) {
+ ScratchRegisterScope scratch(asMasm());
+ movePtr(ImmWord(uintptr_t(dest.addr)), scratch);
+ ma_str(src, DTRAddr(scratch, DtrOffImm(0)));
+}
+
+// Note: this function clobbers the input register.
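+// With VFPv3 the biased value (input + 0.5) is converted to an 8.24 unsigned
+// fixed-point number, so the clamped byte ends up in the top 8 bits and the
+// fraction in the low 24 bits, which is then used to round ties to even.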
+void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) {
+ if (HasVFPv3()) {
+ Label notSplit;
+ {
+ ScratchDoubleScope scratchDouble(*this);
+ MOZ_ASSERT(input != scratchDouble);
+ loadConstantDouble(0.5, scratchDouble);
+
+ ma_vadd(input, scratchDouble, scratchDouble);
+ // Convert the double into an unsigned fixed point value with 24 bits of
+ // precision. The resulting number will look like 0xII.DDDDDD
+ as_vcvtFixed(scratchDouble, false, 24, true);
+ }
+
+ // Move the fixed point value into an integer register.
+ {
+ ScratchFloat32Scope scratchFloat(*this);
+ as_vxfer(output, InvalidReg, scratchFloat.uintOverlay(), FloatToCore);
+ }
+
+ ScratchRegisterScope scratch(*this);
+
+ // See if this value *might* have been an exact integer after adding
+ // 0.5. This tests the 1/2 through 1/16,777,216th places, but 0.5 needs
+ // to be tested out to the 1/140,737,488,355,328th place.
+ ma_tst(output, Imm32(0x00ffffff), scratch);
+ // Convert to a uint8 by shifting out all of the fraction bits.
+ ma_lsr(Imm32(24), output, output);
+  // If any of the bottom 24 bits were non-zero, then we're done, since this
+  // number can't be exactly XX.0 and there is no tie to break.
+ ma_b(&notSplit, NonZero);
+ as_vxfer(scratch, InvalidReg, input, FloatToCore);
+ as_cmp(scratch, Imm8(0));
+ // If the lower 32 bits of the double were 0, then this was an exact number,
+ // and it should be even.
+ as_bic(output, output, Imm8(1), LeaveCC, Zero);
+ bind(&notSplit);
+ } else {
+ ScratchDoubleScope scratchDouble(*this);
+ MOZ_ASSERT(input != scratchDouble);
+ loadConstantDouble(0.5, scratchDouble);
+
+ Label outOfRange;
+ ma_vcmpz(input);
+ // Do the add, in place so we can reference it later.
+ ma_vadd(input, scratchDouble, input);
+ // Do the conversion to an integer.
+ as_vcvt(VFPRegister(scratchDouble).uintOverlay(), VFPRegister(input));
+ // Copy the converted value out.
+ as_vxfer(output, InvalidReg, scratchDouble, FloatToCore);
+ as_vmrs(pc);
+ ma_mov(Imm32(0), output, Overflow); // NaN => 0
+ ma_b(&outOfRange, Overflow); // NaN
+ as_cmp(output, Imm8(0xff));
+ ma_mov(Imm32(0xff), output, Above);
+ ma_b(&outOfRange, Above);
+ // Convert it back to see if we got the same value back.
+ as_vcvt(scratchDouble, VFPRegister(scratchDouble).uintOverlay());
+ // Do the check.
+ as_vcmp(scratchDouble, input);
+ as_vmrs(pc);
+ as_bic(output, output, Imm8(1), LeaveCC, Zero);
+ bind(&outOfRange);
+ }
+}
+
+void MacroAssemblerARMCompat::cmp32(Register lhs, Imm32 rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_cmp(lhs, rhs, scratch);
+}
+
+void MacroAssemblerARMCompat::cmp32(Register lhs, Register rhs) {
+ ma_cmp(lhs, rhs);
+}
+
+void MacroAssemblerARMCompat::cmp32(const Address& lhs, Imm32 rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, rhs, scratch2);
+}
+
+void MacroAssemblerARMCompat::cmp32(const Address& lhs, Register rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, rhs);
+}
+
+void MacroAssemblerARMCompat::cmpPtr(Register lhs, ImmWord rhs) {
+ cmp32(lhs, Imm32(rhs.value));
+}
+
+void MacroAssemblerARMCompat::cmpPtr(Register lhs, ImmPtr rhs) {
+ cmpPtr(lhs, ImmWord(uintptr_t(rhs.value)));
+}
+
+void MacroAssemblerARMCompat::cmpPtr(Register lhs, Register rhs) {
+ ma_cmp(lhs, rhs);
+}
+
+void MacroAssemblerARMCompat::cmpPtr(Register lhs, ImmGCPtr rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_cmp(lhs, rhs, scratch);
+}
+
+void MacroAssemblerARMCompat::cmpPtr(Register lhs, Imm32 rhs) {
+ cmp32(lhs, rhs);
+}
+
+void MacroAssemblerARMCompat::cmpPtr(const Address& lhs, Register rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, rhs);
+}
+
+void MacroAssemblerARMCompat::cmpPtr(const Address& lhs, ImmWord rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, Imm32(rhs.value), scratch2);
+}
+
+void MacroAssemblerARMCompat::cmpPtr(const Address& lhs, ImmPtr rhs) {
+ cmpPtr(lhs, ImmWord(uintptr_t(rhs.value)));
+}
+
+void MacroAssemblerARMCompat::cmpPtr(const Address& lhs, ImmGCPtr rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, rhs, scratch2);
+}
+
+void MacroAssemblerARMCompat::cmpPtr(const Address& lhs, Imm32 rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, rhs, scratch2);
+}
+
+void MacroAssemblerARMCompat::setStackArg(Register reg, uint32_t arg) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_dataTransferN(IsStore, 32, true, sp, Imm32(arg * sizeof(intptr_t)), reg,
+ scratch);
+}
+
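+// min/max strategy: an ordered VFP compare picks the winner, NaN operands take
+// the NaN path, and equal operands are handled separately so the correctly
+// signed zero survives (addition for max, negate/subtract/negate for min).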
+void MacroAssemblerARMCompat::minMaxDouble(FloatRegister srcDest,
+ FloatRegister second, bool canBeNaN,
+ bool isMax) {
+ FloatRegister first = srcDest;
+
+ Label nan, equal, returnSecond, done;
+
+ Assembler::Condition cond = isMax ? Assembler::VFP_LessThanOrEqual
+ : Assembler::VFP_GreaterThanOrEqual;
+
+ compareDouble(first, second);
+ // First or second is NaN, result is NaN.
+ ma_b(&nan, Assembler::VFP_Unordered);
+ // Make sure we handle -0 and 0 right.
+ ma_b(&equal, Assembler::VFP_Equal);
+ ma_b(&returnSecond, cond);
+ ma_b(&done);
+
+ // Check for zero.
+ bind(&equal);
+ compareDouble(first, NoVFPRegister);
+ // First wasn't 0 or -0, so just return it.
+ ma_b(&done, Assembler::VFP_NotEqualOrUnordered);
+ // So now both operands are either -0 or 0.
+ if (isMax) {
+ // -0 + -0 = -0 and -0 + 0 = 0.
+ ma_vadd(second, first, first);
+ } else {
+ ma_vneg(first, first);
+ ma_vsub(first, second, first);
+ ma_vneg(first, first);
+ }
+ ma_b(&done);
+
+ bind(&nan);
+ // If the first argument is the NaN, return it; otherwise return the second
+ // operand.
+ compareDouble(first, first);
+ ma_vmov(first, srcDest, Assembler::VFP_Unordered);
+ ma_b(&done, Assembler::VFP_Unordered);
+
+ bind(&returnSecond);
+ ma_vmov(second, srcDest);
+
+ bind(&done);
+}
+
+void MacroAssemblerARMCompat::minMaxFloat32(FloatRegister srcDest,
+ FloatRegister second, bool canBeNaN,
+ bool isMax) {
+ FloatRegister first = srcDest;
+
+ Label nan, equal, returnSecond, done;
+
+ Assembler::Condition cond = isMax ? Assembler::VFP_LessThanOrEqual
+ : Assembler::VFP_GreaterThanOrEqual;
+
+ compareFloat(first, second);
+ // First or second is NaN, result is NaN.
+ ma_b(&nan, Assembler::VFP_Unordered);
+ // Make sure we handle -0 and 0 right.
+ ma_b(&equal, Assembler::VFP_Equal);
+ ma_b(&returnSecond, cond);
+ ma_b(&done);
+
+ // Check for zero.
+ bind(&equal);
+ compareFloat(first, NoVFPRegister);
+ // First wasn't 0 or -0, so just return it.
+ ma_b(&done, Assembler::VFP_NotEqualOrUnordered);
+ // So now both operands are either -0 or 0.
+ if (isMax) {
+ // -0 + -0 = -0 and -0 + 0 = 0.
+ ma_vadd_f32(second, first, first);
+ } else {
+ ma_vneg_f32(first, first);
+ ma_vsub_f32(first, second, first);
+ ma_vneg_f32(first, first);
+ }
+ ma_b(&done);
+
+ bind(&nan);
+ // See comment in minMaxDouble.
+ compareFloat(first, first);
+ ma_vmov_f32(first, srcDest, Assembler::VFP_Unordered);
+ ma_b(&done, Assembler::VFP_Unordered);
+
+ bind(&returnSecond);
+ ma_vmov_f32(second, srcDest);
+
+ bind(&done);
+}
+
+void MacroAssemblerARMCompat::compareDouble(FloatRegister lhs,
+ FloatRegister rhs) {
+ // Compare the doubles, setting vector status flags.
+ if (rhs.isMissing()) {
+ ma_vcmpz(lhs);
+ } else {
+ ma_vcmp(lhs, rhs);
+ }
+
+ // Move vector status bits to normal status flags.
+ as_vmrs(pc);
+}
+
+void MacroAssemblerARMCompat::compareFloat(FloatRegister lhs,
+ FloatRegister rhs) {
+  // Compare the floats, setting vector status flags.
+ if (rhs.isMissing()) {
+ as_vcmpz(VFPRegister(lhs).singleOverlay());
+ } else {
+ as_vcmp(VFPRegister(lhs).singleOverlay(), VFPRegister(rhs).singleOverlay());
+ }
+
+ // Move vector status bits to normal status flags.
+ as_vmrs(pc);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testInt32(
+ Assembler::Condition cond, const ValueOperand& value) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ ma_cmp(value.typeReg(), ImmType(JSVAL_TYPE_INT32));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBoolean(
+ Assembler::Condition cond, const ValueOperand& value) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ ma_cmp(value.typeReg(), ImmType(JSVAL_TYPE_BOOLEAN));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testDouble(
+ Assembler::Condition cond, const ValueOperand& value) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
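+  // On NUNBOX32 every tag value below JSVAL_TAG_CLEAR denotes a double, so the
+  // type test becomes an unsigned comparison on the tag.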
+ Assembler::Condition actual = (cond == Equal) ? Below : AboveOrEqual;
+ ScratchRegisterScope scratch(asMasm());
+ ma_cmp(value.typeReg(), ImmTag(JSVAL_TAG_CLEAR), scratch);
+ return actual;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testNull(
+ Assembler::Condition cond, const ValueOperand& value) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ ma_cmp(value.typeReg(), ImmType(JSVAL_TYPE_NULL));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testUndefined(
+ Assembler::Condition cond, const ValueOperand& value) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ ma_cmp(value.typeReg(), ImmType(JSVAL_TYPE_UNDEFINED));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testString(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testString(cond, value.typeReg());
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testSymbol(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testSymbol(cond, value.typeReg());
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBigInt(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testBigInt(cond, value.typeReg());
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testObject(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testObject(cond, value.typeReg());
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testNumber(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testNumber(cond, value.typeReg());
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testMagic(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testMagic(cond, value.typeReg());
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testPrimitive(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testPrimitive(cond, value.typeReg());
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testGCThing(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testGCThing(cond, value.typeReg());
+}
+
+// Register-based tests.
+Assembler::Condition MacroAssemblerARMCompat::testInt32(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_INT32));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBoolean(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_BOOLEAN));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testNull(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_NULL));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testUndefined(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_UNDEFINED));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testString(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_STRING));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testSymbol(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_SYMBOL));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBigInt(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_BIGINT));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testObject(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_OBJECT));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testMagic(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_MAGIC));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testPrimitive(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
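+  // All primitive tags are below the exclusive upper bound, so the check
+  // becomes a single unsigned comparison.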
+ ma_cmp(tag, ImmTag(JS::detail::ValueUpperExclPrimitiveTag));
+ return cond == Equal ? Below : AboveOrEqual;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testGCThing(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
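+  // All GC-thing tags are at or above the inclusive lower bound, so a single
+  // unsigned comparison covers them.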
+ ma_cmp(tag, ImmTag(JS::detail::ValueLowerInclGCThingTag));
+ return cond == Equal ? AboveOrEqual : Below;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testGCThing(
+ Assembler::Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ ma_cmp(tag, ImmTag(JS::detail::ValueLowerInclGCThingTag));
+ return cond == Equal ? AboveOrEqual : Below;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testMagic(
+ Assembler::Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_MAGIC));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testInt32(
+ Assembler::Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_INT32));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testDouble(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testDouble(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBoolean(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testBoolean(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testNull(Condition cond,
+ const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testNull(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testUndefined(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testUndefined(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testString(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testString(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testSymbol(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testSymbol(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBigInt(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testBigInt(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testObject(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testObject(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testNumber(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testNumber(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testDouble(Condition cond,
+ Register tag) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ Condition actual = (cond == Equal) ? Below : AboveOrEqual;
+ ma_cmp(tag, ImmTag(JSVAL_TAG_CLEAR));
+ return actual;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testNumber(Condition cond,
+ Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JS::detail::ValueUpperInclNumberTag));
+ return cond == Equal ? BelowOrEqual : Above;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testUndefined(
+ Condition cond, const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_UNDEFINED));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testNull(Condition cond,
+ const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_NULL));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBoolean(
+ Condition cond, const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_BOOLEAN));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testString(Condition cond,
+ const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_STRING));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testSymbol(Condition cond,
+ const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_SYMBOL));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBigInt(Condition cond,
+ const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_BIGINT));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testInt32(Condition cond,
+ const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_INT32));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testObject(Condition cond,
+ const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_OBJECT));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testDouble(Condition cond,
+ const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ Assembler::Condition actual = (cond == Equal) ? Below : AboveOrEqual;
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_CLEAR));
+ return actual;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testMagic(
+ Condition cond, const BaseIndex& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_MAGIC));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testGCThing(
+ Condition cond, const BaseIndex& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ ma_cmp(tag, ImmTag(JS::detail::ValueLowerInclGCThingTag));
+ return cond == Equal ? AboveOrEqual : Below;
+}
+
+// Unboxing code.
+void MacroAssemblerARMCompat::unboxNonDouble(const ValueOperand& operand,
+ Register dest, JSValueType type) {
+ auto movPayloadToDest = [&]() {
+ if (operand.payloadReg() != dest) {
+ ma_mov(operand.payloadReg(), dest, LeaveCC);
+ }
+ };
+ if (!JitOptions.spectreValueMasking) {
+ movPayloadToDest();
+ return;
+ }
+
+ // Spectre mitigation: We zero the payload if the tag does not match the
+ // expected type and if this is a pointer type.
+ if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) {
+ movPayloadToDest();
+ return;
+ }
+
+ // We zero the destination register and move the payload into it if
+ // the tag corresponds to the given type.
+ ma_cmp(operand.typeReg(), ImmType(type));
+ movPayloadToDest();
+ ma_mov(Imm32(0), dest, NotEqual);
+}
+
+void MacroAssemblerARMCompat::unboxNonDouble(const Address& src, Register dest,
+ JSValueType type) {
+ ScratchRegisterScope scratch(asMasm());
+ if (!JitOptions.spectreValueMasking) {
+ ma_ldr(ToPayload(src), dest, scratch);
+ return;
+ }
+
+ // Spectre mitigation: We zero the payload if the tag does not match the
+ // expected type and if this is a pointer type.
+ if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) {
+ ma_ldr(ToPayload(src), dest, scratch);
+ return;
+ }
+
+ // We zero the destination register and move the payload into it if
+ // the tag corresponds to the given type.
+ ma_ldr(ToType(src), scratch, scratch);
+ ma_cmp(scratch, ImmType(type));
+ ma_ldr(ToPayload(src), dest, scratch, Offset, Equal);
+ ma_mov(Imm32(0), dest, NotEqual);
+}
+
+void MacroAssemblerARMCompat::unboxNonDouble(const BaseIndex& src,
+ Register dest, JSValueType type) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_alu(src.base, lsl(src.index, src.scale), scratch2, OpAdd);
+ Address value(scratch2, src.offset);
+ unboxNonDouble(value, dest, type);
+}
+
+void MacroAssemblerARMCompat::unboxDouble(const ValueOperand& operand,
+ FloatRegister dest) {
+ MOZ_ASSERT(dest.isDouble());
+ as_vxfer(operand.payloadReg(), operand.typeReg(), VFPRegister(dest),
+ CoreToFloat);
+}
+
+void MacroAssemblerARMCompat::unboxDouble(const Address& src,
+ FloatRegister dest) {
+ MOZ_ASSERT(dest.isDouble());
+ loadDouble(src, dest);
+}
+
+void MacroAssemblerARMCompat::unboxDouble(const BaseIndex& src,
+ FloatRegister dest) {
+ MOZ_ASSERT(dest.isDouble());
+ loadDouble(src, dest);
+}
+
+void MacroAssemblerARMCompat::unboxValue(const ValueOperand& src,
+ AnyRegister dest, JSValueType type) {
+ if (dest.isFloat()) {
+ Label notInt32, end;
+ asMasm().branchTestInt32(Assembler::NotEqual, src, &notInt32);
+ convertInt32ToDouble(src.payloadReg(), dest.fpu());
+ ma_b(&end);
+ bind(&notInt32);
+ unboxDouble(src, dest.fpu());
+ bind(&end);
+ } else {
+ unboxNonDouble(src, dest.gpr(), type);
+ }
+}
+
+void MacroAssemblerARMCompat::boxDouble(FloatRegister src,
+ const ValueOperand& dest,
+ FloatRegister) {
+ as_vxfer(dest.payloadReg(), dest.typeReg(), VFPRegister(src), FloatToCore);
+}
+
+void MacroAssemblerARMCompat::boxNonDouble(JSValueType type, Register src,
+ const ValueOperand& dest) {
+ if (src != dest.payloadReg()) {
+ ma_mov(src, dest.payloadReg());
+ }
+ ma_mov(ImmType(type), dest.typeReg());
+}
+
+void MacroAssemblerARMCompat::boolValueToDouble(const ValueOperand& operand,
+ FloatRegister dest) {
+ VFPRegister d = VFPRegister(dest);
+ loadConstantDouble(1.0, dest);
+ as_cmp(operand.payloadReg(), Imm8(0));
+ // If the source is 0, then subtract the dest from itself, producing 0.
+ as_vsub(d, d, d, Equal);
+}
+
+void MacroAssemblerARMCompat::int32ValueToDouble(const ValueOperand& operand,
+ FloatRegister dest) {
+ // Transfer the integral value to a floating point register.
+ VFPRegister vfpdest = VFPRegister(dest);
+ as_vxfer(operand.payloadReg(), InvalidReg, vfpdest.sintOverlay(),
+ CoreToFloat);
+ // Convert the value to a double.
+ as_vcvt(vfpdest, vfpdest.sintOverlay());
+}
+
+void MacroAssemblerARMCompat::boolValueToFloat32(const ValueOperand& operand,
+ FloatRegister dest) {
+ VFPRegister d = VFPRegister(dest).singleOverlay();
+ loadConstantFloat32(1.0, dest);
+ as_cmp(operand.payloadReg(), Imm8(0));
+ // If the source is 0, then subtract the dest from itself, producing 0.
+ as_vsub(d, d, d, Equal);
+}
+
+void MacroAssemblerARMCompat::int32ValueToFloat32(const ValueOperand& operand,
+ FloatRegister dest) {
+ // Transfer the integral value to a floating point register.
+ VFPRegister vfpdest = VFPRegister(dest).singleOverlay();
+ as_vxfer(operand.payloadReg(), InvalidReg, vfpdest.sintOverlay(),
+ CoreToFloat);
+ // Convert the value to a float.
+ as_vcvt(vfpdest, vfpdest.sintOverlay());
+}
+
+void MacroAssemblerARMCompat::loadConstantFloat32(float f, FloatRegister dest) {
+ ma_vimm_f32(f, dest);
+}
+
+void MacroAssemblerARMCompat::loadInt32OrDouble(const Address& src,
+ FloatRegister dest) {
+ Label notInt32, end;
+
+ // If it's an int, convert to a double.
+ {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ ma_ldr(ToType(src), scratch, scratch2);
+ asMasm().branchTestInt32(Assembler::NotEqual, scratch, &notInt32);
+ ma_ldr(ToPayload(src), scratch, scratch2);
+ convertInt32ToDouble(scratch, dest);
+ ma_b(&end);
+ }
+
+ // Not an int, just load as double.
+ bind(&notInt32);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_vldr(src, dest, scratch);
+ }
+ bind(&end);
+}
+
+void MacroAssemblerARMCompat::loadInt32OrDouble(Register base, Register index,
+ FloatRegister dest,
+ int32_t shift) {
+ Label notInt32, end;
+
+ static_assert(NUNBOX32_PAYLOAD_OFFSET == 0);
+
+ ScratchRegisterScope scratch(asMasm());
+
+ // If it's an int, convert it to double.
+ ma_alu(base, lsl(index, shift), scratch, OpAdd);
+
+ // Since we only have one scratch register, we need to stomp over it with
+ // the tag.
+ ma_ldr(DTRAddr(scratch, DtrOffImm(NUNBOX32_TYPE_OFFSET)), scratch);
+ asMasm().branchTestInt32(Assembler::NotEqual, scratch, &notInt32);
+
+ // Implicitly requires NUNBOX32_PAYLOAD_OFFSET == 0: no offset provided
+ ma_ldr(DTRAddr(base, DtrRegImmShift(index, LSL, shift)), scratch);
+ convertInt32ToDouble(scratch, dest);
+ ma_b(&end);
+
+ // Not an int, just load as double.
+ bind(&notInt32);
+  // First, recompute the address that had been stored in the scratch
+  // register, since the scratch register was overwritten when loading the
+  // type.
+ ma_alu(base, lsl(index, shift), scratch, OpAdd);
+ ma_vldr(VFPAddr(scratch, VFPOffImm(0)), dest);
+ bind(&end);
+}
+
+void MacroAssemblerARMCompat::loadConstantDouble(double dp,
+ FloatRegister dest) {
+ ma_vimm(dp, dest);
+}
+
+// Treat the value as a boolean, and set condition codes accordingly.
+Assembler::Condition MacroAssemblerARMCompat::testInt32Truthy(
+ bool truthy, const ValueOperand& operand) {
+ ma_tst(operand.payloadReg(), operand.payloadReg());
+ return truthy ? NonZero : Zero;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBooleanTruthy(
+ bool truthy, const ValueOperand& operand) {
+ ma_tst(operand.payloadReg(), operand.payloadReg());
+ return truthy ? NonZero : Zero;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testDoubleTruthy(
+ bool truthy, FloatRegister reg) {
+ as_vcmpz(VFPRegister(reg));
+ as_vmrs(pc);
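+  // An unordered compare (NaN) sets the V flag; the conditional cmp of r0 with
+  // itself then forces Z so that NaN reads as falsy.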
+ as_cmp(r0, O2Reg(r0), Overflow);
+ return truthy ? NonZero : Zero;
+}
+
+Register MacroAssemblerARMCompat::extractObject(const Address& address,
+ Register scratch) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(ToPayload(address), scratch, scratch2);
+ return scratch;
+}
+
+Register MacroAssemblerARMCompat::extractTag(const Address& address,
+ Register scratch) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(ToType(address), scratch, scratch2);
+ return scratch;
+}
+
+Register MacroAssemblerARMCompat::extractTag(const BaseIndex& address,
+ Register scratch) {
+ ma_alu(address.base, lsl(address.index, address.scale), scratch, OpAdd,
+ LeaveCC);
+ return extractTag(Address(scratch, address.offset), scratch);
+}
+
+/////////////////////////////////////////////////////////////////
+// X86/X64-common (ARM too now) interface.
+/////////////////////////////////////////////////////////////////
+void MacroAssemblerARMCompat::storeValue(ValueOperand val, const Address& dst) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_str(val.payloadReg(), ToPayload(dst), scratch2);
+ ma_str(val.typeReg(), ToType(dst), scratch2);
+}
+
+void MacroAssemblerARMCompat::storeValue(ValueOperand val,
+ const BaseIndex& dest) {
+ ScratchRegisterScope scratch(asMasm());
+
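+  // If the payload/type registers form an even/odd pair (a DTRD candidate) and
+  // the offset is small, both words can be stored with a single strd.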
+ if (isValueDTRDCandidate(val) && Abs(dest.offset) <= 255) {
+ Register tmpIdx;
+ if (dest.offset == 0) {
+ if (dest.scale == TimesOne) {
+ tmpIdx = dest.index;
+ } else {
+ ma_lsl(Imm32(dest.scale), dest.index, scratch);
+ tmpIdx = scratch;
+ }
+ ma_strd(val.payloadReg(), val.typeReg(),
+ EDtrAddr(dest.base, EDtrOffReg(tmpIdx)));
+ } else {
+ ma_alu(dest.base, lsl(dest.index, dest.scale), scratch, OpAdd);
+ ma_strd(val.payloadReg(), val.typeReg(),
+ EDtrAddr(scratch, EDtrOffImm(dest.offset)));
+ }
+ } else {
+ ma_alu(dest.base, lsl(dest.index, dest.scale), scratch, OpAdd);
+ storeValue(val, Address(scratch, dest.offset));
+ }
+}
+
+void MacroAssemblerARMCompat::loadValue(const BaseIndex& addr,
+ ValueOperand val) {
+ ScratchRegisterScope scratch(asMasm());
+
+ if (isValueDTRDCandidate(val) && Abs(addr.offset) <= 255) {
+ Register tmpIdx;
+ if (addr.offset == 0) {
+ if (addr.scale == TimesOne) {
+ // If the offset register is the same as one of the destination
+ // registers, LDRD's behavior is undefined. Use the scratch
+ // register to avoid this.
+ if (val.aliases(addr.index)) {
+ ma_mov(addr.index, scratch);
+ tmpIdx = scratch;
+ } else {
+ tmpIdx = addr.index;
+ }
+ } else {
+ ma_lsl(Imm32(addr.scale), addr.index, scratch);
+ tmpIdx = scratch;
+ }
+ ma_ldrd(EDtrAddr(addr.base, EDtrOffReg(tmpIdx)), val.payloadReg(),
+ val.typeReg());
+ } else {
+ ma_alu(addr.base, lsl(addr.index, addr.scale), scratch, OpAdd);
+ ma_ldrd(EDtrAddr(scratch, EDtrOffImm(addr.offset)), val.payloadReg(),
+ val.typeReg());
+ }
+ } else {
+ ma_alu(addr.base, lsl(addr.index, addr.scale), scratch, OpAdd);
+ loadValue(Address(scratch, addr.offset), val);
+ }
+}
+
+void MacroAssemblerARMCompat::loadValue(Address src, ValueOperand val) {
+ // TODO: copy this code into a generic function that acts on all sequences
+ // of memory accesses
+ if (isValueDTRDCandidate(val)) {
+ // If the value we want is in two consecutive registers starting with an
+ // even register, they can be combined as a single ldrd.
+ int offset = src.offset;
+ if (offset < 256 && offset > -256) {
+ ma_ldrd(EDtrAddr(src.base, EDtrOffImm(src.offset)), val.payloadReg(),
+ val.typeReg());
+ return;
+ }
+ }
+  // If the payload register number is lower than the type register number,
+  // then we may be able to use an ldm instruction.
+
+ if (val.payloadReg().code() < val.typeReg().code()) {
+ if (src.offset <= 4 && src.offset >= -8 && (src.offset & 3) == 0) {
+      // Each of the four offsets -8, -4, 0 and 4 corresponds exactly to one
+      // of LDM{DB, DA, IA, IB}.
+ DTMMode mode;
+ switch (src.offset) {
+ case -8:
+ mode = DB;
+ break;
+ case -4:
+ mode = DA;
+ break;
+ case 0:
+ mode = IA;
+ break;
+ case 4:
+ mode = IB;
+ break;
+ default:
+ MOZ_CRASH("Bogus Offset for LoadValue as DTM");
+ }
+ startDataTransferM(IsLoad, src.base, mode);
+ transferReg(val.payloadReg());
+ transferReg(val.typeReg());
+ finishDataTransfer();
+ return;
+ }
+ }
+
+ loadUnalignedValue(src, val);
+}
+
+void MacroAssemblerARMCompat::loadUnalignedValue(const Address& src,
+ ValueOperand dest) {
+ Address payload = ToPayload(src);
+ Address type = ToType(src);
+
+ // Ensure that loading the payload does not erase the pointer to the Value
+ // in memory.
+ if (type.base != dest.payloadReg()) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(payload, dest.payloadReg(), scratch2);
+ ma_ldr(type, dest.typeReg(), scratch2);
+ } else {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(type, dest.typeReg(), scratch2);
+ ma_ldr(payload, dest.payloadReg(), scratch2);
+ }
+}
+
+void MacroAssemblerARMCompat::tagValue(JSValueType type, Register payload,
+ ValueOperand dest) {
+ MOZ_ASSERT(dest.typeReg() != dest.payloadReg());
+ if (payload != dest.payloadReg()) {
+ ma_mov(payload, dest.payloadReg());
+ }
+ ma_mov(ImmType(type), dest.typeReg());
+}
+
+void MacroAssemblerARMCompat::pushValue(ValueOperand val) {
+ ma_push(val.typeReg());
+ ma_push(val.payloadReg());
+}
+
+void MacroAssemblerARMCompat::pushValue(const Address& addr) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ ma_ldr(ToType(addr), scratch, scratch2);
+ ma_push(scratch);
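+  // The push above may have moved sp, so the payload is addressed through
+  // ToPayloadAfterStackPush, which accounts for that push.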
+ ma_ldr(ToPayloadAfterStackPush(addr), scratch, scratch2);
+ ma_push(scratch);
+}
+
+void MacroAssemblerARMCompat::pushValue(const BaseIndex& addr,
+ Register scratch) {
+ computeEffectiveAddress(addr, scratch);
+ pushValue(Address(scratch, 0));
+}
+
+void MacroAssemblerARMCompat::popValue(ValueOperand val) {
+ ma_pop(val.payloadReg());
+ ma_pop(val.typeReg());
+}
+
+void MacroAssemblerARMCompat::storePayload(const Value& val,
+ const Address& dest) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (val.isGCThing()) {
+ ma_mov(ImmGCPtr(val.toGCThing()), scratch);
+ } else {
+ ma_mov(Imm32(val.toNunboxPayload()), scratch);
+ }
+ ma_str(scratch, ToPayload(dest), scratch2);
+}
+
+void MacroAssemblerARMCompat::storePayload(Register src, const Address& dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_str(src, ToPayload(dest), scratch);
+}
+
+void MacroAssemblerARMCompat::storePayload(const Value& val,
+ const BaseIndex& dest) {
+ unsigned shift = ScaleToShift(dest.scale);
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (val.isGCThing()) {
+ ma_mov(ImmGCPtr(val.toGCThing()), scratch);
+ } else {
+ ma_mov(Imm32(val.toNunboxPayload()), scratch);
+ }
+
+ // If NUNBOX32_PAYLOAD_OFFSET is not zero, the memory operand [base + index
+ // << shift + imm] cannot be encoded into a single instruction, and cannot
+ // be integrated into the as_dtr call.
+ static_assert(NUNBOX32_PAYLOAD_OFFSET == 0);
+
+ // If an offset is used, modify the base so that a [base + index << shift]
+ // instruction format can be used.
+ if (dest.offset != 0) {
+ ma_add(dest.base, Imm32(dest.offset), dest.base, scratch2);
+ }
+
+ as_dtr(IsStore, 32, Offset, scratch,
+ DTRAddr(dest.base, DtrRegImmShift(dest.index, LSL, shift)));
+
+ // Restore the original value of the base, if necessary.
+ if (dest.offset != 0) {
+ ma_sub(dest.base, Imm32(dest.offset), dest.base, scratch);
+ }
+}
+
+void MacroAssemblerARMCompat::storePayload(Register src,
+ const BaseIndex& dest) {
+ unsigned shift = ScaleToShift(dest.scale);
+ MOZ_ASSERT(shift < 32);
+
+ ScratchRegisterScope scratch(asMasm());
+
+ // If NUNBOX32_PAYLOAD_OFFSET is not zero, the memory operand [base + index
+ // << shift + imm] cannot be encoded into a single instruction, and cannot
+ // be integrated into the as_dtr call.
+ static_assert(NUNBOX32_PAYLOAD_OFFSET == 0);
+
+ // Save/restore the base if the BaseIndex has an offset, as above.
+ if (dest.offset != 0) {
+ ma_add(dest.base, Imm32(dest.offset), dest.base, scratch);
+ }
+
+  // Technically, shift > -32 can be handled by changing LSL to ASR, but that
+  // should never come up, and this is one less code path to get wrong.
+ as_dtr(IsStore, 32, Offset, src,
+ DTRAddr(dest.base, DtrRegImmShift(dest.index, LSL, shift)));
+
+ if (dest.offset != 0) {
+ ma_sub(dest.base, Imm32(dest.offset), dest.base, scratch);
+ }
+}
+
+void MacroAssemblerARMCompat::storeTypeTag(ImmTag tag, const Address& dest) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ ma_mov(tag, scratch);
+ ma_str(scratch, ToType(dest), scratch2);
+}
+
+void MacroAssemblerARMCompat::storeTypeTag(ImmTag tag, const BaseIndex& dest) {
+ Register base = dest.base;
+ Register index = dest.index;
+ unsigned shift = ScaleToShift(dest.scale);
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ MOZ_ASSERT(base != scratch && base != scratch2);
+ MOZ_ASSERT(index != scratch && index != scratch2);
+
+ ma_add(base, Imm32(dest.offset + NUNBOX32_TYPE_OFFSET), scratch2, scratch);
+ ma_mov(tag, scratch);
+ ma_str(scratch, DTRAddr(scratch2, DtrRegImmShift(index, LSL, shift)));
+}
+
+void MacroAssemblerARM::ma_call(ImmPtr dest) {
+ ma_movPatchable(dest, CallReg, Always);
+ as_blx(CallReg);
+}
+
+void MacroAssemblerARMCompat::breakpoint() { as_bkpt(); }
+
+void MacroAssemblerARMCompat::simulatorStop(const char* msg) {
+#ifdef JS_SIMULATOR_ARM
+ MOZ_ASSERT(sizeof(char*) == 4);
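+  // 0xefffffff is an always-executed svc with the maximal immediate; the
+  // simulator recognizes it as a stop and reads the next word as the message
+  // pointer.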
+ writeInst(0xefffffff);
+ writeInst((int)msg);
+#endif
+}
+
+void MacroAssemblerARMCompat::ensureDouble(const ValueOperand& source,
+ FloatRegister dest, Label* failure) {
+ Label isDouble, done;
+ asMasm().branchTestDouble(Assembler::Equal, source.typeReg(), &isDouble);
+ asMasm().branchTestInt32(Assembler::NotEqual, source.typeReg(), failure);
+
+ convertInt32ToDouble(source.payloadReg(), dest);
+ jump(&done);
+
+ bind(&isDouble);
+ unboxDouble(source, dest);
+
+ bind(&done);
+}
+
+void MacroAssemblerARMCompat::breakpoint(Condition cc) {
+ ma_ldr(DTRAddr(r12, DtrRegImmShift(r12, LSL, 0, IsDown)), r12, Offset, cc);
+}
+
+void MacroAssemblerARMCompat::checkStackAlignment() {
+ asMasm().assertStackAlignment(ABIStackAlignment);
+}
+
+void MacroAssemblerARMCompat::handleFailureWithHandlerTail(
+ Label* profilerExitTail, Label* bailoutTail) {
+ // Reserve space for exception information.
+ int size = (sizeof(ResumeFromException) + 7) & ~7;
+
+ Imm8 size8(size);
+ as_sub(sp, sp, size8);
+ ma_mov(sp, r0);
+
+ // Call the handler.
+ using Fn = void (*)(ResumeFromException * rfe);
+ asMasm().setupUnalignedABICall(r1);
+ asMasm().passABIArg(r0);
+ asMasm().callWithABI<Fn, HandleException>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckHasExitFrame);
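+  // HandleException fills in the ResumeFromException record reserved above;
+  // dispatch on the resume kind it selected.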
+
+ Label entryFrame;
+ Label catch_;
+ Label finally;
+ Label returnBaseline;
+ Label returnIon;
+ Label bailout;
+ Label wasm;
+ Label wasmCatch;
+
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfKind()), r0, scratch);
+ }
+
+ asMasm().branch32(Assembler::Equal, r0,
+ Imm32(ExceptionResumeKind::EntryFrame), &entryFrame);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Catch),
+ &catch_);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Finally),
+ &finally);
+ asMasm().branch32(Assembler::Equal, r0,
+ Imm32(ExceptionResumeKind::ForcedReturnBaseline),
+ &returnBaseline);
+ asMasm().branch32(Assembler::Equal, r0,
+ Imm32(ExceptionResumeKind::ForcedReturnIon), &returnIon);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Bailout),
+ &bailout);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Wasm),
+ &wasm);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::WasmCatch),
+ &wasmCatch);
+
+ breakpoint(); // Invalid kind.
+
+ // No exception handler. Load the error value, restore state and return from
+ // the entry frame.
+ bind(&entryFrame);
+ asMasm().moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfFramePointer()), r11,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ }
+
+  // Return via the Ion calling convention: pop the return address directly
+  // into pc with a post-indexed load (ldr pc, [sp], #4).
+ as_dtr(IsLoad, 32, PostIndex, pc, DTRAddr(sp, DtrOffImm(4)));
+
+ // If we found a catch handler, this must be a baseline frame. Restore state
+ // and jump to the catch block.
+ bind(&catch_);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfTarget()), r0, scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfFramePointer()), r11,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ }
+ jump(r0);
+
+ // If we found a finally block, this must be a baseline frame. Push two
+ // values expected by the finally block: the exception and BooleanValue(true).
+ bind(&finally);
+ ValueOperand exception = ValueOperand(r1, r2);
+ loadValue(Operand(sp, ResumeFromException::offsetOfException()), exception);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfTarget()), r0, scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfFramePointer()), r11,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ }
+
+ pushValue(exception);
+ pushValue(BooleanValue(true));
+ jump(r0);
+
+ // Return BaselineFrame->returnValue() to the caller.
+ // Used in debug mode and for GeneratorReturn.
+ Label profilingInstrumentation;
+ bind(&returnBaseline);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfFramePointer()), r11,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ }
+ loadValue(Address(r11, BaselineFrame::reverseOffsetOfReturnValue()),
+ JSReturnOperand);
+ jump(&profilingInstrumentation);
+
+ // Return the given value to the caller.
+ bind(&returnIon);
+ loadValue(Address(sp, ResumeFromException::offsetOfException()),
+ JSReturnOperand);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfFramePointer()), r11,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ }
+
+  // If profiling is enabled, then update the lastProfilingFrame to refer to
+  // the caller frame before returning. This code is shared by ForcedReturnIon
+ // and ForcedReturnBaseline.
+ bind(&profilingInstrumentation);
+ {
+ Label skipProfilingInstrumentation;
+ // Test if profiler enabled.
+ AbsoluteAddress addressOfEnabled(
+ asMasm().runtime()->geckoProfiler().addressOfEnabled());
+ asMasm().branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
+ &skipProfilingInstrumentation);
+ jump(profilerExitTail);
+ bind(&skipProfilingInstrumentation);
+ }
+
+ ma_mov(r11, sp);
+ pop(r11);
+ ret();
+
+ // If we are bailing out to baseline to handle an exception, jump to the
+ // bailout tail stub. Load 1 (true) in ReturnReg to indicate success.
+ bind(&bailout);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfBailoutInfo()), r2,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ ma_mov(Imm32(1), ReturnReg);
+ }
+ jump(bailoutTail);
+
+ // If we are throwing and the innermost frame was a wasm frame, reset SP and
+ // FP; SP is pointing to the unwound return address to the wasm entry, so
+ // we can just ret().
+ bind(&wasm);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfFramePointer()), r11,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ ma_mov(Imm32(int32_t(wasm::FailInstanceReg)), InstanceReg);
+ }
+ as_dtr(IsLoad, 32, PostIndex, pc, DTRAddr(sp, DtrOffImm(4)));
+
+ // Found a wasm catch handler, restore state and jump to it.
+ bind(&wasmCatch);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfTarget()), r1, scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfFramePointer()), r11,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ }
+ jump(r1);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testStringTruthy(
+ bool truthy, const ValueOperand& value) {
+ Register string = value.payloadReg();
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ ma_dtr(IsLoad, string, Imm32(JSString::offsetOfLength()), scratch, scratch2);
+ as_cmp(scratch, Imm8(0));
+ return truthy ? Assembler::NotEqual : Assembler::Equal;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBigIntTruthy(
+ bool truthy, const ValueOperand& value) {
+ Register bi = value.payloadReg();
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ ma_dtr(IsLoad, bi, Imm32(BigInt::offsetOfDigitLength()), scratch, scratch2);
+ as_cmp(scratch, Imm8(0));
+ return truthy ? Assembler::NotEqual : Assembler::Equal;
+}
+
+void MacroAssemblerARMCompat::floor(FloatRegister input, Register output,
+ Label* bail) {
+ Label handleZero;
+ Label handleNeg;
+ Label fin;
+
+ ScratchDoubleScope scratchDouble(asMasm());
+
+ compareDouble(input, NoVFPRegister);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handleNeg, Assembler::Signed);
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+
+  // The argument is a positive number; truncation is the path to glory. Since
+  // it is known to be > 0.0, explicitly convert to the larger unsigned range,
+  // so a value that rounds to INT_MAX is distinguishable from an argument
+  // that clamps to INT_MAX.
+ ma_vcvt_F64_U32(input, scratchDouble.uintOverlay());
+ ma_vxfer(scratchDouble.uintOverlay(), output);
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+ ma_b(&fin);
+
+ bind(&handleZero);
+  // Move the top word of the double into the output reg; if it is non-zero,
+  // then the original value was -0.0.
+ as_vxfer(output, InvalidReg, input, FloatToCore, Always, 1);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+ bind(&handleNeg);
+ // Negative case, negate, then start dancing.
+ ma_vneg(input, input);
+ ma_vcvt_F64_U32(input, scratchDouble.uintOverlay());
+ ma_vxfer(scratchDouble.uintOverlay(), output);
+ ma_vcvt_U32_F64(scratchDouble.uintOverlay(), scratchDouble);
+ compareDouble(scratchDouble, input);
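+  // If truncating the negated value was inexact, bump it by one so that the
+  // negation below yields floor(input) rather than its ceiling.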
+ as_add(output, output, Imm8(1), LeaveCC, NotEqual);
+ // Negate the output. Since INT_MIN < -INT_MAX, even after adding 1, the
+ // result will still be a negative number.
+ as_rsb(output, output, Imm8(0), SetCC);
+ // Flip the negated input back to its original value.
+ ma_vneg(input, input);
+ // If the result looks non-negative, then this value didn't actually fit
+ // into the int range, and special handling is required. Zero is also caught
+ // by this case, but floor of a negative number should never be zero.
+ ma_b(bail, NotSigned);
+
+ bind(&fin);
+}
+
+void MacroAssemblerARMCompat::floorf(FloatRegister input, Register output,
+ Label* bail) {
+ Label handleZero;
+ Label handleNeg;
+ Label fin;
+ compareFloat(input, NoVFPRegister);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handleNeg, Assembler::Signed);
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+
+  // The argument is a positive number; truncation is the path to glory. Since
+  // it is known to be > 0.0, explicitly convert to the larger unsigned range,
+  // so a value that rounds to INT_MAX is distinguishable from an argument
+  // that clamps to INT_MAX.
+ {
+ ScratchFloat32Scope scratch(asMasm());
+ ma_vcvt_F32_U32(input, scratch.uintOverlay());
+ ma_vxfer(VFPRegister(scratch).uintOverlay(), output);
+ }
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+ ma_b(&fin);
+
+ bind(&handleZero);
+  // Move the bits of the float into the output reg; if they are non-zero,
+  // then the original value was -0.0.
+ as_vxfer(output, InvalidReg, VFPRegister(input).singleOverlay(), FloatToCore,
+ Always, 0);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+ bind(&handleNeg);
+ // Negative case, negate, then start dancing.
+ {
+ ScratchFloat32Scope scratch(asMasm());
+ ma_vneg_f32(input, input);
+ ma_vcvt_F32_U32(input, scratch.uintOverlay());
+ ma_vxfer(VFPRegister(scratch).uintOverlay(), output);
+ ma_vcvt_U32_F32(scratch.uintOverlay(), scratch);
+ compareFloat(scratch, input);
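+    // As in floor() above: adjust by one if the truncation was inexact.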
+ as_add(output, output, Imm8(1), LeaveCC, NotEqual);
+ }
+ // Negate the output. Since INT_MIN < -INT_MAX, even after adding 1, the
+ // result will still be a negative number.
+ as_rsb(output, output, Imm8(0), SetCC);
+ // Flip the negated input back to its original value.
+ ma_vneg_f32(input, input);
+ // If the result looks non-negative, then this value didn't actually fit
+ // into the int range, and special handling is required. Zero is also caught
+ // by this case, but floor of a negative number should never be zero.
+ ma_b(bail, NotSigned);
+
+ bind(&fin);
+}
+
+void MacroAssemblerARMCompat::ceil(FloatRegister input, Register output,
+ Label* bail) {
+ Label handleZero;
+ Label handlePos;
+ Label fin;
+
+ compareDouble(input, NoVFPRegister);
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handlePos, Assembler::NotSigned);
+
+ ScratchDoubleScope scratchDouble(asMasm());
+
+ // We are in the ]-Inf; 0[ range
+ // If we are in the ]-1; 0[ range => bailout
+ loadConstantDouble(-1.0, scratchDouble);
+ compareDouble(input, scratchDouble);
+ ma_b(bail, Assembler::GreaterThan);
+
+ // We are in the ]-Inf; -1] range: ceil(x) == -floor(-x) and floor can be
+ // computed with direct truncation here (x > 0).
+ ma_vneg(input, scratchDouble);
+ FloatRegister ScratchUIntReg = scratchDouble.uintOverlay();
+ ma_vcvt_F64_U32(scratchDouble, ScratchUIntReg);
+ ma_vxfer(ScratchUIntReg, output);
+ ma_neg(output, output, SetCC);
+ ma_b(bail, NotSigned);
+ ma_b(&fin);
+
+ // Test for 0.0 / -0.0: if the top word of the input double is not zero,
+ // then it was -0 and we need to bail out.
+ bind(&handleZero);
+ as_vxfer(output, InvalidReg, input, FloatToCore, Always, 1);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+ // We are in the ]0; +inf] range: truncate integer values, maybe add 1 for
+ // non integer values, maybe bail if overflow.
+ bind(&handlePos);
+ ma_vcvt_F64_U32(input, ScratchUIntReg);
+ ma_vxfer(ScratchUIntReg, output);
+ ma_vcvt_U32_F64(ScratchUIntReg, scratchDouble);
+ compareDouble(scratchDouble, input);
+ as_add(output, output, Imm8(1), LeaveCC, NotEqual);
+  // Bail out if the add overflowed or the result is non-positive.
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+ ma_b(bail, Zero);
+
+ bind(&fin);
+}
+
+void MacroAssemblerARMCompat::ceilf(FloatRegister input, Register output,
+ Label* bail) {
+ Label handleZero;
+ Label handlePos;
+ Label fin;
+
+ compareFloat(input, NoVFPRegister);
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handlePos, Assembler::NotSigned);
+
+ // We are in the ]-Inf; 0[ range
+ // If we are in the ]-1; 0[ range => bailout
+ {
+ ScratchFloat32Scope scratch(asMasm());
+ loadConstantFloat32(-1.f, scratch);
+ compareFloat(input, scratch);
+ ma_b(bail, Assembler::GreaterThan);
+ }
+
+ // We are in the ]-Inf; -1] range: ceil(x) == -floor(-x) and floor can be
+ // computed with direct truncation here (x > 0).
+ {
+ ScratchDoubleScope scratchDouble(asMasm());
+ FloatRegister scratchFloat = scratchDouble.asSingle();
+ FloatRegister scratchUInt = scratchDouble.uintOverlay();
+
+ ma_vneg_f32(input, scratchFloat);
+ ma_vcvt_F32_U32(scratchFloat, scratchUInt);
+ ma_vxfer(scratchUInt, output);
+ ma_neg(output, output, SetCC);
+ ma_b(bail, NotSigned);
+ ma_b(&fin);
+ }
+
+  // Test for 0.0 / -0.0: if the bits of the input float are not zero, then it
+  // was -0 and we need to bail out.
+ bind(&handleZero);
+ as_vxfer(output, InvalidReg, VFPRegister(input).singleOverlay(), FloatToCore,
+ Always, 0);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+  // We are in the ]0; +inf] range: truncate integer values, maybe add 1 for
+  // non-integer values, and maybe bail on overflow.
+ bind(&handlePos);
+ {
+ ScratchDoubleScope scratchDouble(asMasm());
+ FloatRegister scratchFloat = scratchDouble.asSingle();
+ FloatRegister scratchUInt = scratchDouble.uintOverlay();
+
+ ma_vcvt_F32_U32(input, scratchUInt);
+ ma_vxfer(scratchUInt, output);
+ ma_vcvt_U32_F32(scratchUInt, scratchFloat);
+ compareFloat(scratchFloat, input);
+ as_add(output, output, Imm8(1), LeaveCC, NotEqual);
+
+ // Bail on overflow or non-positive result.
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+ ma_b(bail, Zero);
+ }
+
+ bind(&fin);
+}
+
+CodeOffset MacroAssemblerARMCompat::toggledJump(Label* label) {
+ // Emit a B that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp().
+ BufferOffset b = ma_b(label, Always);
+ CodeOffset ret(b.getOffset());
+ return ret;
+}
+
+CodeOffset MacroAssemblerARMCompat::toggledCall(JitCode* target, bool enabled) {
+ BufferOffset bo = nextOffset();
+ addPendingJump(bo, ImmPtr(target->raw()), RelocationKind::JITCODE);
+ ScratchRegisterScope scratch(asMasm());
+ ma_movPatchable(ImmPtr(target->raw()), scratch, Always);
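+  // Only the final instruction differs between the enabled (blx) and
+  // disabled (nop) forms, so a later toggle can rewrite it in place without
+  // disturbing the patchable target load above.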
+ if (enabled) {
+ ma_blx(scratch);
+ } else {
+ ma_nop();
+ }
+ return CodeOffset(bo.getOffset());
+}
+
+void MacroAssemblerARMCompat::round(FloatRegister input, Register output,
+ Label* bail, FloatRegister tmp) {
+ Label handleZero;
+ Label handleNeg;
+ Label fin;
+
+ ScratchDoubleScope scratchDouble(asMasm());
+
+  // Do a compare based on the original value, then do most other things based
+  // on the absolute value stored in tmp.
+ ma_vcmpz(input);
+ // Since we already know the sign bit, flip all numbers to be positive,
+ // stored in tmp.
+ ma_vabs(input, tmp);
+ as_vmrs(pc);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handleNeg, Assembler::Signed);
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+
+  // The argument is a positive number; truncation is the path to glory. Since
+  // it is known to be > 0.0, explicitly convert to a larger (unsigned) range,
+  // so that a value which rounds to INT_MAX is explicitly different from an
+  // argument that clamps to INT_MAX.
+
+ // Add the biggest number less than 0.5 (not 0.5, because adding that to
+ // the biggest number less than 0.5 would undesirably round up to 1), and
+ // store the result into tmp.
+ loadConstantDouble(GetBiggestNumberLessThan(0.5), scratchDouble);
+ ma_vadd(scratchDouble, tmp, tmp);
+
+ ma_vcvt_F64_U32(tmp, scratchDouble.uintOverlay());
+ ma_vxfer(VFPRegister(scratchDouble).uintOverlay(), output);
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+ ma_b(&fin);
+
+ bind(&handleZero);
+  // Move the top word of the double into the output reg; if it is non-zero,
+  // then the original value was -0.0.
+ as_vxfer(output, InvalidReg, input, FloatToCore, Always, 1);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+ bind(&handleNeg);
+  // Negative case, negate, then start dancing. This number may become
+  // positive once we add 0.5 below.
+
+ // Add 0.5 to negative numbers, store the result into tmp
+ loadConstantDouble(0.5, scratchDouble);
+ ma_vadd(scratchDouble, tmp, tmp);
+
+ ma_vcvt_F64_U32(tmp, scratchDouble.uintOverlay());
+ ma_vxfer(VFPRegister(scratchDouble).uintOverlay(), output);
+
+ // -output is now a correctly rounded value, unless the original value was
+ // exactly halfway between two integers, at which point, it has been rounded
+ // away from zero, when it should be rounded towards \infty.
+ ma_vcvt_U32_F64(scratchDouble.uintOverlay(), scratchDouble);
+ compareDouble(scratchDouble, tmp);
+ as_sub(output, output, Imm8(1), LeaveCC, Equal);
+ // Negate the output. Since INT_MIN < -INT_MAX, even after adding 1, the
+ // result will still be a negative number.
+ as_rsb(output, output, Imm8(0), SetCC);
+
+ // If the result looks non-negative, then this value didn't actually fit
+ // into the int range, and special handling is required, or it was zero,
+ // which means the result is actually -0.0 which also requires special
+ // handling.
+ ma_b(bail, NotSigned);
+
+ bind(&fin);
+}
+
+void MacroAssemblerARMCompat::roundf(FloatRegister input, Register output,
+ Label* bail, FloatRegister tmp) {
+ Label handleZero;
+ Label handleNeg;
+ Label fin;
+
+ ScratchFloat32Scope scratchFloat(asMasm());
+
+  // Do a compare based on the original value, then do most other things based
+  // on the absolute value stored in tmp.
+ compareFloat(input, NoVFPRegister);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handleNeg, Assembler::Signed);
+
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+
+  // The argument is a positive number; truncation is the path to glory. Since
+  // it is known to be > 0.0, explicitly convert to a larger (unsigned) range,
+  // so that a value which rounds to INT_MAX is explicitly different from an
+  // argument that clamps to INT_MAX.
+
+ // Add the biggest number less than 0.5f (not 0.5f, because adding that to
+ // the biggest number less than 0.5f would undesirably round up to 1), and
+ // store the result into tmp.
+ loadConstantFloat32(GetBiggestNumberLessThan(0.5f), scratchFloat);
+ ma_vadd_f32(scratchFloat, input, tmp);
+
+ // Note: it doesn't matter whether x + .5 === x or not here, as it doesn't
+ // affect the semantics of the float to unsigned conversion (in particular,
+ // we are not applying any fixup after the operation).
+ ma_vcvt_F32_U32(tmp, scratchFloat.uintOverlay());
+ ma_vxfer(VFPRegister(scratchFloat).uintOverlay(), output);
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+ ma_b(&fin);
+
+ bind(&handleZero);
+
+  // Move the whole float32 into the output reg; if it is non-zero, then the
+  // original value was -0.0.
+ as_vxfer(output, InvalidReg, input, FloatToCore, Always, 0);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+ bind(&handleNeg);
+
+ // Add 0.5 to negative numbers, storing the result into tmp.
+ ma_vneg_f32(input, tmp);
+ loadConstantFloat32(0.5f, scratchFloat);
+ ma_vadd_f32(tmp, scratchFloat, scratchFloat);
+
+  // Adding 0.5 to a float input can yield the wrong result if the input is
+  // too large. In this case, skip the -1 adjustment made below.
+ compareFloat(scratchFloat, tmp);
+
+ // Negative case, negate, then start dancing. This number may be positive,
+ // since we added 0.5.
+  // /!\ The conditional jump afterwards depends on these two instructions
+  //     *not* setting the status flags: the flags must not change after the
+  //     comparison above.
+ ma_vcvt_F32_U32(scratchFloat, tmp.uintOverlay());
+ ma_vxfer(VFPRegister(tmp).uintOverlay(), output);
+
+ Label flipSign;
+ ma_b(&flipSign, Equal);
+
+ // -output is now a correctly rounded value, unless the original value was
+ // exactly halfway between two integers, at which point, it has been rounded
+ // away from zero, when it should be rounded towards \infty.
+ ma_vcvt_U32_F32(tmp.uintOverlay(), tmp);
+ compareFloat(tmp, scratchFloat);
+ as_sub(output, output, Imm8(1), LeaveCC, Equal);
+
+ // Negate the output. Since INT_MIN < -INT_MAX, even after adding 1, the
+ // result will still be a negative number.
+ bind(&flipSign);
+ as_rsb(output, output, Imm8(0), SetCC);
+
+ // If the result looks non-negative, then this value didn't actually fit
+ // into the int range, and special handling is required, or it was zero,
+ // which means the result is actually -0.0 which also requires special
+ // handling.
+ ma_b(bail, NotSigned);
+
+ bind(&fin);
+}
+
+void MacroAssemblerARMCompat::trunc(FloatRegister input, Register output,
+ Label* bail) {
+ Label handleZero;
+ Label handlePos;
+ Label fin;
+
+ compareDouble(input, NoVFPRegister);
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handlePos, Assembler::NotSigned);
+
+ ScratchDoubleScope scratchDouble(asMasm());
+
+ // We are in the ]-Inf; 0[ range
+ // If we are in the ]-1; 0[ range => bailout
+ loadConstantDouble(-1.0, scratchDouble);
+ compareDouble(input, scratchDouble);
+ ma_b(bail, Assembler::GreaterThan);
+
+  // We are in the ]-Inf; -1] range: trunc(x) == -floor(-x) and floor can be
+  // computed with direct truncation here (since -x > 0).
+ ma_vneg(input, scratchDouble);
+ ma_vcvt_F64_U32(scratchDouble, scratchDouble.uintOverlay());
+ ma_vxfer(scratchDouble.uintOverlay(), output);
+ ma_neg(output, output, SetCC);
+ ma_b(bail, NotSigned);
+ ma_b(&fin);
+
+ // Test for 0.0 / -0.0: if the top word of the input double is not zero,
+ // then it was -0 and we need to bail out.
+ bind(&handleZero);
+ as_vxfer(output, InvalidReg, input, FloatToCore, Always, 1);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+  // We are in the ]0; +inf] range: truncation is the path to glory. Since
+  // the value is known to be > 0.0, explicitly convert to a larger (unsigned)
+  // range, so that a value which rounds to INT_MAX is explicitly different
+  // from an argument that clamps to INT_MAX.
+ bind(&handlePos);
+ ma_vcvt_F64_U32(input, scratchDouble.uintOverlay());
+ ma_vxfer(scratchDouble.uintOverlay(), output);
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+
+ bind(&fin);
+}
+
+void MacroAssemblerARMCompat::truncf(FloatRegister input, Register output,
+ Label* bail) {
+ Label handleZero;
+ Label handlePos;
+ Label fin;
+
+ compareFloat(input, NoVFPRegister);
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handlePos, Assembler::NotSigned);
+
+ // We are in the ]-Inf; 0[ range
+ // If we are in the ]-1; 0[ range => bailout
+ {
+ ScratchFloat32Scope scratch(asMasm());
+ loadConstantFloat32(-1.f, scratch);
+ compareFloat(input, scratch);
+ ma_b(bail, Assembler::GreaterThan);
+ }
+
+  // We are in the ]-Inf; -1] range: trunc(x) == -floor(-x) and floor can be
+  // computed with direct truncation here (since -x > 0).
+ {
+ ScratchDoubleScope scratchDouble(asMasm());
+ FloatRegister scratchFloat = scratchDouble.asSingle();
+ FloatRegister scratchUInt = scratchDouble.uintOverlay();
+
+ ma_vneg_f32(input, scratchFloat);
+ ma_vcvt_F32_U32(scratchFloat, scratchUInt);
+ ma_vxfer(scratchUInt, output);
+ ma_neg(output, output, SetCC);
+ ma_b(bail, NotSigned);
+ ma_b(&fin);
+ }
+
+  // Test for 0.0 / -0.0: if the bit pattern of the input float is not zero,
+  // then it was -0 and we need to bail out.
+ bind(&handleZero);
+ as_vxfer(output, InvalidReg, VFPRegister(input).singleOverlay(), FloatToCore,
+ Always, 0);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+  // We are in the ]0; +inf] range: truncation is the path to glory. Since
+  // the argument is known to be > 0.0, explicitly convert to a larger
+  // (unsigned) range, so that a value which rounds to INT_MAX is explicitly
+  // different from an argument that clamps to INT_MAX.
+  bind(&handlePos);
+  {
+ {
+ ScratchFloat32Scope scratch(asMasm());
+ ma_vcvt_F32_U32(input, scratch.uintOverlay());
+ ma_vxfer(VFPRegister(scratch).uintOverlay(), output);
+ }
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+ }
+
+ bind(&fin);
+}
+
+void MacroAssemblerARMCompat::profilerEnterFrame(Register framePtr,
+ Register scratch) {
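+  // Record framePtr as the last profiling frame of the current JitActivation
+  // and clear the last profiling call site.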
+ asMasm().loadJSContext(scratch);
+ loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch);
+ storePtr(framePtr,
+ Address(scratch, JitActivation::offsetOfLastProfilingFrame()));
+ storePtr(ImmPtr(nullptr),
+ Address(scratch, JitActivation::offsetOfLastProfilingCallSite()));
+}
+
+void MacroAssemblerARMCompat::profilerExitFrame() {
+ jump(asMasm().runtime()->jitRuntime()->getProfilerExitFrameTail());
+}
+
+MacroAssembler& MacroAssemblerARM::asMasm() {
+ return *static_cast<MacroAssembler*>(this);
+}
+
+const MacroAssembler& MacroAssemblerARM::asMasm() const {
+ return *static_cast<const MacroAssembler*>(this);
+}
+
+MacroAssembler& MacroAssemblerARMCompat::asMasm() {
+ return *static_cast<MacroAssembler*>(this);
+}
+
+const MacroAssembler& MacroAssemblerARMCompat::asMasm() const {
+ return *static_cast<const MacroAssembler*>(this);
+}
+
+void MacroAssembler::subFromStackPtr(Imm32 imm32) {
+ ScratchRegisterScope scratch(*this);
+ if (imm32.value) {
+ ma_sub(imm32, sp, scratch);
+ }
+}
+
+//{{{ check_macroassembler_style
+// ===============================================================
+// MacroAssembler high-level usage.
+
+void MacroAssembler::flush() { Assembler::flush(); }
+
+void MacroAssembler::comment(const char* msg) { Assembler::comment(msg); }
+
+// ===============================================================
+// Stack manipulation functions.
+
+size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) {
+ return set.gprs().size() * sizeof(intptr_t) + set.fpus().getPushSizeInBytes();
+}
+
+void MacroAssembler::PushRegsInMask(LiveRegisterSet set) {
+ mozilla::DebugOnly<size_t> framePushedInitial = framePushed();
+
+ int32_t diffF = set.fpus().getPushSizeInBytes();
+ int32_t diffG = set.gprs().size() * sizeof(intptr_t);
+
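+  // With more than one GPR, a single store-multiple (descending, write-back)
+  // pushes them all at once; otherwise reserve the space and store the
+  // register (if any) directly at its slot.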
+ if (set.gprs().size() > 1) {
+ adjustFrame(diffG);
+ startDataTransferM(IsStore, StackPointer, DB, WriteBack);
+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();
+ ++iter) {
+ diffG -= sizeof(intptr_t);
+ transferReg(*iter);
+ }
+ finishDataTransfer();
+ } else {
+ reserveStack(diffG);
+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();
+ ++iter) {
+ diffG -= sizeof(intptr_t);
+ storePtr(*iter, Address(StackPointer, diffG));
+ }
+ }
+ MOZ_ASSERT(diffG == 0);
+
+ // It's possible that the logic is just fine as it is if the reduced set
+ // maps SIMD pairs to plain doubles and transferMultipleByRuns() stores
+ // and loads doubles.
+#ifdef ENABLE_WASM_SIMD
+# error "Needs more careful logic if SIMD is enabled"
+#endif
+
+ adjustFrame(diffF);
+ diffF += transferMultipleByRuns(set.fpus(), IsStore, StackPointer, DB);
+ MOZ_ASSERT(diffF == 0);
+
+ MOZ_ASSERT(framePushed() - framePushedInitial ==
+ PushRegsInMaskSizeInBytes(set));
+}
+
+void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest,
+ Register scratch) {
+ mozilla::DebugOnly<size_t> offsetInitial = dest.offset;
+
+ int32_t diffF = set.fpus().getPushSizeInBytes();
+ int32_t diffG = set.gprs().size() * sizeof(intptr_t);
+
+ MOZ_ASSERT(dest.offset >= diffF + diffG);
+
+ if (set.gprs().size() > 1) {
+ computeEffectiveAddress(dest, scratch);
+
+ startDataTransferM(IsStore, scratch, DB, WriteBack);
+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();
+ ++iter) {
+ diffG -= sizeof(intptr_t);
+ dest.offset -= sizeof(intptr_t);
+ transferReg(*iter);
+ }
+ finishDataTransfer();
+ } else {
+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();
+ ++iter) {
+ diffG -= sizeof(intptr_t);
+ dest.offset -= sizeof(intptr_t);
+ storePtr(*iter, dest);
+ }
+ }
+ MOZ_ASSERT(diffG == 0);
+ (void)diffG;
+
+ // See above.
+#ifdef ENABLE_WASM_SIMD
+# error "Needs more careful logic if SIMD is enabled"
+#endif
+
+ MOZ_ASSERT(diffF >= 0);
+ if (diffF > 0) {
+ computeEffectiveAddress(dest, scratch);
+ diffF += transferMultipleByRuns(set.fpus(), IsStore, scratch, DB);
+ }
+
+ MOZ_ASSERT(diffF == 0);
+
+ // "The amount of space actually used does not exceed what
+ // `PushRegsInMaskSizeInBytes` claims will be used."
+ MOZ_ASSERT(offsetInitial - dest.offset <= PushRegsInMaskSizeInBytes(set));
+}
+
+void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set,
+ LiveRegisterSet ignore) {
+ mozilla::DebugOnly<size_t> framePushedInitial = framePushed();
+
+ int32_t diffG = set.gprs().size() * sizeof(intptr_t);
+ int32_t diffF = set.fpus().getPushSizeInBytes();
+ const int32_t reservedG = diffG;
+ const int32_t reservedF = diffF;
+
+ // See above.
+#ifdef ENABLE_WASM_SIMD
+# error "Needs more careful logic if SIMD is enabled"
+#endif
+
+ // ARM can load multiple registers at once, but only if we want back all
+ // the registers we previously saved to the stack.
+ if (ignore.emptyFloat()) {
+ diffF -= transferMultipleByRuns(set.fpus(), IsLoad, StackPointer, IA);
+ adjustFrame(-reservedF);
+ } else {
+ LiveFloatRegisterSet fpset(set.fpus().reduceSetForPush());
+ LiveFloatRegisterSet fpignore(ignore.fpus().reduceSetForPush());
+ for (FloatRegisterBackwardIterator iter(fpset); iter.more(); ++iter) {
+ diffF -= (*iter).size();
+ if (!fpignore.has(*iter)) {
+ loadDouble(Address(StackPointer, diffF), *iter);
+ }
+ }
+ freeStack(reservedF);
+ }
+ MOZ_ASSERT(diffF == 0);
+
+ if (set.gprs().size() > 1 && ignore.emptyGeneral()) {
+ startDataTransferM(IsLoad, StackPointer, IA, WriteBack);
+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();
+ ++iter) {
+ diffG -= sizeof(intptr_t);
+ transferReg(*iter);
+ }
+ finishDataTransfer();
+ adjustFrame(-reservedG);
+ } else {
+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();
+ ++iter) {
+ diffG -= sizeof(intptr_t);
+ if (!ignore.has(*iter)) {
+ loadPtr(Address(StackPointer, diffG), *iter);
+ }
+ }
+ freeStack(reservedG);
+ }
+ MOZ_ASSERT(diffG == 0);
+
+ MOZ_ASSERT(framePushedInitial - framePushed() ==
+ PushRegsInMaskSizeInBytes(set));
+}
+
+void MacroAssembler::Push(Register reg) {
+ push(reg);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(const Imm32 imm) {
+ push(imm);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(const ImmWord imm) {
+ push(imm);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(const ImmPtr imm) {
+ Push(ImmWord(uintptr_t(imm.value)));
+}
+
+void MacroAssembler::Push(const ImmGCPtr ptr) {
+ push(ptr);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(FloatRegister reg) {
+ VFPRegister r = VFPRegister(reg);
+ ma_vpush(VFPRegister(reg));
+ adjustFrame(r.size());
+}
+
+void MacroAssembler::PushBoxed(FloatRegister reg) {
+ MOZ_ASSERT(reg.isDouble());
+ Push(reg);
+}
+
+void MacroAssembler::Pop(Register reg) {
+ ma_pop(reg);
+ adjustFrame(-sizeof(intptr_t));
+}
+
+void MacroAssembler::Pop(FloatRegister reg) {
+ ma_vpop(reg);
+ adjustFrame(-reg.size());
+}
+
+void MacroAssembler::Pop(const ValueOperand& val) {
+ popValue(val);
+ adjustFrame(-sizeof(Value));
+}
+
+void MacroAssembler::PopStackPtr() {
+ as_dtr(IsLoad, 32, Offset, sp, DTRAddr(sp, DtrOffImm(0)));
+ adjustFrame(-sizeof(intptr_t));
+}
+
+// ===============================================================
+// Simple call functions.
+
+CodeOffset MacroAssembler::call(Register reg) {
+ as_blx(reg);
+ return CodeOffset(currentOffset());
+}
+
+CodeOffset MacroAssembler::call(Label* label) {
+ // For now, assume that it'll be nearby.
+ as_bl(label, Always);
+ return CodeOffset(currentOffset());
+}
+
+void MacroAssembler::call(ImmWord imm) { call(ImmPtr((void*)imm.value)); }
+
+void MacroAssembler::call(ImmPtr imm) {
+ BufferOffset bo = m_buffer.nextOffset();
+ addPendingJump(bo, imm, RelocationKind::HARDCODED);
+ ma_call(imm);
+}
+
+CodeOffset MacroAssembler::call(wasm::SymbolicAddress imm) {
+ movePtr(imm, CallReg);
+ return call(CallReg);
+}
+
+void MacroAssembler::call(const Address& addr) {
+ loadPtr(addr, CallReg);
+ call(CallReg);
+}
+
+void MacroAssembler::call(JitCode* c) {
+ BufferOffset bo = m_buffer.nextOffset();
+ addPendingJump(bo, ImmPtr(c->raw()), RelocationKind::JITCODE);
+ ScratchRegisterScope scratch(*this);
+ ma_movPatchable(ImmPtr(c->raw()), scratch, Always);
+ callJitNoProfiler(scratch);
+}
+
+CodeOffset MacroAssembler::callWithPatch() {
+ // The caller ensures that the call is always in range using thunks (below)
+ // as necessary.
+ as_bl(BOffImm(), Always, /* documentation */ nullptr);
+ return CodeOffset(currentOffset());
+}
+
+void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) {
+ BufferOffset inst(callerOffset - 4);
+ BOffImm off = BufferOffset(calleeOffset).diffB<BOffImm>(inst);
+ MOZ_RELEASE_ASSERT(!off.isInvalid(),
+ "Failed to insert necessary far jump islands");
+ as_bl(off, Always, inst);
+}
+
+CodeOffset MacroAssembler::farJumpWithPatch() {
+ static_assert(32 * 1024 * 1024 - JumpImmediateRange >
+ wasm::MaxFuncs * 3 * sizeof(Instruction),
+ "always enough space for thunks");
+
+ // The goal of the thunk is to be able to jump to any address without the
+ // usual 32MiB branch range limitation. Additionally, to make the thunk
+ // simple to use, the thunk does not use the constant pool or require
+ // patching an absolute address. Instead, a relative offset is used which
+ // can be patched during compilation.
+
+ // Inhibit pools since these three words must be contiguous so that the offset
+ // calculations below are valid.
+ AutoForbidPoolsAndNops afp(this, 3);
+
+ // When pc is used, the read value is the address of the instruction + 8.
+ // This is exactly the address of the uint32 word we want to load.
+ ScratchRegisterScope scratch(*this);
+ ma_ldr(DTRAddr(pc, DtrOffImm(0)), scratch);
+
+ // Branch by making pc the destination register.
+ ma_add(pc, scratch, pc, LeaveCC, Always);
+
+ // Allocate space which will be patched by patchFarJump().
+ CodeOffset farJump(currentOffset());
+ writeInst(UINT32_MAX);
+
+ return farJump;
+}
+
+void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) {
+ uint32_t* u32 =
+ reinterpret_cast<uint32_t*>(editSrc(BufferOffset(farJump.offset())));
+ MOZ_ASSERT(*u32 == UINT32_MAX);
+
+ uint32_t addOffset = farJump.offset() - 4;
+ MOZ_ASSERT(editSrc(BufferOffset(addOffset))->is<InstALU>());
+
+ // When pc is read as the operand of the add, its value is the address of
+ // the add instruction + 8.
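+  // The thunk executes "add pc, pc, <loaded word>", and pc reads as
+  // addOffset + 8 there, so storing targetOffset - addOffset - 8 makes the
+  // branch land exactly on the target.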
+ *u32 = (targetOffset - addOffset) - 8;
+}
+
+CodeOffset MacroAssembler::nopPatchableToCall() {
+ AutoForbidPoolsAndNops afp(this,
+ /* max number of instructions in scope = */ 1);
+ ma_nop();
+ return CodeOffset(currentOffset());
+}
+
+void MacroAssembler::patchNopToCall(uint8_t* call, uint8_t* target) {
+ uint8_t* inst = call - 4;
+ MOZ_ASSERT(reinterpret_cast<Instruction*>(inst)->is<InstBLImm>() ||
+ reinterpret_cast<Instruction*>(inst)->is<InstNOP>());
+
+ new (inst) InstBLImm(BOffImm(target - inst), Assembler::Always);
+}
+
+void MacroAssembler::patchCallToNop(uint8_t* call) {
+ uint8_t* inst = call - 4;
+ MOZ_ASSERT(reinterpret_cast<Instruction*>(inst)->is<InstBLImm>() ||
+ reinterpret_cast<Instruction*>(inst)->is<InstNOP>());
+ new (inst) InstNOP();
+}
+
+void MacroAssembler::pushReturnAddress() { push(lr); }
+
+void MacroAssembler::popReturnAddress() { pop(lr); }
+
+// ===============================================================
+// ABI function calls.
+
+void MacroAssembler::setupUnalignedABICall(Register scratch) {
+ setupNativeABICall();
+ dynamicAlignment_ = true;
+
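+  // Save the original sp, align sp down to ABIStackAlignment, then push the
+  // saved value so callWithABIPost can restore it afterwards.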
+ ma_mov(sp, scratch);
+ // Force sp to be aligned.
+ as_bic(sp, sp, Imm8(ABIStackAlignment - 1));
+ ma_push(scratch);
+}
+
+void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) {
+ MOZ_ASSERT(inCall_);
+ uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar();
+
+ if (dynamicAlignment_) {
+ // sizeof(intptr_t) accounts for the saved stack pointer pushed by
+ // setupUnalignedABICall.
+ stackForCall += ComputeByteAlignment(stackForCall + sizeof(intptr_t),
+ ABIStackAlignment);
+ } else {
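+    // Include the bytes already pushed in this frame (plus the wasm frame
+    // prologue, if any) when computing the padding needed to reach
+    // ABIStackAlignment.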
+ uint32_t alignmentAtPrologue = callFromWasm ? sizeof(wasm::Frame) : 0;
+ stackForCall += ComputeByteAlignment(
+ stackForCall + framePushed() + alignmentAtPrologue, ABIStackAlignment);
+ }
+
+ *stackAdjust = stackForCall;
+ reserveStack(stackForCall);
+
+ // Position all arguments.
+ {
+ enoughMemory_ &= moveResolver_.resolve();
+ if (!enoughMemory_) {
+ return;
+ }
+
+ MoveEmitter emitter(*this);
+ emitter.emit(moveResolver_);
+ emitter.finish();
+ }
+
+ assertStackAlignment(ABIStackAlignment);
+
+ // Save the lr register if we need to preserve it.
+ if (secondScratchReg_ != lr) {
+ ma_mov(lr, secondScratchReg_);
+ }
+}
+
+void MacroAssembler::callWithABIPost(uint32_t stackAdjust, MoveOp::Type result,
+ bool callFromWasm) {
+ if (secondScratchReg_ != lr) {
+ ma_mov(secondScratchReg_, lr);
+ }
+
+ // Calls to native functions in wasm pass through a thunk which already
+ // fixes up the return value for us.
+ if (!callFromWasm && !UseHardFpABI()) {
+ switch (result) {
+ case MoveOp::DOUBLE:
+ // Move double from r0/r1 to ReturnFloatReg.
+ ma_vxfer(r0, r1, ReturnDoubleReg);
+ break;
+ case MoveOp::FLOAT32:
+ // Move float32 from r0 to ReturnFloatReg.
+ ma_vxfer(r0, ReturnFloat32Reg);
+ break;
+ case MoveOp::GENERAL:
+ break;
+ default:
+ MOZ_CRASH("unexpected callWithABI result");
+ }
+ }
+
+ freeStack(stackAdjust);
+
+ if (dynamicAlignment_) {
+ // While the x86 supports pop esp, on ARM that isn't well defined, so
+ // just do it manually.
+ as_dtr(IsLoad, 32, Offset, sp, DTRAddr(sp, DtrOffImm(0)));
+ }
+
+#ifdef DEBUG
+ MOZ_ASSERT(inCall_);
+ inCall_ = false;
+#endif
+}
+
+void MacroAssembler::callWithABINoProfiler(Register fun, MoveOp::Type result) {
+ // Load the callee in r12, as above.
+ ma_mov(fun, r12);
+ uint32_t stackAdjust;
+ callWithABIPre(&stackAdjust);
+ call(r12);
+ callWithABIPost(stackAdjust, result);
+}
+
+void MacroAssembler::callWithABINoProfiler(const Address& fun,
+ MoveOp::Type result) {
+  // Load the callee into r12; no instruction between the ldr and the call
+  // should clobber it. Note that we can't use fun.base because it may be one
+  // of the IntArg registers clobbered before the call.
+ {
+ ScratchRegisterScope scratch(*this);
+ ma_ldr(fun, r12, scratch);
+ }
+ uint32_t stackAdjust;
+ callWithABIPre(&stackAdjust);
+ call(r12);
+ callWithABIPost(stackAdjust, result);
+}
+
+// ===============================================================
+// Jit Frames.
+
+uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) {
+  // On ARM, any read of the pc yields the address of the current instruction
+  // plus 8, which corresponds to 2 instructions of 4 bytes. Thus we use an
+  // additional nop to pad until we reach the pushed pc.
+  //
+  // Note: In practice this should not be necessary, as this fake return
+  // address is never used for resuming any execution. Thus theoretically we
+  // could just do a Push(pc), and ignore the nop as well as the pool.
+ enterNoPool(2);
+ DebugOnly<uint32_t> offsetBeforePush = currentOffset();
+ Push(pc); // actually pushes $pc + 8.
+ ma_nop();
+ uint32_t pseudoReturnOffset = currentOffset();
+ leaveNoPool();
+
+ MOZ_ASSERT_IF(!oom(), pseudoReturnOffset - offsetBeforePush == 8);
+ return pseudoReturnOffset;
+}
+
+void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg, Register scratch,
+ ExitFrameType type) {
+ enterFakeExitFrame(cxreg, scratch, type);
+}
+
+// ===============================================================
+// Move instructions
+
+void MacroAssembler::moveValue(const TypedOrValueRegister& src,
+ const ValueOperand& dest) {
+ if (src.hasValue()) {
+ moveValue(src.valueReg(), dest);
+ return;
+ }
+
+ MIRType type = src.type();
+ AnyRegister reg = src.typedReg();
+
+ if (!IsFloatingPointType(type)) {
+ if (reg.gpr() != dest.payloadReg()) {
+ mov(reg.gpr(), dest.payloadReg());
+ }
+ mov(ImmWord(MIRTypeToTag(type)), dest.typeReg());
+ return;
+ }
+
+ ScratchFloat32Scope scratch(*this);
+ FloatRegister freg = reg.fpu();
+ if (type == MIRType::Float32) {
+ convertFloat32ToDouble(freg, scratch);
+ freg = scratch;
+ }
+ ma_vxfer(freg, dest.payloadReg(), dest.typeReg());
+}
+
+void MacroAssembler::moveValue(const ValueOperand& src,
+ const ValueOperand& dest) {
+ Register s0 = src.typeReg();
+ Register s1 = src.payloadReg();
+ Register d0 = dest.typeReg();
+ Register d1 = dest.payloadReg();
+
+ // Either one or both of the source registers could be the same as a
+ // destination register.
+ if (s1 == d0) {
+ if (s0 == d1) {
+ // If both are, this is just a swap of two registers.
+ ScratchRegisterScope scratch(*this);
+ MOZ_ASSERT(d1 != scratch);
+ MOZ_ASSERT(d0 != scratch);
+ ma_mov(d1, scratch);
+ ma_mov(d0, d1);
+ ma_mov(scratch, d0);
+ return;
+ }
+ // If only one is, copy that source first.
+ std::swap(s0, s1);
+ std::swap(d0, d1);
+ }
+
+ if (s0 != d0) {
+ ma_mov(s0, d0);
+ }
+ if (s1 != d1) {
+ ma_mov(s1, d1);
+ }
+}
+
+void MacroAssembler::moveValue(const Value& src, const ValueOperand& dest) {
+ ma_mov(Imm32(src.toNunboxTag()), dest.typeReg());
+ if (src.isGCThing()) {
+ ma_mov(ImmGCPtr(src.toGCThing()), dest.payloadReg());
+ } else {
+ ma_mov(Imm32(src.toNunboxPayload()), dest.payloadReg());
+ }
+}
+
+// ===============================================================
+// Branch functions
+
+void MacroAssembler::loadStoreBuffer(Register ptr, Register buffer) {
+ ma_lsr(Imm32(gc::ChunkShift), ptr, buffer);
+ ma_lsl(Imm32(gc::ChunkShift), buffer, buffer);
+ load32(Address(buffer, gc::ChunkStoreBufferOffset), buffer);
+}
+
+void MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr,
+ Register temp, Label* label) {
+ Maybe<SecondScratchRegisterScope> scratch2;
+ if (temp == Register::Invalid()) {
+ scratch2.emplace(*this);
+ temp = scratch2.ref();
+ }
+
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ MOZ_ASSERT(ptr != temp);
+
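+  // Round the pointer down to its chunk base (clear the low ChunkShift bits
+  // via lsr/lsl) and load the chunk's store buffer pointer, which is non-null
+  // only for nursery chunks.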
+ ma_lsr(Imm32(gc::ChunkShift), ptr, temp);
+ ma_lsl(Imm32(gc::ChunkShift), temp, temp);
+ loadPtr(Address(temp, gc::ChunkStoreBufferOffset), temp);
+ branchPtr(InvertCondition(cond), temp, ImmWord(0), label);
+}
+
+void MacroAssembler::branchValueIsNurseryCell(Condition cond,
+ const Address& address,
+ Register temp, Label* label) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+
+ Label done;
+
+ branchTestGCThing(Assembler::NotEqual, address,
+ cond == Assembler::Equal ? &done : label);
+
+ loadPtr(ToPayload(address), temp);
+ SecondScratchRegisterScope scratch2(*this);
+ branchPtrInNurseryChunk(cond, temp, scratch2, label);
+
+ bind(&done);
+}
+
+void MacroAssembler::branchValueIsNurseryCell(Condition cond,
+ ValueOperand value, Register temp,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+
+ Label done;
+
+ branchTestGCThing(Assembler::NotEqual, value,
+ cond == Assembler::Equal ? &done : label);
+ branchPtrInNurseryChunk(cond, value.payloadReg(), temp, label);
+
+ bind(&done);
+}
+
+void MacroAssembler::branchTestValue(Condition cond, const ValueOperand& lhs,
+ const Value& rhs, Label* label) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ // If cond == NotEqual, branch when a.payload != b.payload || a.tag !=
+ // b.tag. If the payloads are equal, compare the tags. If the payloads are
+ // not equal, short circuit true (NotEqual).
+ //
+  // If cond == Equal, branch when a.payload == b.payload && a.tag == b.tag.
+ // If the payloads are equal, compare the tags. If the payloads are not
+ // equal, short circuit false (NotEqual).
+ ScratchRegisterScope scratch(*this);
+
+ if (rhs.isGCThing()) {
+ ma_cmp(lhs.payloadReg(), ImmGCPtr(rhs.toGCThing()), scratch);
+ } else {
+ ma_cmp(lhs.payloadReg(), Imm32(rhs.toNunboxPayload()), scratch);
+ }
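+  // The tag comparison is predicated on Equal, so it only updates the flags
+  // when the payloads matched; otherwise the NotEqual result of the payload
+  // comparison is preserved for the branch below.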
+ ma_cmp(lhs.typeReg(), Imm32(rhs.toNunboxTag()), scratch, Equal);
+ ma_b(label, cond);
+}
+
+// ========================================================================
+// Memory access primitives.
+template <typename T>
+void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
+ MIRType valueType, const T& dest) {
+ MOZ_ASSERT(valueType < MIRType::Value);
+
+ if (valueType == MIRType::Double) {
+ storeDouble(value.reg().typedReg().fpu(), dest);
+ return;
+ }
+
+ // Store the type tag.
+ storeTypeTag(ImmType(ValueTypeFromMIRType(valueType)), dest);
+
+ // Store the payload.
+ if (value.constant()) {
+ storePayload(value.value(), dest);
+ } else {
+ storePayload(value.reg().typedReg().gpr(), dest);
+ }
+}
+
+template void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
+ MIRType valueType,
+ const Address& dest);
+template void MacroAssembler::storeUnboxedValue(
+ const ConstantOrRegister& value, MIRType valueType,
+ const BaseObjectElementIndex& dest);
+
+CodeOffset MacroAssembler::wasmTrapInstruction() {
+ return CodeOffset(as_illegal_trap().getOffset());
+}
+
+void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
+ Register boundsCheckLimit, Label* ok) {
+ as_cmp(index, O2Reg(boundsCheckLimit));
+ as_b(ok, cond);
+ if (JitOptions.spectreIndexMasking) {
+ ma_mov(boundsCheckLimit, index, LeaveCC, cond);
+ }
+}
+
+void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
+ Address boundsCheckLimit, Label* ok) {
+ ScratchRegisterScope scratch(*this);
+ ma_ldr(DTRAddr(boundsCheckLimit.base, DtrOffImm(boundsCheckLimit.offset)),
+ scratch);
+ as_cmp(index, O2Reg(scratch));
+ as_b(ok, cond);
+ if (JitOptions.spectreIndexMasking) {
+ ma_mov(scratch, index, LeaveCC, cond);
+ }
+}
+
+void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
+ Register64 boundsCheckLimit, Label* ok) {
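+  // On this 32-bit target the bounds-check limit fits in 32 bits, so an
+  // index with a non-zero high word is necessarily out of bounds; otherwise
+  // defer to the 32-bit check on the low word.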
+ Label notOk;
+ cmp32(index.high, Imm32(0));
+ j(Assembler::NonZero, &notOk);
+ wasmBoundsCheck32(cond, index.low, boundsCheckLimit.low, ok);
+ bind(&notOk);
+}
+
+void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
+ Address boundsCheckLimit, Label* ok) {
+ Label notOk;
+ cmp32(index.high, Imm32(0));
+ j(Assembler::NonZero, &notOk);
+ wasmBoundsCheck32(cond, index.low, boundsCheckLimit, ok);
+ bind(&notOk);
+}
+
+void MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input,
+ Register output,
+ bool isSaturating,
+ Label* oolEntry) {
+ wasmTruncateToInt32(input, output, MIRType::Double, /* isUnsigned= */ true,
+ isSaturating, oolEntry);
+}
+
+void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input,
+ Register output,
+ bool isSaturating,
+ Label* oolEntry) {
+ wasmTruncateToInt32(input, output, MIRType::Double, /* isUnsigned= */ false,
+ isSaturating, oolEntry);
+}
+
+void MacroAssembler::wasmTruncateFloat32ToUInt32(FloatRegister input,
+ Register output,
+ bool isSaturating,
+ Label* oolEntry) {
+ wasmTruncateToInt32(input, output, MIRType::Float32, /* isUnsigned= */ true,
+ isSaturating, oolEntry);
+}
+
+void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input,
+ Register output,
+ bool isSaturating,
+ Label* oolEntry) {
+ wasmTruncateToInt32(input, output, MIRType::Float32, /* isUnsigned= */ false,
+ isSaturating, oolEntry);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF32ToI32(FloatRegister input,
+ Register output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ outOfLineWasmTruncateToIntCheck(input, MIRType::Float32, MIRType::Int32,
+ flags, rejoin, off);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF64ToI32(FloatRegister input,
+ Register output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ outOfLineWasmTruncateToIntCheck(input, MIRType::Double, MIRType::Int32, flags,
+ rejoin, off);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF32ToI64(FloatRegister input,
+ Register64 output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ outOfLineWasmTruncateToIntCheck(input, MIRType::Float32, MIRType::Int64,
+ flags, rejoin, off);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF64ToI64(FloatRegister input,
+ Register64 output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ outOfLineWasmTruncateToIntCheck(input, MIRType::Double, MIRType::Int64, flags,
+ rejoin, off);
+}
+
+void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
+ Register memoryBase, Register ptr,
+ Register ptrScratch, AnyRegister output) {
+ wasmLoadImpl(access, memoryBase, ptr, ptrScratch, output,
+ Register64::Invalid());
+}
+
+void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access,
+ Register memoryBase, Register ptr,
+ Register ptrScratch, Register64 output) {
+ MOZ_ASSERT_IF(access.isAtomic(), access.byteSize() <= 4);
+ wasmLoadImpl(access, memoryBase, ptr, ptrScratch, AnyRegister(), output);
+}
+
+void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access,
+ AnyRegister value, Register memoryBase,
+ Register ptr, Register ptrScratch) {
+ wasmStoreImpl(access, value, Register64::Invalid(), memoryBase, ptr,
+ ptrScratch);
+}
+
+void MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access,
+ Register64 value, Register memoryBase,
+ Register ptr, Register ptrScratch) {
+ MOZ_ASSERT(!access.isAtomic());
+ wasmStoreImpl(access, AnyRegister(), value, memoryBase, ptr, ptrScratch);
+}
+
+// ========================================================================
+// Primitive atomic operations.
+
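+// Compute the effective address of the access into the caller-provided
+// register `r` and return the register to use as the pointer; the Address
+// form returns the base register directly when the offset is zero.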
+static Register ComputePointerForAtomic(MacroAssembler& masm,
+ const BaseIndex& src, Register r) {
+ Register base = src.base;
+ Register index = src.index;
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+ int32_t offset = src.offset;
+
+ ScratchRegisterScope scratch(masm);
+
+ masm.as_add(r, base, lsl(index, scale));
+ if (offset != 0) {
+ masm.ma_add(r, Imm32(offset), r, scratch);
+ }
+ return r;
+}
+
+static Register ComputePointerForAtomic(MacroAssembler& masm,
+ const Address& src, Register r) {
+ ScratchRegisterScope scratch(masm);
+ if (src.offset == 0) {
+ return src.base;
+ }
+ masm.ma_add(src.base, Imm32(src.offset), r, scratch);
+ return r;
+}
+
+// General algorithm:
+//
+// ... ptr, <addr> ; compute address of item
+// dmb
+// L0 ldrex* output, [ptr]
+// sxt* output, output, 0 ; sign-extend if applicable
+// *xt* tmp, oldval, 0 ; sign-extend or zero-extend if applicable
+// cmp output, tmp
+// bne L1 ; failed - values are different
+// strex* tmp, newval, [ptr]
+// cmp tmp, 1
+// beq L0 ; failed - location is dirty, retry
+// L1 dmb
+//
+// Discussion here: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html.
+// However note that that discussion uses 'isb' as the trailing fence.
+// I've not quite figured out why, and I've gone with dmb here which
+// is safe. Also see the LLVM source, which uses 'dmb ish' generally.
+// (Apple's Swift CPU apparently handles ish in a non-default, faster
+// way.)
+
+template <typename T>
+static void CompareExchange(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type type, const Synchronization& sync,
+ const T& mem, Register oldval, Register newval,
+ Register output) {
+ bool signExtend = Scalar::isSignedIntType(type);
+ unsigned nbytes = Scalar::byteSize(type);
+
+ MOZ_ASSERT(nbytes <= 4);
+
+ Label again;
+ Label done;
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
+ ScratchRegisterScope scratch(masm);
+
+ // NOTE: the generated code must match the assembly code in gen_cmpxchg in
+ // GenerateAtomicOperations.py
+ masm.memoryBarrierBefore(sync);
+
+ masm.bind(&again);
+
+ BufferOffset firstAccess;
+ switch (nbytes) {
+ case 1:
+ firstAccess = masm.as_ldrexb(output, ptr);
+ if (signExtend) {
+ masm.as_sxtb(output, output, 0);
+ masm.as_sxtb(scratch, oldval, 0);
+ } else {
+ masm.as_uxtb(scratch, oldval, 0);
+ }
+ break;
+ case 2:
+ firstAccess = masm.as_ldrexh(output, ptr);
+ if (signExtend) {
+ masm.as_sxth(output, output, 0);
+ masm.as_sxth(scratch, oldval, 0);
+ } else {
+ masm.as_uxth(scratch, oldval, 0);
+ }
+ break;
+ case 4:
+ firstAccess = masm.as_ldrex(output, ptr);
+ break;
+ }
+ if (access) {
+ masm.append(*access, firstAccess.getOffset());
+ }
+
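+  // For sub-word accesses, compare against the sign- or zero-extended copy
+  // of oldval computed into scratch above; 4-byte accesses can compare
+  // against oldval directly.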
+ if (nbytes < 4) {
+ masm.as_cmp(output, O2Reg(scratch));
+ } else {
+ masm.as_cmp(output, O2Reg(oldval));
+ }
+ masm.as_b(&done, MacroAssembler::NotEqual);
+ switch (nbytes) {
+ case 1:
+ masm.as_strexb(scratch, newval, ptr);
+ break;
+ case 2:
+ masm.as_strexh(scratch, newval, ptr);
+ break;
+ case 4:
+ masm.as_strex(scratch, newval, ptr);
+ break;
+ }
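+  // strex* writes 0 to scratch on success and 1 on failure; retry the
+  // ldrex/strex sequence until the exclusive store succeeds.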
+ masm.as_cmp(scratch, Imm8(1));
+ masm.as_b(&again, MacroAssembler::Equal);
+ masm.bind(&done);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+void MacroAssembler::compareExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const Address& address, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, nullptr, type, sync, address, oldval, newval, output);
+}
+
+void MacroAssembler::compareExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const BaseIndex& address, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, nullptr, type, sync, address, oldval, newval, output);
+}
+
+void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
+ const Address& mem, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, &access, access.type(), access.sync(), mem, oldval,
+ newval, output);
+}
+
+void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, &access, access.type(), access.sync(), mem, oldval,
+ newval, output);
+}
+
+template <typename T>
+static void AtomicExchange(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type type, const Synchronization& sync,
+ const T& mem, Register value, Register output) {
+ bool signExtend = Scalar::isSignedIntType(type);
+ unsigned nbytes = Scalar::byteSize(type);
+
+ MOZ_ASSERT(nbytes <= 4);
+
+ // Bug 1077321: We may further optimize for ARMv8 (AArch32) here.
+ Label again;
+ Label done;
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
+ ScratchRegisterScope scratch(masm);
+
+ // NOTE: the generated code must match the assembly code in gen_exchange in
+ // GenerateAtomicOperations.py
+ masm.memoryBarrierBefore(sync);
+
+ masm.bind(&again);
+
+ BufferOffset firstAccess;
+ switch (nbytes) {
+ case 1:
+ firstAccess = masm.as_ldrexb(output, ptr);
+ if (signExtend) {
+ masm.as_sxtb(output, output, 0);
+ }
+ masm.as_strexb(scratch, value, ptr);
+ break;
+ case 2:
+ firstAccess = masm.as_ldrexh(output, ptr);
+ if (signExtend) {
+ masm.as_sxth(output, output, 0);
+ }
+ masm.as_strexh(scratch, value, ptr);
+ break;
+ case 4:
+ firstAccess = masm.as_ldrex(output, ptr);
+ masm.as_strex(scratch, value, ptr);
+ break;
+ }
+ if (access) {
+ masm.append(*access, firstAccess.getOffset());
+ }
+
+ masm.as_cmp(scratch, Imm8(1));
+ masm.as_b(&again, MacroAssembler::Equal);
+ masm.bind(&done);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+void MacroAssembler::atomicExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const Address& address, Register value,
+ Register output) {
+ AtomicExchange(*this, nullptr, type, sync, address, value, output);
+}
+
+void MacroAssembler::atomicExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const BaseIndex& address, Register value,
+ Register output) {
+ AtomicExchange(*this, nullptr, type, sync, address, value, output);
+}
+
+void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
+ const Address& mem, Register value,
+ Register output) {
+ AtomicExchange(*this, &access, access.type(), access.sync(), mem, value,
+ output);
+}
+
+void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem, Register value,
+ Register output) {
+ AtomicExchange(*this, &access, access.type(), access.sync(), mem, value,
+ output);
+}
+
+// General algorithm:
+//
+// ... ptr, <addr> ; compute address of item
+// dmb
+// L0 ldrex* output, [ptr]
+// sxt* output, output, 0 ; sign-extend if applicable
+// OP tmp, output, value ; compute value to store
+// strex* tmp2, tmp, [ptr] ; tmp2 required by strex
+// cmp tmp2, 1
+// beq L0 ; failed - location is dirty, retry
+// dmb ; ordering barrier required
+//
+// Also see notes above at compareExchange re the barrier strategy.
+//
+// Observe that the value operand need not be sign-extended, because no OP
+// makes use of bits to the left of the bits indicated by the width of the
+// element, and neither the output nor the bits that get stored are affected
+// by those upper bits.
+
+template <typename T>
+static void AtomicFetchOp(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type type, const Synchronization& sync,
+ AtomicOp op, const Register& value, const T& mem,
+ Register flagTemp, Register output) {
+ bool signExtend = Scalar::isSignedIntType(type);
+ unsigned nbytes = Scalar::byteSize(type);
+
+ MOZ_ASSERT(nbytes <= 4);
+ MOZ_ASSERT(flagTemp != InvalidReg);
+ MOZ_ASSERT(output != value);
+
+ Label again;
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
+ // NOTE: the generated code must match the assembly code in gen_fetchop in
+ // GenerateAtomicOperations.py
+ masm.memoryBarrierBefore(sync);
+
+ ScratchRegisterScope scratch(masm);
+
+ masm.bind(&again);
+
+ BufferOffset firstAccess;
+ switch (nbytes) {
+ case 1:
+ firstAccess = masm.as_ldrexb(output, ptr);
+ if (signExtend) {
+ masm.as_sxtb(output, output, 0);
+ }
+ break;
+ case 2:
+ firstAccess = masm.as_ldrexh(output, ptr);
+ if (signExtend) {
+ masm.as_sxth(output, output, 0);
+ }
+ break;
+ case 4:
+ firstAccess = masm.as_ldrex(output, ptr);
+ break;
+ }
+ if (access) {
+ masm.append(*access, firstAccess.getOffset());
+ }
+
+ switch (op) {
+ case AtomicFetchAddOp:
+ masm.as_add(scratch, output, O2Reg(value));
+ break;
+ case AtomicFetchSubOp:
+ masm.as_sub(scratch, output, O2Reg(value));
+ break;
+ case AtomicFetchAndOp:
+ masm.as_and(scratch, output, O2Reg(value));
+ break;
+ case AtomicFetchOrOp:
+ masm.as_orr(scratch, output, O2Reg(value));
+ break;
+ case AtomicFetchXorOp:
+ masm.as_eor(scratch, output, O2Reg(value));
+ break;
+ }
+ // Rd must differ from the two other arguments to strex.
+ switch (nbytes) {
+ case 1:
+ masm.as_strexb(flagTemp, scratch, ptr);
+ break;
+ case 2:
+ masm.as_strexh(flagTemp, scratch, ptr);
+ break;
+ case 4:
+ masm.as_strex(flagTemp, scratch, ptr);
+ break;
+ }
+ masm.as_cmp(flagTemp, Imm8(1));
+ masm.as_b(&again, MacroAssembler::Equal);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+void MacroAssembler::atomicFetchOp(Scalar::Type type,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const Address& mem,
+ Register temp, Register output) {
+ AtomicFetchOp(*this, nullptr, type, sync, op, value, mem, temp, output);
+}
+
+void MacroAssembler::atomicFetchOp(Scalar::Type type,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const BaseIndex& mem,
+ Register temp, Register output) {
+ AtomicFetchOp(*this, nullptr, type, sync, op, value, mem, temp, output);
+}
+
+void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const Address& mem, Register temp,
+ Register output) {
+ AtomicFetchOp(*this, &access, access.type(), access.sync(), op, value, mem,
+ temp, output);
+}
+
+void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const BaseIndex& mem, Register temp,
+ Register output) {
+ AtomicFetchOp(*this, &access, access.type(), access.sync(), op, value, mem,
+ temp, output);
+}
+
+// Uses both scratch registers, one for the address and one for a temp,
+// but needs two temps for strex:
+//
+// ... ptr, <addr> ; compute address of item
+// dmb
+// L0 ldrex* temp, [ptr]
+// OP temp, temp, value ; compute value to store
+// strex* temp2, temp, [ptr]
+// cmp temp2, 1
+// beq L0 ; failed - location is dirty, retry
+// dmb ; ordering barrier required
+
+template <typename T>
+static void AtomicEffectOp(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type type, const Synchronization& sync,
+ AtomicOp op, const Register& value, const T& mem,
+ Register flagTemp) {
+ unsigned nbytes = Scalar::byteSize(type);
+
+ MOZ_ASSERT(nbytes <= 4);
+ MOZ_ASSERT(flagTemp != InvalidReg);
+
+ Label again;
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
+ masm.memoryBarrierBefore(sync);
+
+ ScratchRegisterScope scratch(masm);
+
+ masm.bind(&again);
+
+ BufferOffset firstAccess;
+ switch (nbytes) {
+ case 1:
+ firstAccess = masm.as_ldrexb(scratch, ptr);
+ break;
+ case 2:
+ firstAccess = masm.as_ldrexh(scratch, ptr);
+ break;
+ case 4:
+ firstAccess = masm.as_ldrex(scratch, ptr);
+ break;
+ }
+ if (access) {
+ masm.append(*access, firstAccess.getOffset());
+ }
+
+ switch (op) {
+ case AtomicFetchAddOp:
+ masm.as_add(scratch, scratch, O2Reg(value));
+ break;
+ case AtomicFetchSubOp:
+ masm.as_sub(scratch, scratch, O2Reg(value));
+ break;
+ case AtomicFetchAndOp:
+ masm.as_and(scratch, scratch, O2Reg(value));
+ break;
+ case AtomicFetchOrOp:
+ masm.as_orr(scratch, scratch, O2Reg(value));
+ break;
+ case AtomicFetchXorOp:
+ masm.as_eor(scratch, scratch, O2Reg(value));
+ break;
+ }
+ // Rd must differ from the two other arguments to strex.
+ switch (nbytes) {
+ case 1:
+ masm.as_strexb(flagTemp, scratch, ptr);
+ break;
+ case 2:
+ masm.as_strexh(flagTemp, scratch, ptr);
+ break;
+ case 4:
+ masm.as_strex(flagTemp, scratch, ptr);
+ break;
+ }
+ masm.as_cmp(flagTemp, Imm8(1));
+ masm.as_b(&again, MacroAssembler::Equal);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const Address& mem, Register temp) {
+ AtomicEffectOp(*this, &access, access.type(), access.sync(), op, value, mem,
+ temp);
+}
+
+void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const BaseIndex& mem, Register temp) {
+ AtomicEffectOp(*this, &access, access.type(), access.sync(), op, value, mem,
+ temp);
+}
+
+template <typename T>
+static void AtomicLoad64(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ const Synchronization& sync, const T& mem,
+ Register64 output) {
+ MOZ_ASSERT((output.low.code() & 1) == 0);
+ MOZ_ASSERT(output.low.code() + 1 == output.high.code());
+
+ masm.memoryBarrierBefore(sync);
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
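+  // A single ldrexd performs the 64-bit load atomically; clrex then releases
+  // the exclusive monitor since no matching strexd follows.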
+ BufferOffset load = masm.as_ldrexd(output.low, output.high, ptr);
+ if (access) {
+ masm.append(*access, load.getOffset());
+ }
+ masm.as_clrex();
+
+ masm.memoryBarrierAfter(sync);
+}
+
+template <typename T>
+static void WasmAtomicLoad64(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc& access, const T& mem,
+ Register64 temp, Register64 output) {
+ MOZ_ASSERT(temp.low == InvalidReg && temp.high == InvalidReg);
+
+ AtomicLoad64(masm, &access, access.sync(), mem, output);
+}
+
+void MacroAssembler::wasmAtomicLoad64(const wasm::MemoryAccessDesc& access,
+ const Address& mem, Register64 temp,
+ Register64 output) {
+ WasmAtomicLoad64(*this, access, mem, temp, output);
+}
+
+void MacroAssembler::wasmAtomicLoad64(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem, Register64 temp,
+ Register64 output) {
+ WasmAtomicLoad64(*this, access, mem, temp, output);
+}
+
+template <typename T>
+static void CompareExchange64(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ const Synchronization& sync, const T& mem,
+ Register64 expect, Register64 replace,
+ Register64 output) {
+ MOZ_ASSERT(expect != replace && replace != output && output != expect);
+
+ MOZ_ASSERT((replace.low.code() & 1) == 0);
+ MOZ_ASSERT(replace.low.code() + 1 == replace.high.code());
+
+ MOZ_ASSERT((output.low.code() & 1) == 0);
+ MOZ_ASSERT(output.low.code() + 1 == output.high.code());
+
+ Label again;
+ Label done;
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
+ // NOTE: the generated code must match the assembly code in gen_cmpxchg in
+ // GenerateAtomicOperations.py
+ masm.memoryBarrierBefore(sync);
+
+ masm.bind(&again);
+ BufferOffset load = masm.as_ldrexd(output.low, output.high, ptr);
+ if (access) {
+ masm.append(*access, load.getOffset());
+ }
+
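+  // Compare both halves; the second cmp is predicated on Equal, so the
+  // combined result is NotEqual unless both halves match.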
+ masm.as_cmp(output.low, O2Reg(expect.low));
+ masm.as_cmp(output.high, O2Reg(expect.high), MacroAssembler::Equal);
+ masm.as_b(&done, MacroAssembler::NotEqual);
+
+ ScratchRegisterScope scratch(masm);
+
+ // Rd (temp) must differ from the two other arguments to strex.
+ masm.as_strexd(scratch, replace.low, replace.high, ptr);
+ masm.as_cmp(scratch, Imm8(1));
+ masm.as_b(&again, MacroAssembler::Equal);
+ masm.bind(&done);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
+ const Address& mem,
+ Register64 expect,
+ Register64 replace,
+ Register64 output) {
+ CompareExchange64(*this, &access, access.sync(), mem, expect, replace,
+ output);
+}
+
+void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem,
+ Register64 expect,
+ Register64 replace,
+ Register64 output) {
+ CompareExchange64(*this, &access, access.sync(), mem, expect, replace,
+ output);
+}
+
+void MacroAssembler::compareExchange64(const Synchronization& sync,
+ const Address& mem, Register64 expect,
+ Register64 replace, Register64 output) {
+ CompareExchange64(*this, nullptr, sync, mem, expect, replace, output);
+}
+
+void MacroAssembler::compareExchange64(const Synchronization& sync,
+ const BaseIndex& mem, Register64 expect,
+ Register64 replace, Register64 output) {
+ CompareExchange64(*this, nullptr, sync, mem, expect, replace, output);
+}
+
+template <typename T>
+static void AtomicExchange64(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ const Synchronization& sync, const T& mem,
+ Register64 value, Register64 output) {
+ MOZ_ASSERT(output != value);
+
+ MOZ_ASSERT((value.low.code() & 1) == 0);
+ MOZ_ASSERT(value.low.code() + 1 == value.high.code());
+
+ MOZ_ASSERT((output.low.code() & 1) == 0);
+ MOZ_ASSERT(output.low.code() + 1 == output.high.code());
+
+ Label again;
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
+ masm.memoryBarrierBefore(sync);
+
+ masm.bind(&again);
+ BufferOffset load = masm.as_ldrexd(output.low, output.high, ptr);
+ if (access) {
+ masm.append(*access, load.getOffset());
+ }
+
+ ScratchRegisterScope scratch(masm);
+
+ masm.as_strexd(scratch, value.low, value.high, ptr);
+ masm.as_cmp(scratch, Imm8(1));
+ masm.as_b(&again, MacroAssembler::Equal);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+template <typename T>
+static void WasmAtomicExchange64(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc& access,
+ const T& mem, Register64 value,
+ Register64 output) {
+ AtomicExchange64(masm, &access, access.sync(), mem, value, output);
+}
+
+void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
+ const Address& mem, Register64 value,
+ Register64 output) {
+ WasmAtomicExchange64(*this, access, mem, value, output);
+}
+
+void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem,
+ Register64 value, Register64 output) {
+ WasmAtomicExchange64(*this, access, mem, value, output);
+}
+
+void MacroAssembler::atomicExchange64(const Synchronization& sync,
+ const Address& mem, Register64 value,
+ Register64 output) {
+ AtomicExchange64(*this, nullptr, sync, mem, value, output);
+}
+
+void MacroAssembler::atomicExchange64(const Synchronization& sync,
+ const BaseIndex& mem, Register64 value,
+ Register64 output) {
+ AtomicExchange64(*this, nullptr, sync, mem, value, output);
+}
+
+template <typename T>
+static void AtomicFetchOp64(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ const Synchronization& sync, AtomicOp op,
+ Register64 value, const T& mem, Register64 temp,
+ Register64 output) {
+ MOZ_ASSERT(temp.low != InvalidReg && temp.high != InvalidReg);
+ MOZ_ASSERT(output != value);
+ MOZ_ASSERT(temp != value);
+
+ MOZ_ASSERT((temp.low.code() & 1) == 0);
+ MOZ_ASSERT(temp.low.code() + 1 == temp.high.code());
+
+ // We could avoid this pair requirement but in that case we would end up
+ // with two moves in the loop to preserve the loaded value in output. The
+ // prize would be less register spilling around this op since the pair
+ // requirement will tend to force more spilling.
+
+ MOZ_ASSERT((output.low.code() & 1) == 0);
+ MOZ_ASSERT(output.low.code() + 1 == output.high.code());
+
+ Label again;
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
+ masm.memoryBarrierBefore(sync);
+
+ masm.bind(&again);
+ BufferOffset load = masm.as_ldrexd(output.low, output.high, ptr);
+ if (access) {
+ masm.append(*access, load.getOffset());
+ }
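+  // 64-bit add/sub propagate the carry/borrow across the register pair via
+  // SetCC + adc/sbc; the bitwise ops act on the halves independently.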
+ switch (op) {
+ case AtomicFetchAddOp:
+ masm.as_add(temp.low, output.low, O2Reg(value.low), SetCC);
+ masm.as_adc(temp.high, output.high, O2Reg(value.high));
+ break;
+ case AtomicFetchSubOp:
+ masm.as_sub(temp.low, output.low, O2Reg(value.low), SetCC);
+ masm.as_sbc(temp.high, output.high, O2Reg(value.high));
+ break;
+ case AtomicFetchAndOp:
+ masm.as_and(temp.low, output.low, O2Reg(value.low));
+ masm.as_and(temp.high, output.high, O2Reg(value.high));
+ break;
+ case AtomicFetchOrOp:
+ masm.as_orr(temp.low, output.low, O2Reg(value.low));
+ masm.as_orr(temp.high, output.high, O2Reg(value.high));
+ break;
+ case AtomicFetchXorOp:
+ masm.as_eor(temp.low, output.low, O2Reg(value.low));
+ masm.as_eor(temp.high, output.high, O2Reg(value.high));
+ break;
+ }
+
+ ScratchRegisterScope scratch(masm);
+
+  // Rd (here scratch) must differ from the two other arguments to strex.
+ masm.as_strexd(scratch, temp.low, temp.high, ptr);
+ masm.as_cmp(scratch, Imm8(1));
+ masm.as_b(&again, MacroAssembler::Equal);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+template <typename T>
+static void WasmAtomicFetchOp64(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register64 value, const T& mem,
+ Register64 temp, Register64 output) {
+ AtomicFetchOp64(masm, &access, access.sync(), op, value, mem, temp, output);
+}
+
+void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register64 value,
+ const Address& mem, Register64 temp,
+ Register64 output) {
+ WasmAtomicFetchOp64(*this, access, op, value, mem, temp, output);
+}
+
+void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register64 value,
+ const BaseIndex& mem, Register64 temp,
+ Register64 output) {
+ WasmAtomicFetchOp64(*this, access, op, value, mem, temp, output);
+}
+
+void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const Address& mem,
+ Register64 temp, Register64 output) {
+ AtomicFetchOp64(*this, nullptr, sync, op, value, mem, temp, output);
+}
+
+void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const BaseIndex& mem,
+ Register64 temp, Register64 output) {
+ AtomicFetchOp64(*this, nullptr, sync, op, value, mem, temp, output);
+}
+
+void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const Address& mem,
+ Register64 temp) {
+ AtomicFetchOp64(*this, nullptr, sync, op, value, mem, temp, temp);
+}
+
+void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const BaseIndex& mem,
+ Register64 temp) {
+ AtomicFetchOp64(*this, nullptr, sync, op, value, mem, temp, temp);
+}
+
+// ========================================================================
+// JS atomic operations.
+
+template <typename T>
+static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
+ const Synchronization& sync, const T& mem,
+ Register oldval, Register newval, Register temp,
+ AnyRegister output) {
+ if (arrayType == Scalar::Uint32) {
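+    // A Uint32 result may not fit in an int32, so produce it in the GPR temp
+    // and hand it back as a double.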
+ masm.compareExchange(arrayType, sync, mem, oldval, newval, temp);
+ masm.convertUInt32ToDouble(temp, output.fpu());
+ } else {
+ masm.compareExchange(arrayType, sync, mem, oldval, newval, output.gpr());
+ }
+}
+
+void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const Address& mem, Register oldval,
+ Register newval, Register temp,
+ AnyRegister output) {
+ CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
+}
+
+void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const BaseIndex& mem, Register oldval,
+ Register newval, Register temp,
+ AnyRegister output) {
+ CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
+}
+
+template <typename T>
+static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
+ const Synchronization& sync, const T& mem,
+ Register value, Register temp,
+ AnyRegister output) {
+ if (arrayType == Scalar::Uint32) {
+ masm.atomicExchange(arrayType, sync, mem, value, temp);
+ masm.convertUInt32ToDouble(temp, output.fpu());
+ } else {
+ masm.atomicExchange(arrayType, sync, mem, value, output.gpr());
+ }
+}
+
+void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const Address& mem, Register value,
+ Register temp, AnyRegister output) {
+ AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
+}
+
+void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const BaseIndex& mem, Register value,
+ Register temp, AnyRegister output) {
+ AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
+}
+
+template <typename T>
+static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const T& mem, Register temp1,
+ Register temp2, AnyRegister output) {
+ if (arrayType == Scalar::Uint32) {
+ masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1);
+ masm.convertUInt32ToDouble(temp1, output.fpu());
+ } else {
+ masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr());
+ }
+}
+
+void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const Address& mem,
+ Register temp1, Register temp2,
+ AnyRegister output) {
+ AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
+}
+
+void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const BaseIndex& mem,
+ Register temp1, Register temp2,
+ AnyRegister output) {
+ AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
+}
+
+void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const BaseIndex& mem,
+ Register temp) {
+ AtomicEffectOp(*this, nullptr, arrayType, sync, op, value, mem, temp);
+}
+
+void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const Address& mem,
+ Register temp) {
+ AtomicEffectOp(*this, nullptr, arrayType, sync, op, value, mem, temp);
+}
+
+// ========================================================================
+// Primitive atomic operations.
+
+void MacroAssembler::atomicLoad64(const Synchronization& sync,
+ const Address& mem, Register64 output) {
+ AtomicLoad64(*this, nullptr, sync, mem, output);
+}
+
+void MacroAssembler::atomicLoad64(const Synchronization& sync,
+ const BaseIndex& mem, Register64 output) {
+ AtomicLoad64(*this, nullptr, sync, mem, output);
+}
+
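+// A plain strd is not guaranteed to be single-copy atomic here, so the 64-bit
+// store reuses the exchange loop above and discards the previous value into
+// temp.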
+void MacroAssembler::atomicStore64(const Synchronization& sync,
+ const Address& mem, Register64 value,
+ Register64 temp) {
+ AtomicExchange64(*this, nullptr, sync, mem, value, temp);
+}
+
+void MacroAssembler::atomicStore64(const Synchronization& sync,
+ const BaseIndex& mem, Register64 value,
+ Register64 temp) {
+ AtomicExchange64(*this, nullptr, sync, mem, value, temp);
+}
+
+// ========================================================================
+// Convert floating point.
+
+bool MacroAssembler::convertUInt64ToDoubleNeedsTemp() { return false; }
+
+void MacroAssembler::convertUInt64ToDouble(Register64 src, FloatRegister dest,
+ Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+ ScratchDoubleScope scratchDouble(*this);
+
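+  // dest = double(src.high) * 2^32 + double(src.low), with the 2^32 scale
+  // factor loaded from TO_DOUBLE_HIGH_SCALE.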
+ convertUInt32ToDouble(src.high, dest);
+ {
+ ScratchRegisterScope scratch(*this);
+ movePtr(ImmPtr(&TO_DOUBLE_HIGH_SCALE), scratch);
+ ma_vldr(Operand(Address(scratch, 0)).toVFPAddr(), scratchDouble);
+ }
+ mulDouble(scratchDouble, dest);
+ convertUInt32ToDouble(src.low, scratchDouble);
+ addDouble(scratchDouble, dest);
+}
+
+void MacroAssembler::convertInt64ToDouble(Register64 src, FloatRegister dest) {
+ ScratchDoubleScope scratchDouble(*this);
+
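+  // Same scheme as the unsigned case, except the high word is converted as a
+  // signed value: dest = double(int32_t(src.high)) * 2^32 + double(src.low).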
+ convertInt32ToDouble(src.high, dest);
+ {
+ ScratchRegisterScope scratch(*this);
+ movePtr(ImmPtr(&TO_DOUBLE_HIGH_SCALE), scratch);
+ ma_vldr(Operand(Address(scratch, 0)).toVFPAddr(), scratchDouble);
+ }
+ mulDouble(scratchDouble, dest);
+ convertUInt32ToDouble(src.low, scratchDouble);
+ addDouble(scratchDouble, dest);
+}
+
+void MacroAssembler::convertIntPtrToDouble(Register src, FloatRegister dest) {
+ convertInt32ToDouble(src, dest);
+}
+
+extern "C" {
+extern MOZ_EXPORT int64_t __aeabi_idivmod(int, int);
+extern MOZ_EXPORT int64_t __aeabi_uidivmod(int, int);
+}
+
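+// Note that, per the ARM EABI, __aeabi_idivmod and __aeabi_uidivmod return
+// the quotient in r0 and the remainder in r1, which is why the quotient is
+// taken from ReturnRegVal0 and the remainder from ReturnRegVal1 below.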
+inline void EmitRemainderOrQuotient(bool isRemainder, MacroAssembler& masm,
+ Register rhs, Register lhsOutput,
+ bool isUnsigned,
+ const LiveRegisterSet& volatileLiveRegs) {
+ // Currently this helper can't handle this situation.
+ MOZ_ASSERT(lhsOutput != rhs);
+
+ if (HasIDIV()) {
+ if (isRemainder) {
+ masm.remainder32(rhs, lhsOutput, isUnsigned);
+ } else {
+ masm.quotient32(rhs, lhsOutput, isUnsigned);
+ }
+ } else {
+    // Ensure that the output registers are saved and restored properly.
+ MOZ_ASSERT(volatileLiveRegs.has(ReturnRegVal0));
+ MOZ_ASSERT(volatileLiveRegs.has(ReturnRegVal1));
+
+ masm.PushRegsInMask(volatileLiveRegs);
+ using Fn = int64_t (*)(int, int);
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.setupUnalignedABICall(scratch);
+ }
+ masm.passABIArg(lhsOutput);
+ masm.passABIArg(rhs);
+ if (isUnsigned) {
+ masm.callWithABI<Fn, __aeabi_uidivmod>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther);
+ } else {
+ masm.callWithABI<Fn, __aeabi_idivmod>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther);
+ }
+ if (isRemainder) {
+ masm.mov(ReturnRegVal1, lhsOutput);
+ } else {
+ masm.mov(ReturnRegVal0, lhsOutput);
+ }
+
+ LiveRegisterSet ignore;
+ ignore.add(lhsOutput);
+ masm.PopRegsInMaskIgnore(volatileLiveRegs, ignore);
+ }
+}
+
+void MacroAssembler::flexibleQuotient32(
+ Register rhs, Register srcDest, bool isUnsigned,
+ const LiveRegisterSet& volatileLiveRegs) {
+ EmitRemainderOrQuotient(false, *this, rhs, srcDest, isUnsigned,
+ volatileLiveRegs);
+}
+
+void MacroAssembler::flexibleRemainder32(
+ Register rhs, Register srcDest, bool isUnsigned,
+ const LiveRegisterSet& volatileLiveRegs) {
+ EmitRemainderOrQuotient(true, *this, rhs, srcDest, isUnsigned,
+ volatileLiveRegs);
+}
+
+void MacroAssembler::flexibleDivMod32(Register rhs, Register lhsOutput,
+ Register remOutput, bool isUnsigned,
+ const LiveRegisterSet& volatileLiveRegs) {
+ // Currently this helper can't handle this situation.
+ MOZ_ASSERT(lhsOutput != rhs);
+
+ if (HasIDIV()) {
+ mov(lhsOutput, remOutput);
+ remainder32(rhs, remOutput, isUnsigned);
+ quotient32(rhs, lhsOutput, isUnsigned);
+ } else {
+    // Ensure that the output registers are saved and restored properly.
+ MOZ_ASSERT(volatileLiveRegs.has(ReturnRegVal0));
+ MOZ_ASSERT(volatileLiveRegs.has(ReturnRegVal1));
+ PushRegsInMask(volatileLiveRegs);
+
+ using Fn = int64_t (*)(int, int);
+ {
+ ScratchRegisterScope scratch(*this);
+ setupUnalignedABICall(scratch);
+ }
+ passABIArg(lhsOutput);
+ passABIArg(rhs);
+ if (isUnsigned) {
+ callWithABI<Fn, __aeabi_uidivmod>(MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckOther);
+ } else {
+ callWithABI<Fn, __aeabi_idivmod>(MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckOther);
+ }
+ moveRegPair(ReturnRegVal0, ReturnRegVal1, lhsOutput, remOutput);
+
+ LiveRegisterSet ignore;
+ ignore.add(remOutput);
+ ignore.add(lhsOutput);
+ PopRegsInMaskIgnore(volatileLiveRegs, ignore);
+ }
+}
+
+CodeOffset MacroAssembler::moveNearAddressWithPatch(Register dest) {
+ return movWithPatch(ImmPtr(nullptr), dest);
+}
+
+void MacroAssembler::patchNearAddressMove(CodeLocationLabel loc,
+ CodeLocationLabel target) {
+ PatchDataWithValueCheck(loc, ImmPtr(target.raw()), ImmPtr(nullptr));
+}
+
+// ========================================================================
+// Spectre Mitigations.
+
+void MacroAssembler::speculationBarrier() {
+ // Spectre mitigation recommended by ARM for cases where csel/cmov cannot be
+ // used.
+ as_csdb();
+}
+
+void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ floorf(src, dest, fail);
+}
+
+void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ floor(src, dest, fail);
+}
+
+void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ceilf(src, dest, fail);
+}
+
+void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ceil(src, dest, fail);
+}
+
+void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest,
+ FloatRegister temp, Label* fail) {
+ roundf(src, dest, fail, temp);
+}
+
+void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest,
+ FloatRegister temp, Label* fail) {
+ round(src, dest, fail, temp);
+}
+
+void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ truncf(src, dest, fail);
+}
+
+void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ trunc(src, dest, fail);
+}
+
+void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src,
+ FloatRegister dest) {
+ MOZ_CRASH("not supported on this platform");
+}
+
+void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src,
+ FloatRegister dest) {
+ MOZ_CRASH("not supported on this platform");
+}
+
+void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister output) {
+ MOZ_CRASH("not supported on this platform");
+}
+
+void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
+ Register pointer) {
+ if (IsShiftInScaleRange(shift)) {
+ computeEffectiveAddress(
+ BaseIndex(pointer, indexTemp32, ShiftToScale(shift)), pointer);
+ return;
+ }
+ lshift32(Imm32(shift), indexTemp32);
+ addPtr(indexTemp32, pointer);
+}
+
+//}}} check_macroassembler_style
+
+void MacroAssemblerARM::wasmTruncateToInt32(FloatRegister input,
+ Register output, MIRType fromType,
+ bool isUnsigned, bool isSaturating,
+ Label* oolEntry) {
+ ScratchDoubleScope scratchScope(asMasm());
+ ScratchRegisterScope scratchReg(asMasm());
+ FloatRegister scratch = scratchScope.uintOverlay();
+
+ // ARM conversion instructions clamp the value to ensure it fits within the
+ // target's type bounds, so every time we see those, we need to check the
+ // input. A NaN check is not necessary because NaN is converted to zero and
+ // on a zero result we branch out of line to do further processing anyway.
+ if (isUnsigned) {
+ if (fromType == MIRType::Double) {
+ ma_vcvt_F64_U32(input, scratch);
+ } else if (fromType == MIRType::Float32) {
+ ma_vcvt_F32_U32(input, scratch);
+ } else {
+ MOZ_CRASH("unexpected type in visitWasmTruncateToInt32");
+ }
+
+ ma_vxfer(scratch, output);
+
+ if (!isSaturating) {
+ // int32_t(UINT32_MAX) == -1.
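+      // Branch out of line when the conversion clamped to UINT32_MAX or
+      // produced 0, which is also what a NaN input converts to.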
+ ma_cmp(output, Imm32(-1), scratchReg);
+ as_cmp(output, Imm8(0), Assembler::NotEqual);
+ ma_b(oolEntry, Assembler::Equal);
+ }
+
+ return;
+ }
+
+ // vcvt* converts NaN into 0, so check for NaNs here.
+ if (!isSaturating) {
+ if (fromType == MIRType::Double) {
+ asMasm().compareDouble(input, input);
+ } else if (fromType == MIRType::Float32) {
+ asMasm().compareFloat(input, input);
+ } else {
+ MOZ_CRASH("unexpected type in visitWasmTruncateToInt32");
+ }
+
+ ma_b(oolEntry, Assembler::VFP_Unordered);
+ }
+
+ scratch = scratchScope.sintOverlay();
+
+ if (fromType == MIRType::Double) {
+ ma_vcvt_F64_I32(input, scratch);
+ } else if (fromType == MIRType::Float32) {
+ ma_vcvt_F32_I32(input, scratch);
+ } else {
+ MOZ_CRASH("unexpected type in visitWasmTruncateToInt32");
+ }
+
+ ma_vxfer(scratch, output);
+
+ if (!isSaturating) {
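+    // INT32_MAX and INT32_MIN are the values vcvt clamps to on overflow, so
+    // treat either result as a possible out-of-range input.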
+ ma_cmp(output, Imm32(INT32_MAX), scratchReg);
+ ma_cmp(output, Imm32(INT32_MIN), scratchReg, Assembler::NotEqual);
+ ma_b(oolEntry, Assembler::Equal);
+ }
+}
+
+void MacroAssemblerARM::outOfLineWasmTruncateToIntCheck(
+ FloatRegister input, MIRType fromType, MIRType toType, TruncFlags flags,
+ Label* rejoin, wasm::BytecodeOffset trapOffset) {
+ // On ARM, saturating truncation codegen handles saturating itself rather
+ // than relying on out-of-line fixup code.
+ if (flags & TRUNC_SATURATING) {
+ return;
+ }
+
+ bool isUnsigned = flags & TRUNC_UNSIGNED;
+ ScratchDoubleScope scratchScope(asMasm());
+ FloatRegister scratch;
+
+ // Eagerly take care of NaNs.
+ Label inputIsNaN;
+ if (fromType == MIRType::Double) {
+ asMasm().branchDouble(Assembler::DoubleUnordered, input, input,
+ &inputIsNaN);
+ } else if (fromType == MIRType::Float32) {
+ asMasm().branchFloat(Assembler::DoubleUnordered, input, input, &inputIsNaN);
+ } else {
+ MOZ_CRASH("unexpected type in visitOutOfLineWasmTruncateCheck");
+ }
+
+ // Handle special values.
+ Label fail;
+
+ // By default test for the following inputs and bail:
+  // signed:   ] -Inf, INTXX_MIN - 1.0 ] and [ INTXX_MAX + 1.0, +Inf [
+  // unsigned: ] -Inf, -1.0 ] and [ UINTXX_MAX + 1.0, +Inf [
+ // Note: we cannot always represent those exact values. As a result
+ // this changes the actual comparison a bit.
+ double minValue, maxValue;
+ Assembler::DoubleCondition minCond = Assembler::DoubleLessThanOrEqual;
+ Assembler::DoubleCondition maxCond = Assembler::DoubleGreaterThanOrEqual;
+ if (toType == MIRType::Int64) {
+ if (isUnsigned) {
+ minValue = -1;
+ maxValue = double(UINT64_MAX) + 1.0;
+ } else {
+      // In the float32/double range there exists no value between
+      // INT64_MIN and INT64_MIN - 1.0, making INT64_MIN the lower bound.
+ minValue = double(INT64_MIN);
+ minCond = Assembler::DoubleLessThan;
+ maxValue = double(INT64_MAX) + 1.0;
+ }
+ } else {
+ if (isUnsigned) {
+ minValue = -1;
+ maxValue = double(UINT32_MAX) + 1.0;
+ } else {
+ if (fromType == MIRType::Float32) {
+        // In the float32 range there exists no value between
+        // INT32_MIN and INT32_MIN - 1.0, making INT32_MIN the lower bound.
+ minValue = double(INT32_MIN);
+ minCond = Assembler::DoubleLessThan;
+ } else {
+ minValue = double(INT32_MIN) - 1.0;
+ }
+ maxValue = double(INT32_MAX) + 1.0;
+ }
+ }
+
+ if (fromType == MIRType::Double) {
+ scratch = scratchScope.doubleOverlay();
+ asMasm().loadConstantDouble(minValue, scratch);
+ asMasm().branchDouble(minCond, input, scratch, &fail);
+
+ asMasm().loadConstantDouble(maxValue, scratch);
+ asMasm().branchDouble(maxCond, input, scratch, &fail);
+ } else {
+ MOZ_ASSERT(fromType == MIRType::Float32);
+ scratch = scratchScope.singleOverlay();
+ asMasm().loadConstantFloat32(float(minValue), scratch);
+ asMasm().branchFloat(minCond, input, scratch, &fail);
+
+ asMasm().loadConstantFloat32(float(maxValue), scratch);
+ asMasm().branchFloat(maxCond, input, scratch, &fail);
+ }
+
+ // We had an actual correct value, get back to where we were.
+ ma_b(rejoin);
+
+ // Handle errors.
+ bind(&fail);
+ asMasm().wasmTrap(wasm::Trap::IntegerOverflow, trapOffset);
+
+ bind(&inputIsNaN);
+ asMasm().wasmTrap(wasm::Trap::InvalidConversionToInteger, trapOffset);
+}
+
+void MacroAssemblerARM::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
+ Register memoryBase, Register ptr,
+ Register ptrScratch, AnyRegister output,
+ Register64 out64) {
+ MOZ_ASSERT(ptr == ptrScratch);
+ MOZ_ASSERT(!access.isZeroExtendSimd128Load());
+ MOZ_ASSERT(!access.isSplatSimd128Load());
+ MOZ_ASSERT(!access.isWidenSimd128Load());
+
+ uint32_t offset = access.offset();
+ MOZ_ASSERT(offset < asMasm().wasmMaxOffsetGuardLimit());
+
+ Scalar::Type type = access.type();
+
+ // Maybe add the offset.
+ if (offset || type == Scalar::Int64) {
+ ScratchRegisterScope scratch(asMasm());
+ if (offset) {
+ ma_add(Imm32(offset), ptr, scratch);
+ }
+ }
+
+ bool isSigned = type == Scalar::Int8 || type == Scalar::Int16 ||
+ type == Scalar::Int32 || type == Scalar::Int64;
+ unsigned byteSize = access.byteSize();
+
+ // NOTE: the generated code must match the assembly code in gen_load in
+ // GenerateAtomicOperations.py
+ asMasm().memoryBarrierBefore(access.sync());
+
+ BufferOffset load;
+ if (out64 != Register64::Invalid()) {
+ if (type == Scalar::Int64) {
+ static_assert(INT64LOW_OFFSET == 0);
+
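+      // Load the low word (at offset 0) first, then bump ptr by
+      // INT64HIGH_OFFSET for the high word; trap metadata is attached to
+      // both halves.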
+ load = ma_dataTransferN(IsLoad, 32, /* signed = */ false, memoryBase, ptr,
+ out64.low);
+ append(access, load.getOffset());
+
+ as_add(ptr, ptr, Imm8(INT64HIGH_OFFSET));
+
+ load =
+ ma_dataTransferN(IsLoad, 32, isSigned, memoryBase, ptr, out64.high);
+ append(access, load.getOffset());
+ } else {
+ load = ma_dataTransferN(IsLoad, byteSize * 8, isSigned, memoryBase, ptr,
+ out64.low);
+ append(access, load.getOffset());
+
+ if (isSigned) {
+ ma_asr(Imm32(31), out64.low, out64.high);
+ } else {
+ ma_mov(Imm32(0), out64.high);
+ }
+ }
+ } else {
+ bool isFloat = output.isFloat();
+ if (isFloat) {
+ MOZ_ASSERT((byteSize == 4) == output.fpu().isSingle());
+ ScratchRegisterScope scratch(asMasm());
+ FloatRegister dest = output.fpu();
+ ma_add(memoryBase, ptr, scratch);
+
+ // FP loads can't use VLDR as that has stringent alignment checks and will
+ // SIGBUS on unaligned accesses. Choose a different strategy depending on
+ // the available hardware. We don't gate Wasm on the presence of NEON.
+ if (HasNEON()) {
+ // NEON available: The VLD1 multiple-single-elements variant will only
+ // trap if SCTRL.A==1, but we already assume (for integer accesses) that
+ // the hardware/OS handles that transparently.
+ //
+ // An additional complication is that if we're targeting the high single
+ // then an unaligned load is not possible, and we may need to go via the
+ // FPR scratch.
+ if (byteSize == 4 && dest.code() & 1) {
+ ScratchFloat32Scope fscratch(asMasm());
+ load = as_vldr_unaligned(fscratch, scratch);
+ as_vmov(dest, fscratch);
+ } else {
+ load = as_vldr_unaligned(dest, scratch);
+ }
+ } else {
+ // NEON not available: Load to GPR scratch, move to FPR destination. We
+ // don't have adjacent scratches for the f64, so use individual LDRs,
+ // not LDRD.
+ SecondScratchRegisterScope scratch2(asMasm());
+ if (byteSize == 4) {
+ load = as_dtr(IsLoad, 32, Offset, scratch2,
+ DTRAddr(scratch, DtrOffImm(0)), Always);
+ as_vxfer(scratch2, InvalidReg, VFPRegister(dest), CoreToFloat,
+ Always);
+ } else {
+ // The trap information is associated with the load of the high word,
+ // which must be done first.
+ load = as_dtr(IsLoad, 32, Offset, scratch2,
+ DTRAddr(scratch, DtrOffImm(4)), Always);
+ as_dtr(IsLoad, 32, Offset, scratch, DTRAddr(scratch, DtrOffImm(0)),
+ Always);
+ as_vxfer(scratch, scratch2, VFPRegister(dest), CoreToFloat, Always);
+ }
+ }
+ append(access, load.getOffset());
+ } else {
+ load = ma_dataTransferN(IsLoad, byteSize * 8, isSigned, memoryBase, ptr,
+ output.gpr());
+ append(access, load.getOffset());
+ }
+ }
+
+ asMasm().memoryBarrierAfter(access.sync());
+}
+
+void MacroAssemblerARM::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
+ AnyRegister value, Register64 val64,
+ Register memoryBase, Register ptr,
+ Register ptrScratch) {
+ static_assert(INT64LOW_OFFSET == 0);
+ static_assert(INT64HIGH_OFFSET == 4);
+
+ MOZ_ASSERT(ptr == ptrScratch);
+
+ uint32_t offset = access.offset();
+ MOZ_ASSERT(offset < asMasm().wasmMaxOffsetGuardLimit());
+
+ unsigned byteSize = access.byteSize();
+ Scalar::Type type = access.type();
+
+ // Maybe add the offset.
+ if (offset || type == Scalar::Int64) {
+ ScratchRegisterScope scratch(asMasm());
+ // We need to store the high word of an Int64 first, so always adjust the
+ // pointer to point to the high word in this case. The adjustment is always
+ // OK because wasmMaxOffsetGuardLimit is computed so that we can add up to
+ // sizeof(LargestValue)-1 without skipping past the guard page, and we
+ // assert above that offset < wasmMaxOffsetGuardLimit.
+ if (type == Scalar::Int64) {
+ offset += INT64HIGH_OFFSET;
+ }
+ if (offset) {
+ ma_add(Imm32(offset), ptr, scratch);
+ }
+ }
+
+ // NOTE: the generated code must match the assembly code in gen_store in
+ // GenerateAtomicOperations.py
+ asMasm().memoryBarrierBefore(access.sync());
+
+ BufferOffset store;
+ if (type == Scalar::Int64) {
+ store = ma_dataTransferN(IsStore, 32 /* bits */, /* signed */ false,
+ memoryBase, ptr, val64.high);
+ append(access, store.getOffset());
+
+ as_sub(ptr, ptr, Imm8(INT64HIGH_OFFSET));
+
+ store = ma_dataTransferN(IsStore, 32 /* bits */, /* signed */ true,
+ memoryBase, ptr, val64.low);
+ append(access, store.getOffset());
+ } else {
+ if (value.isFloat()) {
+ ScratchRegisterScope scratch(asMasm());
+ FloatRegister val = value.fpu();
+ MOZ_ASSERT((byteSize == 4) == val.isSingle());
+ ma_add(memoryBase, ptr, scratch);
+
+ // See comments above at wasmLoadImpl for more about this logic.
+ if (HasNEON()) {
+ if (byteSize == 4 && (val.code() & 1)) {
+ ScratchFloat32Scope fscratch(asMasm());
+ as_vmov(fscratch, val);
+ store = as_vstr_unaligned(fscratch, scratch);
+ } else {
+ store = as_vstr_unaligned(val, scratch);
+ }
+ } else {
+ // NEON not available: Move FPR to GPR scratch, store GPR. We have only
+ // one scratch to hold the value, so for f64 we must do two separate
+ // moves. That's OK - this is really a corner case. If we really cared
+ // we would pass in a temp to avoid the second move.
+ SecondScratchRegisterScope scratch2(asMasm());
+ if (byteSize == 4) {
+ as_vxfer(scratch2, InvalidReg, VFPRegister(val), FloatToCore, Always);
+ store = as_dtr(IsStore, 32, Offset, scratch2,
+ DTRAddr(scratch, DtrOffImm(0)), Always);
+ } else {
+ // The trap information is associated with the store of the high word,
+ // which must be done first.
+ as_vxfer(scratch2, InvalidReg, VFPRegister(val).singleOverlay(1),
+ FloatToCore, Always);
+ store = as_dtr(IsStore, 32, Offset, scratch2,
+ DTRAddr(scratch, DtrOffImm(4)), Always);
+ as_vxfer(scratch2, InvalidReg, VFPRegister(val).singleOverlay(0),
+ FloatToCore, Always);
+ as_dtr(IsStore, 32, Offset, scratch2, DTRAddr(scratch, DtrOffImm(0)),
+ Always);
+ }
+ }
+ append(access, store.getOffset());
+ } else {
+ bool isSigned = type == Scalar::Uint32 ||
+ type == Scalar::Int32; // see AsmJSStoreHeap;
+ Register val = value.gpr();
+
+ store = ma_dataTransferN(IsStore, 8 * byteSize /* bits */, isSigned,
+ memoryBase, ptr, val);
+ append(access, store.getOffset());
+ }
+ }
+
+ asMasm().memoryBarrierAfter(access.sync());
+}
diff --git a/js/src/jit/arm/MacroAssembler-arm.h b/js/src/jit/arm/MacroAssembler-arm.h
new file mode 100644
index 0000000000..958cdf4718
--- /dev/null
+++ b/js/src/jit/arm/MacroAssembler-arm.h
@@ -0,0 +1,1392 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_MacroAssembler_arm_h
+#define jit_arm_MacroAssembler_arm_h
+
+#include "mozilla/DebugOnly.h"
+
+#include "jit/arm/Assembler-arm.h"
+#include "jit/MoveResolver.h"
+#include "vm/BytecodeUtil.h"
+#include "wasm/WasmBuiltins.h"
+#include "wasm/WasmCodegenTypes.h"
+
+namespace js {
+namespace jit {
+
+static Register CallReg = ip;
+static const int defaultShift = 3;
+static_assert(1 << defaultShift == sizeof(JS::Value));
+
+// See documentation for ScratchTagScope and ScratchTagScopeRelease in
+// MacroAssembler-x64.h.
+
+class ScratchTagScope {
+ const ValueOperand& v_;
+
+ public:
+ ScratchTagScope(MacroAssembler&, const ValueOperand& v) : v_(v) {}
+ operator Register() { return v_.typeReg(); }
+ void release() {}
+ void reacquire() {}
+};
+
+class ScratchTagScopeRelease {
+ public:
+ explicit ScratchTagScopeRelease(ScratchTagScope*) {}
+};
+
+// MacroAssemblerARM inherits from Assembler, defined in
+// Assembler-arm.{h,cpp}.
+class MacroAssemblerARM : public Assembler {
+ private:
+ // Perform a downcast. Should be removed by Bug 996602.
+ MacroAssembler& asMasm();
+ const MacroAssembler& asMasm() const;
+
+ protected:
+ // On ARM, some instructions require a second scratch register. This
+ // register defaults to lr, since it's non-allocatable (as it can be
+ // clobbered by some instructions). Allow the baseline compiler to override
+ // this though, since baseline IC stubs rely on lr holding the return
+ // address.
+ Register secondScratchReg_;
+
+ public:
+ Register getSecondScratchReg() const { return secondScratchReg_; }
+
+ public:
+ // Higher level tag testing code.
+ // TODO: Can probably remove the Operand versions.
+ Operand ToPayload(Operand base) const {
+ return Operand(Register::FromCode(base.base()), base.disp());
+ }
+ Address ToPayload(const Address& base) const { return base; }
+ BaseIndex ToPayload(const BaseIndex& base) const { return base; }
+
+ protected:
+ Operand ToType(Operand base) const {
+ return Operand(Register::FromCode(base.base()),
+ base.disp() + sizeof(void*));
+ }
+ Address ToType(const Address& base) const {
+ return ToType(Operand(base)).toAddress();
+ }
+ BaseIndex ToType(const BaseIndex& base) const {
+ return BaseIndex(base.base, base.index, base.scale,
+ base.offset + sizeof(void*));
+ }
+
+ Address ToPayloadAfterStackPush(const Address& base) const {
+ // If we are based on StackPointer, pass over the type tag just pushed.
+ if (base.base == StackPointer) {
+ return Address(base.base, base.offset + sizeof(void*));
+ }
+ return ToPayload(base);
+ }
+
+ public:
+ MacroAssemblerARM() : secondScratchReg_(lr) {}
+
+ void setSecondScratchReg(Register reg) {
+ MOZ_ASSERT(reg != ScratchRegister);
+ secondScratchReg_ = reg;
+ }
+
+ void convertBoolToInt32(Register source, Register dest);
+ void convertInt32ToDouble(Register src, FloatRegister dest);
+ void convertInt32ToDouble(const Address& src, FloatRegister dest);
+ void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest);
+ void convertUInt32ToFloat32(Register src, FloatRegister dest);
+ void convertUInt32ToDouble(Register src, FloatRegister dest);
+ void convertDoubleToFloat32(FloatRegister src, FloatRegister dest,
+ Condition c = Always);
+ void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail,
+ bool negativeZeroCheck = true);
+ void convertDoubleToPtr(FloatRegister src, Register dest, Label* fail,
+ bool negativeZeroCheck = true) {
+ convertDoubleToInt32(src, dest, fail, negativeZeroCheck);
+ }
+ void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail,
+ bool negativeZeroCheck = true);
+
+ void convertFloat32ToDouble(FloatRegister src, FloatRegister dest);
+ void convertInt32ToFloat32(Register src, FloatRegister dest);
+ void convertInt32ToFloat32(const Address& src, FloatRegister dest);
+
+ void wasmTruncateToInt32(FloatRegister input, Register output,
+ MIRType fromType, bool isUnsigned, bool isSaturating,
+ Label* oolEntry);
+ void outOfLineWasmTruncateToIntCheck(FloatRegister input, MIRType fromType,
+ MIRType toType, TruncFlags flags,
+ Label* rejoin,
+ wasm::BytecodeOffset trapOffset);
+
+  // Somewhat direct wrappers for the low-level assembler bitop functions.
+  // Attempt to encode a virtual ALU instruction using two real
+  // instructions.
+ private:
+ bool alu_dbl(Register src1, Imm32 imm, Register dest, ALUOp op, SBit s,
+ Condition c);
+
+ public:
+ void ma_alu(Register src1, Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, ALUOp op, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_alu(Register src1, Operand2 op2, Register dest, ALUOp op,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_alu(Register src1, Operand op2, Register dest, ALUOp op,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_nop();
+
+ BufferOffset ma_movPatchable(Imm32 imm, Register dest,
+ Assembler::Condition c);
+ BufferOffset ma_movPatchable(ImmPtr imm, Register dest,
+ Assembler::Condition c);
+
+ // To be used with Iter := InstructionIterator or BufferInstructionIterator.
+ template <class Iter>
+ static void ma_mov_patch(Imm32 imm, Register dest, Assembler::Condition c,
+ RelocStyle rs, Iter iter);
+
+ // ALU based ops
+ // mov
+ void ma_mov(Register src, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_mov(Imm32 imm, Register dest, Condition c = Always);
+ void ma_mov(ImmWord imm, Register dest, Condition c = Always);
+
+ void ma_mov(ImmGCPtr ptr, Register dest);
+
+ // Shifts (just a move with a shifting op2)
+ void ma_lsl(Imm32 shift, Register src, Register dst);
+ void ma_lsr(Imm32 shift, Register src, Register dst);
+ void ma_asr(Imm32 shift, Register src, Register dst);
+ void ma_ror(Imm32 shift, Register src, Register dst);
+ void ma_rol(Imm32 shift, Register src, Register dst);
+
+ void ma_lsl(Register shift, Register src, Register dst);
+ void ma_lsr(Register shift, Register src, Register dst);
+ void ma_asr(Register shift, Register src, Register dst);
+ void ma_ror(Register shift, Register src, Register dst);
+ void ma_rol(Register shift, Register src, Register dst,
+ AutoRegisterScope& scratch);
+
+ // Move not (dest <- ~src)
+ void ma_mvn(Register src1, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Negate (dest <- -src) implemented as rsb dest, src, 0
+ void ma_neg(Register src, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_neg(Register64 src, Register64 dest);
+
+ // And
+ void ma_and(Register src, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_and(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_and(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+
+ void ma_and(Imm32 imm, Register src1, Register dest,
+ AutoRegisterScope& scratch, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Bit clear (dest <- dest & ~imm) or (dest <- src1 & ~src2)
+ void ma_bic(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+
+ // Exclusive or
+ void ma_eor(Register src, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_eor(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_eor(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+
+ void ma_eor(Imm32 imm, Register src1, Register dest,
+ AutoRegisterScope& scratch, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Or
+ void ma_orr(Register src, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_orr(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_orr(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+
+ void ma_orr(Imm32 imm, Register src1, Register dest,
+ AutoRegisterScope& scratch, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Arithmetic based ops.
+ // Add with carry:
+ void ma_adc(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_adc(Register src, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_adc(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_adc(Register src1, Imm32 op, Register dest,
+ AutoRegisterScope& scratch, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Add:
+ void ma_add(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_add(Register src1, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_add(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_add(Register src1, Operand op, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_add(Register src1, Imm32 op, Register dest,
+ AutoRegisterScope& scratch, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Subtract with carry:
+ void ma_sbc(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_sbc(Register src1, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_sbc(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Subtract:
+ void ma_sub(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_sub(Register src1, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_sub(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_sub(Register src1, Operand op, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_sub(Register src1, Imm32 op, Register dest,
+ AutoRegisterScope& scratch, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Reverse subtract:
+ void ma_rsb(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_rsb(Register src1, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_rsb(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_rsb(Register src1, Imm32 op2, Register dest,
+ AutoRegisterScope& scratch, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Reverse subtract with carry:
+ void ma_rsc(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_rsc(Register src1, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_rsc(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Compares/tests.
+ // Compare negative (sets condition codes as src1 + src2 would):
+ void ma_cmn(Register src1, Imm32 imm, AutoRegisterScope& scratch,
+ Condition c = Always);
+ void ma_cmn(Register src1, Register src2, Condition c = Always);
+ void ma_cmn(Register src1, Operand op, Condition c = Always);
+
+ // Compare (src - src2):
+ void ma_cmp(Register src1, Imm32 imm, AutoRegisterScope& scratch,
+ Condition c = Always);
+ void ma_cmp(Register src1, ImmTag tag, Condition c = Always);
+ void ma_cmp(Register src1, ImmWord ptr, AutoRegisterScope& scratch,
+ Condition c = Always);
+ void ma_cmp(Register src1, ImmGCPtr ptr, AutoRegisterScope& scratch,
+ Condition c = Always);
+ void ma_cmp(Register src1, Operand op, AutoRegisterScope& scratch,
+ AutoRegisterScope& scratch2, Condition c = Always);
+ void ma_cmp(Register src1, Register src2, Condition c = Always);
+
+ // Test for equality, (src1 ^ src2):
+ void ma_teq(Register src1, Imm32 imm, AutoRegisterScope& scratch,
+ Condition c = Always);
+ void ma_teq(Register src1, Register src2, Condition c = Always);
+ void ma_teq(Register src1, Operand op, Condition c = Always);
+
+ // Test (src1 & src2):
+ void ma_tst(Register src1, Imm32 imm, AutoRegisterScope& scratch,
+ Condition c = Always);
+ void ma_tst(Register src1, Register src2, Condition c = Always);
+ void ma_tst(Register src1, Operand op, Condition c = Always);
+
+ // Multiplies. For now, there are only two that we care about.
+ void ma_mul(Register src1, Register src2, Register dest);
+ void ma_mul(Register src1, Imm32 imm, Register dest,
+ AutoRegisterScope& scratch);
+ Condition ma_check_mul(Register src1, Register src2, Register dest,
+ AutoRegisterScope& scratch, Condition cond);
+ Condition ma_check_mul(Register src1, Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, Condition cond);
+
+ void ma_umull(Register src1, Imm32 imm, Register destHigh, Register destLow,
+ AutoRegisterScope& scratch);
+ void ma_umull(Register src1, Register src2, Register destHigh,
+ Register destLow);
+
+  // Fast mod; uses scratch registers, and thus needs to be in the assembler.
+  // Implicitly assumes that we can overwrite dest at the beginning of the
+  // sequence.
+ void ma_mod_mask(Register src, Register dest, Register hold, Register tmp,
+ AutoRegisterScope& scratch, AutoRegisterScope& scratch2,
+ int32_t shift);
+
+ // Mod - depends on integer divide instructions being supported.
+ void ma_smod(Register num, Register div, Register dest,
+ AutoRegisterScope& scratch);
+ void ma_umod(Register num, Register div, Register dest,
+ AutoRegisterScope& scratch);
+
+ // Division - depends on integer divide instructions being supported.
+ void ma_sdiv(Register num, Register div, Register dest,
+ Condition cond = Always);
+ void ma_udiv(Register num, Register div, Register dest,
+ Condition cond = Always);
+ // Misc operations
+ void ma_clz(Register src, Register dest, Condition cond = Always);
+ void ma_ctz(Register src, Register dest, AutoRegisterScope& scratch);
+ // Memory:
+ // Shortcut for when we know we're transferring 32 bits of data.
+ void ma_dtr(LoadStore ls, Register rn, Imm32 offset, Register rt,
+ AutoRegisterScope& scratch, Index mode = Offset,
+ Condition cc = Always);
+ void ma_dtr(LoadStore ls, Register rt, const Address& addr,
+ AutoRegisterScope& scratch, Index mode, Condition cc);
+
+ void ma_str(Register rt, DTRAddr addr, Index mode = Offset,
+ Condition cc = Always);
+ void ma_str(Register rt, const Address& addr, AutoRegisterScope& scratch,
+ Index mode = Offset, Condition cc = Always);
+
+ void ma_ldr(DTRAddr addr, Register rt, Index mode = Offset,
+ Condition cc = Always);
+ void ma_ldr(const Address& addr, Register rt, AutoRegisterScope& scratch,
+ Index mode = Offset, Condition cc = Always);
+
+ void ma_ldrb(DTRAddr addr, Register rt, Index mode = Offset,
+ Condition cc = Always);
+ void ma_ldrh(EDtrAddr addr, Register rt, Index mode = Offset,
+ Condition cc = Always);
+ void ma_ldrsh(EDtrAddr addr, Register rt, Index mode = Offset,
+ Condition cc = Always);
+ void ma_ldrsb(EDtrAddr addr, Register rt, Index mode = Offset,
+ Condition cc = Always);
+ void ma_ldrd(EDtrAddr addr, Register rt, mozilla::DebugOnly<Register> rt2,
+ Index mode = Offset, Condition cc = Always);
+ void ma_strb(Register rt, DTRAddr addr, Index mode = Offset,
+ Condition cc = Always);
+ void ma_strh(Register rt, EDtrAddr addr, Index mode = Offset,
+ Condition cc = Always);
+ void ma_strd(Register rt, mozilla::DebugOnly<Register> rt2, EDtrAddr addr,
+ Index mode = Offset, Condition cc = Always);
+
+  // Specialty for moving N bits of data, where N == 8, 16, 32, or 64.
+ BufferOffset ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
+ Register rn, Register rm, Register rt,
+ AutoRegisterScope& scratch, Index mode = Offset,
+ Condition cc = Always, Scale scale = TimesOne);
+
+ BufferOffset ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
+ Register rn, Register rm, Register rt,
+ Index mode = Offset, Condition cc = Always);
+
+ BufferOffset ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
+ Register rn, Imm32 offset, Register rt,
+ AutoRegisterScope& scratch, Index mode = Offset,
+ Condition cc = Always);
+
+ void ma_pop(Register r);
+ void ma_popn_pc(Imm32 n, AutoRegisterScope& scratch,
+ AutoRegisterScope& scratch2);
+ void ma_push(Register r);
+ void ma_push_sp(Register r, AutoRegisterScope& scratch);
+
+ void ma_vpop(VFPRegister r);
+ void ma_vpush(VFPRegister r);
+
+ // Barriers.
+ void ma_dmb(BarrierOption option = BarrierSY);
+ void ma_dsb(BarrierOption option = BarrierSY);
+
+ // Branches when done from within arm-specific code.
+ BufferOffset ma_b(Label* dest, Condition c = Always);
+ void ma_b(void* target, Condition c = Always);
+ void ma_bx(Register dest, Condition c = Always);
+
+  // This is almost NEVER necessary; we'll basically never be calling a label,
+  // except possibly in the crazy bailout-table case.
+ void ma_bl(Label* dest, Condition c = Always);
+
+ void ma_blx(Register dest, Condition c = Always);
+
+ // VFP/ALU:
+ void ma_vadd(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+ void ma_vsub(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+
+ void ma_vmul(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+ void ma_vdiv(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+
+ void ma_vneg(FloatRegister src, FloatRegister dest, Condition cc = Always);
+ void ma_vmov(FloatRegister src, FloatRegister dest, Condition cc = Always);
+ void ma_vmov_f32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+ void ma_vabs(FloatRegister src, FloatRegister dest, Condition cc = Always);
+ void ma_vabs_f32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+
+ void ma_vsqrt(FloatRegister src, FloatRegister dest, Condition cc = Always);
+ void ma_vsqrt_f32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+
+ void ma_vimm(double value, FloatRegister dest, Condition cc = Always);
+ void ma_vimm_f32(float value, FloatRegister dest, Condition cc = Always);
+
+ void ma_vcmp(FloatRegister src1, FloatRegister src2, Condition cc = Always);
+ void ma_vcmp_f32(FloatRegister src1, FloatRegister src2,
+ Condition cc = Always);
+ void ma_vcmpz(FloatRegister src1, Condition cc = Always);
+ void ma_vcmpz_f32(FloatRegister src1, Condition cc = Always);
+
+ void ma_vadd_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+ void ma_vsub_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+
+ void ma_vmul_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+ void ma_vdiv_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+
+ void ma_vneg_f32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+
+ // Source is F64, dest is I32:
+ void ma_vcvt_F64_I32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+ void ma_vcvt_F64_U32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+
+ // Source is I32, dest is F64:
+ void ma_vcvt_I32_F64(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+ void ma_vcvt_U32_F64(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+
+ // Source is F32, dest is I32:
+ void ma_vcvt_F32_I32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+ void ma_vcvt_F32_U32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+
+ // Source is I32, dest is F32:
+ void ma_vcvt_I32_F32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+ void ma_vcvt_U32_F32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+
+ // Transfer (do not coerce) a float into a gpr.
+ void ma_vxfer(VFPRegister src, Register dest, Condition cc = Always);
+  // Transfer (do not coerce) a double into a pair of gprs.
+ void ma_vxfer(VFPRegister src, Register dest1, Register dest2,
+ Condition cc = Always);
+
+  // Transfer (do not coerce) a gpr into a float.
+ void ma_vxfer(Register src, FloatRegister dest, Condition cc = Always);
+  // Transfer (do not coerce) a pair of gprs into a double.
+ void ma_vxfer(Register src1, Register src2, FloatRegister dest,
+ Condition cc = Always);
+
+ BufferOffset ma_vdtr(LoadStore ls, const Address& addr, VFPRegister dest,
+ AutoRegisterScope& scratch, Condition cc = Always);
+
+ BufferOffset ma_vldr(VFPAddr addr, VFPRegister dest, Condition cc = Always);
+ BufferOffset ma_vldr(const Address& addr, VFPRegister dest,
+ AutoRegisterScope& scratch, Condition cc = Always);
+ BufferOffset ma_vldr(VFPRegister src, Register base, Register index,
+ AutoRegisterScope& scratch, int32_t shift = defaultShift,
+ Condition cc = Always);
+
+ BufferOffset ma_vstr(VFPRegister src, VFPAddr addr, Condition cc = Always);
+ BufferOffset ma_vstr(VFPRegister src, const Address& addr,
+ AutoRegisterScope& scratch, Condition cc = Always);
+ BufferOffset ma_vstr(VFPRegister src, Register base, Register index,
+ AutoRegisterScope& scratch, AutoRegisterScope& scratch2,
+ int32_t shift, int32_t offset, Condition cc = Always);
+ BufferOffset ma_vstr(VFPRegister src, Register base, Register index,
+ AutoRegisterScope& scratch, int32_t shift,
+ Condition cc = Always);
+
+ void ma_call(ImmPtr dest);
+
+  // Float registers can only be loaded/stored in contiguous runs when using
+  // vstm/vldm. This function breaks the set into contiguous runs and
+  // loads/stores them at [rm]. rm will be modified and left in a state
+  // logically suitable for the next load/store. Returns the offset from [rm]
+  // for the logical next load/store.
+ int32_t transferMultipleByRuns(FloatRegisterSet set, LoadStore ls,
+ Register rm, DTMMode mode) {
+ if (mode == IA) {
+ return transferMultipleByRunsImpl<FloatRegisterForwardIterator>(
+ set, ls, rm, mode, 1);
+ }
+ if (mode == DB) {
+ return transferMultipleByRunsImpl<FloatRegisterBackwardIterator>(
+ set, ls, rm, mode, -1);
+ }
+ MOZ_CRASH("Invalid data transfer addressing mode");
+ }
+
+ // `outAny` is valid if and only if `out64` == Register64::Invalid().
+ void wasmLoadImpl(const wasm::MemoryAccessDesc& access, Register memoryBase,
+ Register ptr, Register ptrScratch, AnyRegister outAny,
+ Register64 out64);
+
+ // `valAny` is valid if and only if `val64` == Register64::Invalid().
+ void wasmStoreImpl(const wasm::MemoryAccessDesc& access, AnyRegister valAny,
+ Register64 val64, Register memoryBase, Register ptr,
+ Register ptrScratch);
+
+ private:
+ // Implementation for transferMultipleByRuns so we can use different
+ // iterators for forward/backward traversals. The sign argument should be 1
+ // if we traverse forwards, -1 if we traverse backwards.
+ template <typename RegisterIterator>
+ int32_t transferMultipleByRunsImpl(FloatRegisterSet set, LoadStore ls,
+ Register rm, DTMMode mode, int32_t sign) {
+ MOZ_ASSERT(sign == 1 || sign == -1);
+
+ int32_t delta = sign * sizeof(float);
+ int32_t offset = 0;
+    // Build up a new set, which is the sum of all of the single and double
+    // registers. This set can have up to 48 registers in it total:
+    // s0-s31 and d16-d31.
+ FloatRegisterSet mod = set.reduceSetForPush();
+
+ RegisterIterator iter(mod);
+ while (iter.more()) {
+ startFloatTransferM(ls, rm, mode, WriteBack);
+ int32_t reg = (*iter).code();
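+      // Extend the current vldm/vstm run for as long as the next register in
+      // the set is numbered contiguously with the previous one.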
+ do {
+ offset += delta;
+ if ((*iter).isDouble()) {
+ offset += delta;
+ }
+ transferFloatReg(*iter);
+ } while ((++iter).more() && int32_t((*iter).code()) == (reg += sign));
+ finishFloatTransfer();
+ }
+ return offset;
+ }
+};
+
+class MacroAssembler;
+
+class MacroAssemblerARMCompat : public MacroAssemblerARM {
+ private:
+ // Perform a downcast. Should be removed by Bug 996602.
+ MacroAssembler& asMasm();
+ const MacroAssembler& asMasm() const;
+
+ public:
+ MacroAssemblerARMCompat() {}
+
+ public:
+ // Jumps + other functions that should be called from non-arm specific
+ // code. Basically, an x86 front end on top of the ARM code.
+ void j(Condition code, Label* dest) { as_b(dest, code); }
+ void j(Label* dest) { as_b(dest, Always); }
+
+ void mov(Register src, Register dest) { ma_mov(src, dest); }
+ void mov(ImmWord imm, Register dest) { ma_mov(Imm32(imm.value), dest); }
+ void mov(ImmPtr imm, Register dest) {
+ mov(ImmWord(uintptr_t(imm.value)), dest);
+ }
+
+ void branch(JitCode* c) {
+ BufferOffset bo = m_buffer.nextOffset();
+ addPendingJump(bo, ImmPtr(c->raw()), RelocationKind::JITCODE);
+ ScratchRegisterScope scratch(asMasm());
+ ma_movPatchable(ImmPtr(c->raw()), scratch, Always);
+ ma_bx(scratch);
+ }
+ void branch(const Register reg) { ma_bx(reg); }
+ void nop() { ma_nop(); }
+ void shortJumpSizedNop() { ma_nop(); }
+ void ret() { ma_pop(pc); }
+ void retn(Imm32 n) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_popn_pc(n, scratch, scratch2);
+ }
+ void push(Imm32 imm) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_mov(imm, scratch);
+ ma_push(scratch);
+ }
+ void push(ImmWord imm) { push(Imm32(imm.value)); }
+ void push(ImmGCPtr imm) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_mov(imm, scratch);
+ ma_push(scratch);
+ }
+ void push(const Address& addr) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(addr, scratch, scratch2);
+ ma_push(scratch);
+ }
+ void push(Register reg) {
+ if (reg == sp) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_push_sp(reg, scratch);
+ } else {
+ ma_push(reg);
+ }
+ }
+ void push(FloatRegister reg) { ma_vpush(VFPRegister(reg)); }
+ void pushWithPadding(Register reg, const Imm32 extraSpace) {
+ ScratchRegisterScope scratch(asMasm());
+ Imm32 totSpace = Imm32(extraSpace.value + 4);
+ ma_dtr(IsStore, sp, totSpace, reg, scratch, PreIndex);
+ }
+ void pushWithPadding(Imm32 imm, const Imm32 extraSpace) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ Imm32 totSpace = Imm32(extraSpace.value + 4);
+ ma_mov(imm, scratch);
+ ma_dtr(IsStore, sp, totSpace, scratch, scratch2, PreIndex);
+ }
+
+ void pop(Register reg) { ma_pop(reg); }
+ void pop(FloatRegister reg) { ma_vpop(VFPRegister(reg)); }
+
+ void popN(Register reg, Imm32 extraSpace) {
+ ScratchRegisterScope scratch(asMasm());
+ Imm32 totSpace = Imm32(extraSpace.value + 4);
+ ma_dtr(IsLoad, sp, totSpace, reg, scratch, PostIndex);
+ }
+
+ CodeOffset toggledJump(Label* label);
+
+ // Emit a BLX or NOP instruction. ToggleCall can be used to patch this
+ // instruction.
+ CodeOffset toggledCall(JitCode* target, bool enabled);
+
+ CodeOffset pushWithPatch(ImmWord imm) {
+ ScratchRegisterScope scratch(asMasm());
+ CodeOffset label = movWithPatch(imm, scratch);
+ ma_push(scratch);
+ return label;
+ }
+
+ CodeOffset movWithPatch(ImmWord imm, Register dest) {
+ CodeOffset label = CodeOffset(currentOffset());
+ ma_movPatchable(Imm32(imm.value), dest, Always);
+ return label;
+ }
+ CodeOffset movWithPatch(ImmPtr imm, Register dest) {
+ return movWithPatch(ImmWord(uintptr_t(imm.value)), dest);
+ }
+
+ void jump(Label* label) { as_b(label); }
+ void jump(JitCode* code) { branch(code); }
+ void jump(ImmPtr ptr) {
+ ScratchRegisterScope scratch(asMasm());
+ movePtr(ptr, scratch);
+ ma_bx(scratch);
+ }
+ void jump(TrampolinePtr code) { jump(ImmPtr(code.value)); }
+ void jump(Register reg) { ma_bx(reg); }
+ void jump(const Address& addr) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(addr, scratch, scratch2);
+ ma_bx(scratch);
+ }
+
+ void negl(Register reg) { ma_neg(reg, reg, SetCC); }
+ void test32(Register lhs, Register rhs) { ma_tst(lhs, rhs); }
+ void test32(Register lhs, Imm32 imm) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_tst(lhs, imm, scratch);
+ }
+ void test32(const Address& addr, Imm32 imm) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(addr, scratch, scratch2);
+ ma_tst(scratch, imm, scratch2);
+ }
+ void testPtr(Register lhs, Register rhs) { test32(lhs, rhs); }
+
+ void splitTagForTest(const ValueOperand& value, ScratchTagScope& tag) {
+ MOZ_ASSERT(value.typeReg() == tag);
+ }
+
+ // Higher level tag testing code.
+ Condition testInt32(Condition cond, const ValueOperand& value);
+ Condition testBoolean(Condition cond, const ValueOperand& value);
+ Condition testDouble(Condition cond, const ValueOperand& value);
+ Condition testNull(Condition cond, const ValueOperand& value);
+ Condition testUndefined(Condition cond, const ValueOperand& value);
+ Condition testString(Condition cond, const ValueOperand& value);
+ Condition testSymbol(Condition cond, const ValueOperand& value);
+ Condition testBigInt(Condition cond, const ValueOperand& value);
+ Condition testObject(Condition cond, const ValueOperand& value);
+ Condition testNumber(Condition cond, const ValueOperand& value);
+ Condition testMagic(Condition cond, const ValueOperand& value);
+
+ Condition testPrimitive(Condition cond, const ValueOperand& value);
+ Condition testGCThing(Condition cond, const ValueOperand& value);
+
+ // Register-based tests.
+ Condition testInt32(Condition cond, Register tag);
+ Condition testBoolean(Condition cond, Register tag);
+ Condition testNull(Condition cond, Register tag);
+ Condition testUndefined(Condition cond, Register tag);
+ Condition testString(Condition cond, Register tag);
+ Condition testSymbol(Condition cond, Register tag);
+ Condition testBigInt(Condition cond, Register tag);
+ Condition testObject(Condition cond, Register tag);
+ Condition testDouble(Condition cond, Register tag);
+ Condition testNumber(Condition cond, Register tag);
+ Condition testMagic(Condition cond, Register tag);
+ Condition testPrimitive(Condition cond, Register tag);
+ Condition testGCThing(Condition cond, Register tag);
+
+ Condition testGCThing(Condition cond, const Address& address);
+ Condition testMagic(Condition cond, const Address& address);
+ Condition testInt32(Condition cond, const Address& address);
+ Condition testDouble(Condition cond, const Address& address);
+ Condition testBoolean(Condition cond, const Address& address);
+ Condition testNull(Condition cond, const Address& address);
+ Condition testUndefined(Condition cond, const Address& address);
+ Condition testString(Condition cond, const Address& address);
+ Condition testSymbol(Condition cond, const Address& address);
+ Condition testBigInt(Condition cond, const Address& address);
+ Condition testObject(Condition cond, const Address& address);
+ Condition testNumber(Condition cond, const Address& address);
+
+ Condition testUndefined(Condition cond, const BaseIndex& src);
+ Condition testNull(Condition cond, const BaseIndex& src);
+ Condition testBoolean(Condition cond, const BaseIndex& src);
+ Condition testString(Condition cond, const BaseIndex& src);
+ Condition testSymbol(Condition cond, const BaseIndex& src);
+ Condition testBigInt(Condition cond, const BaseIndex& src);
+ Condition testInt32(Condition cond, const BaseIndex& src);
+ Condition testObject(Condition cond, const BaseIndex& src);
+ Condition testDouble(Condition cond, const BaseIndex& src);
+ Condition testMagic(Condition cond, const BaseIndex& src);
+ Condition testGCThing(Condition cond, const BaseIndex& src);
+
+ // Unboxing code.
+ void unboxNonDouble(const ValueOperand& operand, Register dest,
+ JSValueType type);
+ void unboxNonDouble(const Address& src, Register dest, JSValueType type);
+ void unboxNonDouble(const BaseIndex& src, Register dest, JSValueType type);
+ void unboxInt32(const ValueOperand& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_INT32);
+ }
+ void unboxInt32(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_INT32);
+ }
+ void unboxInt32(const BaseIndex& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_INT32);
+ }
+ void unboxBoolean(const ValueOperand& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_BOOLEAN);
+ }
+ void unboxBoolean(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_BOOLEAN);
+ }
+ void unboxBoolean(const BaseIndex& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_BOOLEAN);
+ }
+ void unboxString(const ValueOperand& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_STRING);
+ }
+ void unboxString(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_STRING);
+ }
+ void unboxSymbol(const ValueOperand& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_SYMBOL);
+ }
+ void unboxSymbol(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_SYMBOL);
+ }
+ void unboxBigInt(const ValueOperand& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_BIGINT);
+ }
+ void unboxBigInt(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_BIGINT);
+ }
+ void unboxObject(const ValueOperand& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObject(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObject(const BaseIndex& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObjectOrNull(const ValueOperand& src, Register dest) {
+ // Due to Spectre mitigation logic (see Value.h), if the value is an Object
+ // then this yields the object; otherwise it yields zero (null), as desired.
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObjectOrNull(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObjectOrNull(const BaseIndex& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxDouble(const ValueOperand& src, FloatRegister dest);
+ void unboxDouble(const Address& src, FloatRegister dest);
+ void unboxDouble(const BaseIndex& src, FloatRegister dest);
+
+ void unboxValue(const ValueOperand& src, AnyRegister dest, JSValueType type);
+
+ // See comment in MacroAssembler-x64.h.
+ void unboxGCThingForGCBarrier(const Address& src, Register dest) {
+ load32(ToPayload(src), dest);
+ }
+
+ void notBoolean(const ValueOperand& val) {
+ as_eor(val.payloadReg(), val.payloadReg(), Imm8(1));
+ }
+
+ template <typename T>
+ void fallibleUnboxPtrImpl(const T& src, Register dest, JSValueType type,
+ Label* fail);
+
+ // Boxing code.
+ void boxDouble(FloatRegister src, const ValueOperand& dest, FloatRegister);
+ void boxNonDouble(JSValueType type, Register src, const ValueOperand& dest);
+
+ // Extended unboxing API. If the payload is already in a register, returns
+ // that register. Otherwise, provides a move to the given scratch register,
+ // and returns that.
+ [[nodiscard]] Register extractObject(const Address& address,
+ Register scratch);
+ [[nodiscard]] Register extractObject(const ValueOperand& value,
+ Register scratch) {
+ unboxNonDouble(value, value.payloadReg(), JSVAL_TYPE_OBJECT);
+ return value.payloadReg();
+ }
+ [[nodiscard]] Register extractSymbol(const ValueOperand& value,
+ Register scratch) {
+ unboxNonDouble(value, value.payloadReg(), JSVAL_TYPE_SYMBOL);
+ return value.payloadReg();
+ }
+ [[nodiscard]] Register extractInt32(const ValueOperand& value,
+ Register scratch) {
+ return value.payloadReg();
+ }
+ [[nodiscard]] Register extractBoolean(const ValueOperand& value,
+ Register scratch) {
+ return value.payloadReg();
+ }
+ [[nodiscard]] Register extractTag(const Address& address, Register scratch);
+ [[nodiscard]] Register extractTag(const BaseIndex& address, Register scratch);
+ [[nodiscard]] Register extractTag(const ValueOperand& value,
+ Register scratch) {
+ return value.typeReg();
+ }
+
+ void boolValueToDouble(const ValueOperand& operand, FloatRegister dest);
+ void int32ValueToDouble(const ValueOperand& operand, FloatRegister dest);
+ void loadInt32OrDouble(const Address& src, FloatRegister dest);
+ void loadInt32OrDouble(Register base, Register index, FloatRegister dest,
+ int32_t shift = defaultShift);
+ void loadConstantDouble(double dp, FloatRegister dest);
+
+ // Treat the value as a boolean, and set condition codes accordingly.
+ Condition testInt32Truthy(bool truthy, const ValueOperand& operand);
+ Condition testBooleanTruthy(bool truthy, const ValueOperand& operand);
+ Condition testDoubleTruthy(bool truthy, FloatRegister reg);
+ Condition testStringTruthy(bool truthy, const ValueOperand& value);
+ Condition testBigIntTruthy(bool truthy, const ValueOperand& value);
+
+ void boolValueToFloat32(const ValueOperand& operand, FloatRegister dest);
+ void int32ValueToFloat32(const ValueOperand& operand, FloatRegister dest);
+ void loadConstantFloat32(float f, FloatRegister dest);
+
+ void loadUnboxedValue(Address address, MIRType type, AnyRegister dest) {
+ if (dest.isFloat()) {
+ loadInt32OrDouble(address, dest.fpu());
+ } else {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(address, dest.gpr(), scratch);
+ }
+ }
+
+ void loadUnboxedValue(BaseIndex address, MIRType type, AnyRegister dest) {
+ if (dest.isFloat()) {
+ loadInt32OrDouble(address.base, address.index, dest.fpu(), address.scale);
+ } else {
+ load32(address, dest.gpr());
+ }
+ }
+
+ template <typename T>
+ void storeUnboxedPayload(ValueOperand value, T address, size_t nbytes,
+ JSValueType) {
+ switch (nbytes) {
+ case 4:
+ storePtr(value.payloadReg(), address);
+ return;
+ case 1:
+ store8(value.payloadReg(), address);
+ return;
+ default:
+ MOZ_CRASH("Bad payload width");
+ }
+ }
+
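+  // Note on layout (a sketch of the nunbox32 convention these helpers rely
+  // on): on 32-bit ARM a Value occupies two adjacent words, with the payload
+  // at offset NUNBOX32_PAYLOAD_OFFSET (0 on little-endian) and the type tag
+  // at offset NUNBOX32_TYPE_OFFSET (4); ToPayload() and ToType() select the
+  // corresponding word of an Address.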
+ void storeValue(ValueOperand val, const Address& dst);
+ void storeValue(ValueOperand val, const BaseIndex& dest);
+ void storeValue(JSValueType type, Register reg, BaseIndex dest) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ int32_t payloadoffset = dest.offset + NUNBOX32_PAYLOAD_OFFSET;
+ int32_t typeoffset = dest.offset + NUNBOX32_TYPE_OFFSET;
+
+ ma_alu(dest.base, lsl(dest.index, dest.scale), scratch, OpAdd);
+
+ // Store the payload.
+ if (payloadoffset < 4096 && payloadoffset > -4096) {
+ ma_str(reg, DTRAddr(scratch, DtrOffImm(payloadoffset)));
+ } else {
+ ma_str(reg, Address(scratch, payloadoffset), scratch2);
+ }
+
+ // Store the type.
+ if (typeoffset < 4096 && typeoffset > -4096) {
+ // Encodable as DTRAddr, so only two instructions needed.
+ ma_mov(ImmTag(JSVAL_TYPE_TO_TAG(type)), scratch2);
+ ma_str(scratch2, DTRAddr(scratch, DtrOffImm(typeoffset)));
+ } else {
+ // Since there are only two scratch registers, the offset must be
+ // applied early using a third instruction to be safe.
+ ma_add(Imm32(typeoffset), scratch, scratch2);
+ ma_mov(ImmTag(JSVAL_TYPE_TO_TAG(type)), scratch2);
+ ma_str(scratch2, DTRAddr(scratch, DtrOffImm(0)));
+ }
+ }
+ void storeValue(JSValueType type, Register reg, Address dest) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ ma_str(reg, dest, scratch2);
+ ma_mov(ImmTag(JSVAL_TYPE_TO_TAG(type)), scratch);
+ ma_str(scratch, Address(dest.base, dest.offset + NUNBOX32_TYPE_OFFSET),
+ scratch2);
+ }
+ void storeValue(const Value& val, const Address& dest) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ ma_mov(Imm32(val.toNunboxTag()), scratch);
+ ma_str(scratch, ToType(dest), scratch2);
+ if (val.isGCThing()) {
+ ma_mov(ImmGCPtr(val.toGCThing()), scratch);
+ } else {
+ ma_mov(Imm32(val.toNunboxPayload()), scratch);
+ }
+ ma_str(scratch, ToPayload(dest), scratch2);
+ }
+ void storeValue(const Value& val, BaseIndex dest) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ int32_t typeoffset = dest.offset + NUNBOX32_TYPE_OFFSET;
+ int32_t payloadoffset = dest.offset + NUNBOX32_PAYLOAD_OFFSET;
+
+ ma_alu(dest.base, lsl(dest.index, dest.scale), scratch, OpAdd);
+
+ // Store the type.
+ if (typeoffset < 4096 && typeoffset > -4096) {
+ ma_mov(Imm32(val.toNunboxTag()), scratch2);
+ ma_str(scratch2, DTRAddr(scratch, DtrOffImm(typeoffset)));
+ } else {
+ ma_add(Imm32(typeoffset), scratch, scratch2);
+ ma_mov(Imm32(val.toNunboxTag()), scratch2);
+ ma_str(scratch2, DTRAddr(scratch, DtrOffImm(0)));
+ // Restore scratch for the payload store.
+ ma_alu(dest.base, lsl(dest.index, dest.scale), scratch, OpAdd);
+ }
+
+ // Store the payload, marking if necessary.
+ if (payloadoffset < 4096 && payloadoffset > -4096) {
+ if (val.isGCThing()) {
+ ma_mov(ImmGCPtr(val.toGCThing()), scratch2);
+ } else {
+ ma_mov(Imm32(val.toNunboxPayload()), scratch2);
+ }
+ ma_str(scratch2, DTRAddr(scratch, DtrOffImm(payloadoffset)));
+ } else {
+ ma_add(Imm32(payloadoffset), scratch, scratch2);
+ if (val.isGCThing()) {
+ ma_mov(ImmGCPtr(val.toGCThing()), scratch2);
+ } else {
+ ma_mov(Imm32(val.toNunboxPayload()), scratch2);
+ }
+ ma_str(scratch2, DTRAddr(scratch, DtrOffImm(0)));
+ }
+ }
+ void storeValue(const Address& src, const Address& dest, Register temp) {
+ load32(ToType(src), temp);
+ store32(temp, ToType(dest));
+
+ load32(ToPayload(src), temp);
+ store32(temp, ToPayload(dest));
+ }
+
+ void storePrivateValue(Register src, const Address& dest) {
+ store32(Imm32(0), ToType(dest));
+ store32(src, ToPayload(dest));
+ }
+ void storePrivateValue(ImmGCPtr imm, const Address& dest) {
+ store32(Imm32(0), ToType(dest));
+ storePtr(imm, ToPayload(dest));
+ }
+
+ void loadValue(Address src, ValueOperand val);
+ void loadValue(Operand dest, ValueOperand val) {
+ loadValue(dest.toAddress(), val);
+ }
+ void loadValue(const BaseIndex& addr, ValueOperand val);
+
+ // Like loadValue but guaranteed to not use LDRD or LDM instructions (these
+ // don't support unaligned accesses).
+ void loadUnalignedValue(const Address& src, ValueOperand dest);
+
+ void tagValue(JSValueType type, Register payload, ValueOperand dest);
+
+ void pushValue(ValueOperand val);
+ void popValue(ValueOperand val);
+ void pushValue(const Value& val) {
+ push(Imm32(val.toNunboxTag()));
+ if (val.isGCThing()) {
+ push(ImmGCPtr(val.toGCThing()));
+ } else {
+ push(Imm32(val.toNunboxPayload()));
+ }
+ }
+ void pushValue(JSValueType type, Register reg) {
+ push(ImmTag(JSVAL_TYPE_TO_TAG(type)));
+ ma_push(reg);
+ }
+ void pushValue(const Address& addr);
+ void pushValue(const BaseIndex& addr, Register scratch);
+
+ void storePayload(const Value& val, const Address& dest);
+ void storePayload(Register src, const Address& dest);
+ void storePayload(const Value& val, const BaseIndex& dest);
+ void storePayload(Register src, const BaseIndex& dest);
+ void storeTypeTag(ImmTag tag, const Address& dest);
+ void storeTypeTag(ImmTag tag, const BaseIndex& dest);
+
+ void handleFailureWithHandlerTail(Label* profilerExitTail,
+ Label* bailoutTail);
+
+ /////////////////////////////////////////////////////////////////
+ // Common interface.
+ /////////////////////////////////////////////////////////////////
+ public:
+ void not32(Register reg);
+
+ void move32(Imm32 imm, Register dest);
+ void move32(Register src, Register dest);
+
+ void movePtr(Register src, Register dest);
+ void movePtr(ImmWord imm, Register dest);
+ void movePtr(ImmPtr imm, Register dest);
+ void movePtr(wasm::SymbolicAddress imm, Register dest);
+ void movePtr(ImmGCPtr imm, Register dest);
+
+ void load8SignExtend(const Address& address, Register dest);
+ void load8SignExtend(const BaseIndex& src, Register dest);
+
+ void load8ZeroExtend(const Address& address, Register dest);
+ void load8ZeroExtend(const BaseIndex& src, Register dest);
+
+ void load16SignExtend(const Address& address, Register dest);
+ void load16SignExtend(const BaseIndex& src, Register dest);
+
+ template <typename S>
+ void load16UnalignedSignExtend(const S& src, Register dest) {
+ // load16SignExtend uses |ldrsh|, which supports unaligned access.
+ load16SignExtend(src, dest);
+ }
+
+ void load16ZeroExtend(const Address& address, Register dest);
+ void load16ZeroExtend(const BaseIndex& src, Register dest);
+
+ template <typename S>
+ void load16UnalignedZeroExtend(const S& src, Register dest) {
+ // load16ZeroExtend uses |ldrh|, which supports unaligned access.
+ load16ZeroExtend(src, dest);
+ }
+
+ void load32(const Address& address, Register dest);
+ void load32(const BaseIndex& address, Register dest);
+ void load32(AbsoluteAddress address, Register dest);
+
+ template <typename S>
+ void load32Unaligned(const S& src, Register dest) {
+ // load32 uses |ldr|, which supports unaligned access.
+ load32(src, dest);
+ }
+
+ void load64(const Address& address, Register64 dest) {
+ bool highBeforeLow = address.base == dest.low;
+ if (highBeforeLow) {
+ load32(HighWord(address), dest.high);
+ load32(LowWord(address), dest.low);
+ } else {
+ load32(LowWord(address), dest.low);
+ load32(HighWord(address), dest.high);
+ }
+ }
+ void load64(const BaseIndex& address, Register64 dest) {
+ // If you run into this, relax your register allocation constraints.
+ MOZ_RELEASE_ASSERT(
+ !((address.base == dest.low || address.base == dest.high) &&
+ (address.index == dest.low || address.index == dest.high)));
+ bool highBeforeLow = address.base == dest.low || address.index == dest.low;
+ if (highBeforeLow) {
+ load32(HighWord(address), dest.high);
+ load32(LowWord(address), dest.low);
+ } else {
+ load32(LowWord(address), dest.low);
+ load32(HighWord(address), dest.high);
+ }
+ }
+
+ template <typename S>
+ void load64Unaligned(const S& src, Register64 dest) {
+ // load64 calls load32, which supports unaligned accesses.
+ load64(src, dest);
+ }
+
+ void loadPtr(const Address& address, Register dest);
+ void loadPtr(const BaseIndex& src, Register dest);
+ void loadPtr(AbsoluteAddress address, Register dest);
+ void loadPtr(wasm::SymbolicAddress address, Register dest);
+
+ void loadPrivate(const Address& address, Register dest);
+
+ void loadDouble(const Address& addr, FloatRegister dest);
+ void loadDouble(const BaseIndex& src, FloatRegister dest);
+
+ // Load a float value into a register, then expand it to a double.
+ void loadFloatAsDouble(const Address& addr, FloatRegister dest);
+ void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest);
+
+ void loadFloat32(const Address& addr, FloatRegister dest);
+ void loadFloat32(const BaseIndex& src, FloatRegister dest);
+
+ void store8(Register src, const Address& address);
+ void store8(Imm32 imm, const Address& address);
+ void store8(Register src, const BaseIndex& address);
+ void store8(Imm32 imm, const BaseIndex& address);
+
+ void store16(Register src, const Address& address);
+ void store16(Imm32 imm, const Address& address);
+ void store16(Register src, const BaseIndex& address);
+ void store16(Imm32 imm, const BaseIndex& address);
+
+ template <typename S, typename T>
+ void store16Unaligned(const S& src, const T& dest) {
+ // store16 uses |strh|, which supports unaligned access.
+ store16(src, dest);
+ }
+
+ void store32(Register src, AbsoluteAddress address);
+ void store32(Register src, const Address& address);
+ void store32(Register src, const BaseIndex& address);
+ void store32(Imm32 src, const Address& address);
+ void store32(Imm32 src, const BaseIndex& address);
+
+ template <typename S, typename T>
+ void store32Unaligned(const S& src, const T& dest) {
+ // store32 uses |str|, which supports unaligned access.
+ store32(src, dest);
+ }
+
+ void store64(Register64 src, Address address) {
+ store32(src.low, LowWord(address));
+ store32(src.high, HighWord(address));
+ }
+
+ void store64(Register64 src, const BaseIndex& address) {
+ store32(src.low, LowWord(address));
+ store32(src.high, HighWord(address));
+ }
+
+ void store64(Imm64 imm, Address address) {
+ store32(imm.low(), LowWord(address));
+ store32(imm.hi(), HighWord(address));
+ }
+
+ void store64(Imm64 imm, const BaseIndex& address) {
+ store32(imm.low(), LowWord(address));
+ store32(imm.hi(), HighWord(address));
+ }
+
+ template <typename S, typename T>
+ void store64Unaligned(const S& src, const T& dest) {
+ // store64 calls store32, which supports unaligned access.
+ store64(src, dest);
+ }
+
+ void storePtr(ImmWord imm, const Address& address);
+ void storePtr(ImmWord imm, const BaseIndex& address);
+ void storePtr(ImmPtr imm, const Address& address);
+ void storePtr(ImmPtr imm, const BaseIndex& address);
+ void storePtr(ImmGCPtr imm, const Address& address);
+ void storePtr(ImmGCPtr imm, const BaseIndex& address);
+ void storePtr(Register src, const Address& address);
+ void storePtr(Register src, const BaseIndex& address);
+ void storePtr(Register src, AbsoluteAddress dest);
+
+ void moveDouble(FloatRegister src, FloatRegister dest,
+ Condition cc = Always) {
+ ma_vmov(src, dest, cc);
+ }
+
+ inline void incrementInt32Value(const Address& addr);
+
+ void cmp32(Register lhs, Imm32 rhs);
+ void cmp32(Register lhs, Register rhs);
+ void cmp32(const Address& lhs, Imm32 rhs);
+ void cmp32(const Address& lhs, Register rhs);
+
+ void cmpPtr(Register lhs, Register rhs);
+ void cmpPtr(Register lhs, ImmWord rhs);
+ void cmpPtr(Register lhs, ImmPtr rhs);
+ void cmpPtr(Register lhs, ImmGCPtr rhs);
+ void cmpPtr(Register lhs, Imm32 rhs);
+ void cmpPtr(const Address& lhs, Register rhs);
+ void cmpPtr(const Address& lhs, ImmWord rhs);
+ void cmpPtr(const Address& lhs, ImmPtr rhs);
+ void cmpPtr(const Address& lhs, ImmGCPtr rhs);
+ void cmpPtr(const Address& lhs, Imm32 rhs);
+
+ void setStackArg(Register reg, uint32_t arg);
+
+ void breakpoint();
+ // Conditional breakpoint.
+ void breakpoint(Condition cc);
+
+ // Trigger the simulator's interactive read-eval-print loop.
+ // The message will be printed at the stopping point.
+ // (On non-simulator builds, does nothing.)
+ void simulatorStop(const char* msg);
+
+ // Evaluate srcDest = minmax<isMax>{Float32,Double}(srcDest, other).
+ // Checks for NaN if canBeNaN is true.
+ void minMaxDouble(FloatRegister srcDest, FloatRegister other, bool canBeNaN,
+ bool isMax);
+ void minMaxFloat32(FloatRegister srcDest, FloatRegister other, bool canBeNaN,
+ bool isMax);
+
+ void compareDouble(FloatRegister lhs, FloatRegister rhs);
+
+ void compareFloat(FloatRegister lhs, FloatRegister rhs);
+
+ void checkStackAlignment();
+
+ // If source is a double, load it into dest. If source is int32, convert it
+ // to double. Else, branch to failure.
+ void ensureDouble(const ValueOperand& source, FloatRegister dest,
+ Label* failure);
+
+ void emitSet(Assembler::Condition cond, Register dest) {
+ ma_mov(Imm32(0), dest);
+ ma_mov(Imm32(1), dest, cond);
+ }
+
+ void testNullSet(Condition cond, const ValueOperand& value, Register dest) {
+ cond = testNull(cond, value);
+ emitSet(cond, dest);
+ }
+
+ void testObjectSet(Condition cond, const ValueOperand& value, Register dest) {
+ cond = testObject(cond, value);
+ emitSet(cond, dest);
+ }
+
+ void testUndefinedSet(Condition cond, const ValueOperand& value,
+ Register dest) {
+ cond = testUndefined(cond, value);
+ emitSet(cond, dest);
+ }
+
+ protected:
+ bool buildOOLFakeExitFrame(void* fakeReturnAddr);
+
+ public:
+ void computeEffectiveAddress(const Address& address, Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_add(address.base, Imm32(address.offset), dest, scratch, LeaveCC);
+ }
+ void computeEffectiveAddress(const BaseIndex& address, Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_alu(address.base, lsl(address.index, address.scale), dest, OpAdd,
+ LeaveCC);
+ if (address.offset) {
+ ma_add(dest, Imm32(address.offset), dest, scratch, LeaveCC);
+ }
+ }
+ void floor(FloatRegister input, Register output, Label* handleNotAnInt);
+ void floorf(FloatRegister input, Register output, Label* handleNotAnInt);
+ void ceil(FloatRegister input, Register output, Label* handleNotAnInt);
+ void ceilf(FloatRegister input, Register output, Label* handleNotAnInt);
+ void round(FloatRegister input, Register output, Label* handleNotAnInt,
+ FloatRegister tmp);
+ void roundf(FloatRegister input, Register output, Label* handleNotAnInt,
+ FloatRegister tmp);
+ void trunc(FloatRegister input, Register output, Label* handleNotAnInt);
+ void truncf(FloatRegister input, Register output, Label* handleNotAnInt);
+
+ void lea(Operand addr, Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_add(addr.baseReg(), Imm32(addr.disp()), dest, scratch);
+ }
+
+ void abiret() { as_bx(lr); }
+
+ void moveFloat32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always) {
+ as_vmov(VFPRegister(dest).singleOverlay(), VFPRegister(src).singleOverlay(),
+ cc);
+ }
+
+ // Instrumentation for entering and leaving the profiler.
+ void profilerEnterFrame(Register framePtr, Register scratch);
+ void profilerExitFrame();
+};
+
+typedef MacroAssemblerARMCompat MacroAssemblerSpecific;
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_MacroAssembler_arm_h */
diff --git a/js/src/jit/arm/MoveEmitter-arm.cpp b/js/src/jit/arm/MoveEmitter-arm.cpp
new file mode 100644
index 0000000000..1807c41b50
--- /dev/null
+++ b/js/src/jit/arm/MoveEmitter-arm.cpp
@@ -0,0 +1,413 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm/MoveEmitter-arm.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+MoveEmitterARM::MoveEmitterARM(MacroAssembler& masm)
+ : inCycle_(0),
+ masm(masm),
+ pushedAtCycle_(-1),
+ pushedAtSpill_(-1),
+ spilledReg_(InvalidReg),
+ spilledFloatReg_(InvalidFloatReg) {
+ pushedAtStart_ = masm.framePushed();
+}
+
+void MoveEmitterARM::emit(const MoveResolver& moves) {
+ if (moves.numCycles()) {
+ // Reserve stack for cycle resolution
+ static_assert(SpillSlotSize == 8);
+ masm.reserveStack(moves.numCycles() * SpillSlotSize);
+ pushedAtCycle_ = masm.framePushed();
+ }
+
+ for (size_t i = 0; i < moves.numMoves(); i++) {
+ emit(moves.getMove(i));
+ }
+}
+
+MoveEmitterARM::~MoveEmitterARM() { assertDone(); }
+
+Address MoveEmitterARM::cycleSlot(uint32_t slot, uint32_t subslot) const {
+ int32_t offset = masm.framePushed() - pushedAtCycle_;
+ MOZ_ASSERT(offset < 4096 && offset > -4096);
+ return Address(StackPointer, offset + slot * sizeof(double) + subslot);
+}
+
+Address MoveEmitterARM::spillSlot() const {
+ int32_t offset = masm.framePushed() - pushedAtSpill_;
+ MOZ_ASSERT(offset < 4096 && offset > -4096);
+ return Address(StackPointer, offset);
+}
+
+Address MoveEmitterARM::toAddress(const MoveOperand& operand) const {
+ MOZ_ASSERT(operand.isMemoryOrEffectiveAddress());
+
+ if (operand.base() != StackPointer) {
+ return Address(operand.base(), operand.disp());
+ }
+
+ MOZ_ASSERT(operand.disp() >= 0);
+
+ // Otherwise, the stack offset may need to be adjusted.
+ return Address(StackPointer,
+ operand.disp() + (masm.framePushed() - pushedAtStart_));
+}
+
+Register MoveEmitterARM::tempReg() {
+ if (spilledReg_ != InvalidReg) {
+ return spilledReg_;
+ }
+
+  // For now, just pick lr (r14) as the eviction point. r12/ip would be the
+  // obvious choice, but it is the scratch register, which is frequently used
+  // for address computations, such as those needed when we access values more
+  // than 4096 bytes off of the stack pointer. If lr ends up being a bad
+  // choice, we can switch to actual heuristics later.
+ spilledReg_ = r14;
+ if (pushedAtSpill_ == -1) {
+ masm.Push(spilledReg_);
+ pushedAtSpill_ = masm.framePushed();
+ } else {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_str(spilledReg_, spillSlot(), scratch);
+ }
+ return spilledReg_;
+}
+
+void MoveEmitterARM::breakCycle(const MoveOperand& from, const MoveOperand& to,
+ MoveOp::Type type, uint32_t slotId) {
+  // A move cycle has the pattern:
+ // (A -> B)
+ // (B -> A)
+ //
+ // This case handles (A -> B), which we reach first. We save B, then allow
+ // the original move to continue.
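+  //
+  // For example, for the cycle (r0 -> r1), (r1 -> r0): this function saves
+  // r1's current value into the cycle slot, the ordinary move then copies r0
+  // into r1, and completeCycle() later loads the saved value into r0. (An
+  // illustrative sketch; the operands may equally be memory locations or FP
+  // registers.)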
+
+ ScratchRegisterScope scratch(masm);
+
+ switch (type) {
+ case MoveOp::FLOAT32:
+ if (to.isMemory()) {
+ ScratchFloat32Scope scratchFloat32(masm);
+ masm.ma_vldr(toAddress(to), scratchFloat32, scratch);
+ // Since it is uncertain if the load will be aligned or not
+ // just fill both of them with the same value.
+ masm.ma_vstr(scratchFloat32, cycleSlot(slotId, 0), scratch);
+ masm.ma_vstr(scratchFloat32, cycleSlot(slotId, 4), scratch);
+ } else if (to.isGeneralReg()) {
+        // Since it is uncertain whether the load will be aligned or not,
+        // just fill both subslots with the same value.
+ masm.ma_str(to.reg(), cycleSlot(slotId, 0), scratch);
+ masm.ma_str(to.reg(), cycleSlot(slotId, 4), scratch);
+ } else {
+ FloatRegister src = to.floatReg();
+ // Just always store the largest possible size. Currently, this is
+ // a double. When SIMD is added, two doubles will need to be stored.
+ masm.ma_vstr(src.doubleOverlay(), cycleSlot(slotId, 0), scratch);
+ }
+ break;
+ case MoveOp::DOUBLE:
+ if (to.isMemory()) {
+ ScratchDoubleScope scratchDouble(masm);
+ masm.ma_vldr(toAddress(to), scratchDouble, scratch);
+ masm.ma_vstr(scratchDouble, cycleSlot(slotId, 0), scratch);
+ } else if (to.isGeneralRegPair()) {
+ ScratchDoubleScope scratchDouble(masm);
+ masm.ma_vxfer(to.evenReg(), to.oddReg(), scratchDouble);
+ masm.ma_vstr(scratchDouble, cycleSlot(slotId, 0), scratch);
+ } else {
+ masm.ma_vstr(to.floatReg().doubleOverlay(), cycleSlot(slotId, 0),
+ scratch);
+ }
+ break;
+ case MoveOp::INT32:
+ case MoveOp::GENERAL:
+      // A non-VFP value.
+ if (to.isMemory()) {
+ Register temp = tempReg();
+ masm.ma_ldr(toAddress(to), temp, scratch);
+ masm.ma_str(temp, cycleSlot(0, 0), scratch);
+ } else {
+ if (to.reg() == spilledReg_) {
+ // If the destination was spilled, restore it first.
+ masm.ma_ldr(spillSlot(), spilledReg_, scratch);
+ spilledReg_ = InvalidReg;
+ }
+ masm.ma_str(to.reg(), cycleSlot(0, 0), scratch);
+ }
+ break;
+ default:
+ MOZ_CRASH("Unexpected move type");
+ }
+}
+
+void MoveEmitterARM::completeCycle(const MoveOperand& from,
+ const MoveOperand& to, MoveOp::Type type,
+ uint32_t slotId) {
+  // A move cycle has the pattern:
+ // (A -> B)
+ // (B -> A)
+ //
+ // This case handles (B -> A), which we reach last. We emit a move from the
+ // saved value of B, to A.
+
+ ScratchRegisterScope scratch(masm);
+
+ switch (type) {
+ case MoveOp::FLOAT32:
+ MOZ_ASSERT(!to.isGeneralRegPair());
+ if (to.isMemory()) {
+ ScratchFloat32Scope scratchFloat32(masm);
+ masm.ma_vldr(cycleSlot(slotId, 0), scratchFloat32, scratch);
+ masm.ma_vstr(scratchFloat32, toAddress(to), scratch);
+ } else if (to.isGeneralReg()) {
+ MOZ_ASSERT(type == MoveOp::FLOAT32);
+ masm.ma_ldr(toAddress(from), to.reg(), scratch);
+ } else {
+ uint32_t offset = 0;
+ if ((!from.isMemory()) && from.floatReg().numAlignedAliased() == 1) {
+ offset = sizeof(float);
+ }
+ masm.ma_vldr(cycleSlot(slotId, offset), to.floatReg(), scratch);
+ }
+ break;
+ case MoveOp::DOUBLE:
+ MOZ_ASSERT(!to.isGeneralReg());
+ if (to.isMemory()) {
+ ScratchDoubleScope scratchDouble(masm);
+ masm.ma_vldr(cycleSlot(slotId, 0), scratchDouble, scratch);
+ masm.ma_vstr(scratchDouble, toAddress(to), scratch);
+ } else if (to.isGeneralRegPair()) {
+ MOZ_ASSERT(type == MoveOp::DOUBLE);
+ ScratchDoubleScope scratchDouble(masm);
+ masm.ma_vldr(toAddress(from), scratchDouble, scratch);
+ masm.ma_vxfer(scratchDouble, to.evenReg(), to.oddReg());
+ } else {
+ uint32_t offset = 0;
+ if ((!from.isMemory()) && from.floatReg().numAlignedAliased() == 1) {
+ offset = sizeof(float);
+ }
+ masm.ma_vldr(cycleSlot(slotId, offset), to.floatReg(), scratch);
+ }
+ break;
+ case MoveOp::INT32:
+ case MoveOp::GENERAL:
+ MOZ_ASSERT(slotId == 0);
+ if (to.isMemory()) {
+ Register temp = tempReg();
+ masm.ma_ldr(cycleSlot(slotId, 0), temp, scratch);
+ masm.ma_str(temp, toAddress(to), scratch);
+ } else {
+ if (to.reg() == spilledReg_) {
+ // Make sure we don't re-clobber the spilled register later.
+ spilledReg_ = InvalidReg;
+ }
+ masm.ma_ldr(cycleSlot(slotId, 0), to.reg(), scratch);
+ }
+ break;
+ default:
+ MOZ_CRASH("Unexpected move type");
+ }
+}
+
+void MoveEmitterARM::emitMove(const MoveOperand& from, const MoveOperand& to) {
+ // Register pairs are used to store Double values during calls.
+ MOZ_ASSERT(!from.isGeneralRegPair());
+ MOZ_ASSERT(!to.isGeneralRegPair());
+
+ ScratchRegisterScope scratch(masm);
+
+ if (to.isGeneralReg() && to.reg() == spilledReg_) {
+ // If the destination is the spilled register, make sure we
+ // don't re-clobber its value.
+ spilledReg_ = InvalidReg;
+ }
+
+ if (from.isGeneralReg()) {
+ if (from.reg() == spilledReg_) {
+ // If the source is a register that has been spilled, make sure
+ // to load the source back into that register.
+ masm.ma_ldr(spillSlot(), spilledReg_, scratch);
+ spilledReg_ = InvalidReg;
+ }
+ if (to.isMemoryOrEffectiveAddress()) {
+ masm.ma_str(from.reg(), toAddress(to), scratch);
+ } else {
+ masm.ma_mov(from.reg(), to.reg());
+ }
+ } else if (to.isGeneralReg()) {
+ MOZ_ASSERT(from.isMemoryOrEffectiveAddress());
+ if (from.isMemory()) {
+ masm.ma_ldr(toAddress(from), to.reg(), scratch);
+ } else {
+ masm.ma_add(from.base(), Imm32(from.disp()), to.reg(), scratch);
+ }
+ } else {
+ // Memory to memory gpr move.
+ Register reg = tempReg();
+
+ MOZ_ASSERT(from.isMemoryOrEffectiveAddress());
+ if (from.isMemory()) {
+ masm.ma_ldr(toAddress(from), reg, scratch);
+ } else {
+ masm.ma_add(from.base(), Imm32(from.disp()), reg, scratch);
+ }
+ MOZ_ASSERT(to.base() != reg);
+ masm.ma_str(reg, toAddress(to), scratch);
+ }
+}
+
+void MoveEmitterARM::emitFloat32Move(const MoveOperand& from,
+ const MoveOperand& to) {
+ // Register pairs are used to store Double values during calls.
+ MOZ_ASSERT(!from.isGeneralRegPair());
+ MOZ_ASSERT(!to.isGeneralRegPair());
+
+ ScratchRegisterScope scratch(masm);
+
+ if (from.isFloatReg()) {
+ if (to.isFloatReg()) {
+ masm.ma_vmov_f32(from.floatReg(), to.floatReg());
+ } else if (to.isGeneralReg()) {
+ masm.ma_vxfer(from.floatReg(), to.reg());
+ } else {
+ masm.ma_vstr(VFPRegister(from.floatReg()).singleOverlay(), toAddress(to),
+ scratch);
+ }
+ } else if (from.isGeneralReg()) {
+ if (to.isFloatReg()) {
+ masm.ma_vxfer(from.reg(), to.floatReg());
+ } else if (to.isGeneralReg()) {
+ masm.ma_mov(from.reg(), to.reg());
+ } else {
+ masm.ma_str(from.reg(), toAddress(to), scratch);
+ }
+ } else if (to.isFloatReg()) {
+ masm.ma_vldr(toAddress(from), VFPRegister(to.floatReg()).singleOverlay(),
+ scratch);
+ } else if (to.isGeneralReg()) {
+ masm.ma_ldr(toAddress(from), to.reg(), scratch);
+ } else {
+ // Memory to memory move.
+ MOZ_ASSERT(from.isMemory());
+ ScratchFloat32Scope scratchFloat32(masm);
+ masm.ma_vldr(toAddress(from), scratchFloat32, scratch);
+ masm.ma_vstr(scratchFloat32, toAddress(to), scratch);
+ }
+}
+
+void MoveEmitterARM::emitDoubleMove(const MoveOperand& from,
+ const MoveOperand& to) {
+ // Registers are used to store pointers / int32 / float32 values.
+ MOZ_ASSERT(!from.isGeneralReg());
+ MOZ_ASSERT(!to.isGeneralReg());
+
+ ScratchRegisterScope scratch(masm);
+
+ if (from.isFloatReg()) {
+ if (to.isFloatReg()) {
+ masm.ma_vmov(from.floatReg(), to.floatReg());
+ } else if (to.isGeneralRegPair()) {
+ masm.ma_vxfer(from.floatReg(), to.evenReg(), to.oddReg());
+ } else {
+ masm.ma_vstr(from.floatReg(), toAddress(to), scratch);
+ }
+ } else if (from.isGeneralRegPair()) {
+ if (to.isFloatReg()) {
+ masm.ma_vxfer(from.evenReg(), from.oddReg(), to.floatReg());
+ } else if (to.isGeneralRegPair()) {
+ MOZ_ASSERT(!from.aliases(to));
+ masm.ma_mov(from.evenReg(), to.evenReg());
+ masm.ma_mov(from.oddReg(), to.oddReg());
+ } else {
+ ScratchDoubleScope scratchDouble(masm);
+ masm.ma_vxfer(from.evenReg(), from.oddReg(), scratchDouble);
+ masm.ma_vstr(scratchDouble, toAddress(to), scratch);
+ }
+ } else if (to.isFloatReg()) {
+ masm.ma_vldr(toAddress(from), to.floatReg(), scratch);
+ } else if (to.isGeneralRegPair()) {
+ MOZ_ASSERT(from.isMemory());
+ Address src = toAddress(from);
+ // Note: We can safely use the MoveOperand's displacement here,
+ // even if the base is SP: MoveEmitter::toOperand adjusts
+ // SP-relative operands by the difference between the current
+ // stack usage and stackAdjust, which emitter.finish() resets to
+ // 0.
+ //
+ // Warning: if the offset isn't within [-255,+255] then this
+ // will assert-fail (or, if non-debug, load the wrong words).
+ // Nothing uses such an offset at the time of this writing.
+ masm.ma_ldrd(EDtrAddr(src.base, EDtrOffImm(src.offset)), to.evenReg(),
+ to.oddReg());
+ } else {
+ // Memory to memory move.
+ MOZ_ASSERT(from.isMemory());
+ ScratchDoubleScope scratchDouble(masm);
+ masm.ma_vldr(toAddress(from), scratchDouble, scratch);
+ masm.ma_vstr(scratchDouble, toAddress(to), scratch);
+ }
+}
+
+void MoveEmitterARM::emit(const MoveOp& move) {
+ const MoveOperand& from = move.from();
+ const MoveOperand& to = move.to();
+
+ if (move.isCycleEnd() && move.isCycleBegin()) {
+  // A fun consequence of aliased registers is that you can have multiple
+  // cycles at once, and one can end exactly where another begins.
+ breakCycle(from, to, move.endCycleType(), move.cycleBeginSlot());
+ completeCycle(from, to, move.type(), move.cycleEndSlot());
+ return;
+ }
+
+ if (move.isCycleEnd()) {
+ MOZ_ASSERT(inCycle_);
+ completeCycle(from, to, move.type(), move.cycleEndSlot());
+ MOZ_ASSERT(inCycle_ > 0);
+ inCycle_--;
+ return;
+ }
+
+ if (move.isCycleBegin()) {
+ breakCycle(from, to, move.endCycleType(), move.cycleBeginSlot());
+ inCycle_++;
+ }
+
+ switch (move.type()) {
+ case MoveOp::FLOAT32:
+ emitFloat32Move(from, to);
+ break;
+ case MoveOp::DOUBLE:
+ emitDoubleMove(from, to);
+ break;
+ case MoveOp::INT32:
+ case MoveOp::GENERAL:
+ emitMove(from, to);
+ break;
+ default:
+ MOZ_CRASH("Unexpected move type");
+ }
+}
+
+void MoveEmitterARM::assertDone() { MOZ_ASSERT(inCycle_ == 0); }
+
+void MoveEmitterARM::finish() {
+ assertDone();
+
+ if (pushedAtSpill_ != -1 && spilledReg_ != InvalidReg) {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_ldr(spillSlot(), spilledReg_, scratch);
+ }
+ masm.freeStack(masm.framePushed() - pushedAtStart_);
+}
diff --git a/js/src/jit/arm/MoveEmitter-arm.h b/js/src/jit/arm/MoveEmitter-arm.h
new file mode 100644
index 0000000000..26a84fdbcc
--- /dev/null
+++ b/js/src/jit/arm/MoveEmitter-arm.h
@@ -0,0 +1,70 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_MoveEmitter_arm_h
+#define jit_arm_MoveEmitter_arm_h
+
+#include <stdint.h>
+
+#include "jit/MoveResolver.h"
+#include "jit/Registers.h"
+
+namespace js {
+namespace jit {
+
+struct Address;
+class MacroAssembler;
+
+class MoveEmitterARM {
+ uint32_t inCycle_;
+ MacroAssembler& masm;
+
+ // Original stack push value.
+ uint32_t pushedAtStart_;
+
+ // These store stack offsets to spill locations, snapshotting
+ // codegen->framePushed_ at the time they were allocated. They are -1 if no
+ // stack space has been allocated for that particular spill.
+ int32_t pushedAtCycle_;
+ int32_t pushedAtSpill_;
+
+ // These are registers that are available for temporary use. They may be
+ // assigned InvalidReg. If no corresponding spill space has been assigned,
+ // then these registers do not need to be spilled.
+ Register spilledReg_;
+ FloatRegister spilledFloatReg_;
+
+ void assertDone();
+ Register tempReg();
+ FloatRegister tempFloatReg();
+ Address cycleSlot(uint32_t slot, uint32_t subslot) const;
+ Address spillSlot() const;
+ Address toAddress(const MoveOperand& operand) const;
+
+ void emitMove(const MoveOperand& from, const MoveOperand& to);
+ void emitFloat32Move(const MoveOperand& from, const MoveOperand& to);
+ void emitDoubleMove(const MoveOperand& from, const MoveOperand& to);
+ void breakCycle(const MoveOperand& from, const MoveOperand& to,
+ MoveOp::Type type, uint32_t slot);
+ void completeCycle(const MoveOperand& from, const MoveOperand& to,
+ MoveOp::Type type, uint32_t slot);
+ void emit(const MoveOp& move);
+
+ public:
+ explicit MoveEmitterARM(MacroAssembler& masm);
+ ~MoveEmitterARM();
+ void emit(const MoveResolver& moves);
+ void finish();
+
+ void setScratchRegister(Register reg) {}
+};
+
+typedef MoveEmitterARM MoveEmitter;
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_MoveEmitter_arm_h */
diff --git a/js/src/jit/arm/SharedICHelpers-arm-inl.h b/js/src/jit/arm/SharedICHelpers-arm-inl.h
new file mode 100644
index 0000000000..2943bafbd8
--- /dev/null
+++ b/js/src/jit/arm/SharedICHelpers-arm-inl.h
@@ -0,0 +1,79 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_SharedICHelpers_arm_inl_h
+#define jit_arm_SharedICHelpers_arm_inl_h
+
+#include "jit/BaselineFrame.h"
+#include "jit/SharedICHelpers.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+namespace js {
+namespace jit {
+
+inline void EmitBaselineTailCallVM(TrampolinePtr target, MacroAssembler& masm,
+ uint32_t argSize) {
+#ifdef DEBUG
+  // Here we assume that R0 and R1 have been pushed and that R2 is unused.
+ static_assert(R2 == ValueOperand(r1, r0));
+
+ // Store frame size without VMFunction arguments for debug assertions.
+ masm.movePtr(FramePointer, r0);
+ masm.ma_sub(StackPointer, r0);
+ masm.sub32(Imm32(argSize), r0);
+ Address frameSizeAddr(FramePointer,
+ BaselineFrame::reverseOffsetOfDebugFrameSize());
+ masm.store32(r0, frameSizeAddr);
+#endif
+
+ // Push frame descriptor and perform the tail call.
+ // ICTailCallReg (lr) already contains the return address (as we keep
+ // it there through the stub calls), but the VMWrapper code being called
+ // expects the return address to also be pushed on the stack.
+ static_assert(ICTailCallReg == lr);
+ masm.pushFrameDescriptor(FrameType::BaselineJS);
+ masm.push(lr);
+ masm.jump(target);
+}
+
+inline void EmitBaselineCallVM(TrampolinePtr target, MacroAssembler& masm) {
+ masm.pushFrameDescriptor(FrameType::BaselineStub);
+ masm.call(target);
+}
+
+inline void EmitBaselineEnterStubFrame(MacroAssembler& masm, Register scratch) {
+ MOZ_ASSERT(scratch != ICTailCallReg);
+
+#ifdef DEBUG
+ // Compute frame size.
+ masm.mov(FramePointer, scratch);
+ masm.ma_sub(StackPointer, scratch);
+
+ Address frameSizeAddr(FramePointer,
+ BaselineFrame::reverseOffsetOfDebugFrameSize());
+ masm.store32(scratch, frameSizeAddr);
+#endif
+
+ // Push frame descriptor and return address.
+ masm.PushFrameDescriptor(FrameType::BaselineJS);
+ masm.Push(ICTailCallReg);
+
+ // Save old frame pointer, stack pointer and stub reg.
+ masm.Push(FramePointer);
+ masm.mov(StackPointer, FramePointer);
+
+ masm.Push(ICStubReg);
+
+ // We pushed 4 words, so the stack is still aligned to 8 bytes.
+ masm.checkStackAlignment();
+}
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_SharedICHelpers_arm_inl_h */
diff --git a/js/src/jit/arm/SharedICHelpers-arm.h b/js/src/jit/arm/SharedICHelpers-arm.h
new file mode 100644
index 0000000000..93475abc62
--- /dev/null
+++ b/js/src/jit/arm/SharedICHelpers-arm.h
@@ -0,0 +1,80 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_SharedICHelpers_arm_h
+#define jit_arm_SharedICHelpers_arm_h
+
+#include "jit/BaselineIC.h"
+#include "jit/JitFrames.h"
+#include "jit/MacroAssembler.h"
+#include "jit/SharedICRegisters.h"
+
+namespace js {
+namespace jit {
+
+// Distance from sp to the top Value inside an IC stub (no return address on the
+// stack on ARM).
+static const size_t ICStackValueOffset = 0;
+
+inline void EmitRestoreTailCallReg(MacroAssembler& masm) {
+ // No-op on ARM because link register is always holding the return address.
+}
+
+inline void EmitRepushTailCallReg(MacroAssembler& masm) {
+ // No-op on ARM because link register is always holding the return address.
+}
+
+inline void EmitCallIC(MacroAssembler& masm, CodeOffset* callOffset) {
+ // The stub pointer must already be in ICStubReg.
+ // Load stubcode pointer from the ICStub.
+ // R2 won't be active when we call ICs, so we can use r0.
+ static_assert(R2 == ValueOperand(r1, r0));
+ masm.loadPtr(Address(ICStubReg, ICStub::offsetOfStubCode()), r0);
+
+ // Call the stubcode via a direct branch-and-link.
+ masm.ma_blx(r0);
+ *callOffset = CodeOffset(masm.currentOffset());
+}
+
+inline void EmitReturnFromIC(MacroAssembler& masm) { masm.ma_mov(lr, pc); }
+
+inline void EmitBaselineLeaveStubFrame(MacroAssembler& masm) {
+ Address stubAddr(FramePointer, BaselineStubFrameLayout::ICStubOffsetFromFP);
+ masm.loadPtr(stubAddr, ICStubReg);
+
+ masm.mov(FramePointer, StackPointer);
+ masm.Pop(FramePointer);
+
+ // Load the return address.
+ masm.Pop(ICTailCallReg);
+
+ // Discard the frame descriptor.
+ ScratchRegisterScope scratch(masm);
+ masm.Pop(scratch);
+}
+
+template <typename AddrType>
+inline void EmitPreBarrier(MacroAssembler& masm, const AddrType& addr,
+ MIRType type) {
+ // On ARM, lr is clobbered by guardedCallPreBarrier. Save it first.
+ masm.push(lr);
+ masm.guardedCallPreBarrier(addr, type);
+ masm.pop(lr);
+}
+
+inline void EmitStubGuardFailure(MacroAssembler& masm) {
+ // Load next stub into ICStubReg.
+ masm.loadPtr(Address(ICStubReg, ICCacheIRStub::offsetOfNext()), ICStubReg);
+
+  // The return address is already loaded; just jump to the next stubcode.
+ static_assert(ICTailCallReg == lr);
+ masm.jump(Address(ICStubReg, ICStub::offsetOfStubCode()));
+}
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_SharedICHelpers_arm_h */
diff --git a/js/src/jit/arm/SharedICRegisters-arm.h b/js/src/jit/arm/SharedICRegisters-arm.h
new file mode 100644
index 0000000000..16aabbf0b3
--- /dev/null
+++ b/js/src/jit/arm/SharedICRegisters-arm.h
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_SharedICRegisters_arm_h
+#define jit_arm_SharedICRegisters_arm_h
+
+#include "jit/arm/Assembler-arm.h"
+#include "jit/Registers.h"
+#include "jit/RegisterSets.h"
+
+namespace js {
+namespace jit {
+
+// r15 = program-counter
+// r14 = link-register
+// r13 = stack-pointer
+// r11 = frame-pointer
+
+// ValueOperands R0, R1, and R2.
+// R0 == JSReturnReg, and R2 uses registers not preserved across calls. R1 value
+// should be preserved across calls.
+static constexpr ValueOperand R0(r3, r2);
+static constexpr ValueOperand R1(r5, r4);
+static constexpr ValueOperand R2(r1, r0);
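+// On nunbox32 the first register of a ValueOperand is the type register and
+// the second the payload register, so e.g. R0 keeps its tag in r3 and its
+// payload in r2 (a descriptive note; see the ValueOperand definition in
+// RegisterSets.h).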
+
+// ICTailCallReg and ICStubReg
+// These use registers that are not preserved across calls.
+static constexpr Register ICTailCallReg = r14;
+static constexpr Register ICStubReg = r9;
+
+// Register used internally by MacroAssemblerARM.
+static constexpr Register BaselineSecondScratchReg = r6;
+
+// R7 - R9 are generally available for use within stubcode.
+
+// Note that ICTailCallReg is actually just the link register. In ARM code
+// emission, we do not clobber ICTailCallReg since we keep the return
+// address for calls there.
+
+// FloatReg0 must be equal to ReturnFloatReg.
+static constexpr FloatRegister FloatReg0 = d0;
+static constexpr FloatRegister FloatReg1 = d1;
+static constexpr FloatRegister FloatReg2 = d2;
+static constexpr FloatRegister FloatReg3 = d3;
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_SharedICRegisters_arm_h */
diff --git a/js/src/jit/arm/Simulator-arm.cpp b/js/src/jit/arm/Simulator-arm.cpp
new file mode 100644
index 0000000000..2afd6cb0de
--- /dev/null
+++ b/js/src/jit/arm/Simulator-arm.cpp
@@ -0,0 +1,5472 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm/Simulator-arm.h"
+
+#include "mozilla/Casting.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/EndianUtils.h"
+#include "mozilla/FloatingPoint.h"
+#include "mozilla/Likely.h"
+#include "mozilla/MathAlgorithms.h"
+
+#include "jit/arm/Assembler-arm.h"
+#include "jit/arm/disasm/Constants-arm.h"
+#include "jit/AtomicOperations.h"
+#include "js/UniquePtr.h"
+#include "js/Utility.h"
+#include "threading/LockGuard.h"
+#include "vm/JSContext.h"
+#include "vm/Runtime.h"
+#include "vm/SharedMem.h"
+#include "wasm/WasmInstance.h"
+#include "wasm/WasmSignalHandlers.h"
+
+extern "C" {
+
+MOZ_EXPORT int64_t __aeabi_idivmod(int x, int y) {
+ // Run-time ABI for the ARM architecture specifies that for |INT_MIN / -1|
+ // "an implementation is (sic) may return any convenient value, possibly the
+ // original numerator."
+ //
+ // |INT_MIN / -1| traps on x86, which isn't listed as an allowed behavior in
+ // the ARM docs, so instead follow LLVM and return the numerator. (And zero
+ // for the remainder.)
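+  //
+  // As an illustrative example, __aeabi_idivmod(7, 3) returns
+  // 0x0000000100000002: quotient 2 in the low word (returned in r0) and
+  // remainder 1 in the high word (returned in r1).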
+
+ if (x == INT32_MIN && y == -1) {
+ return uint32_t(x);
+ }
+
+ uint32_t lo = uint32_t(x / y);
+ uint32_t hi = uint32_t(x % y);
+ return (int64_t(hi) << 32) | lo;
+}
+
+MOZ_EXPORT int64_t __aeabi_uidivmod(int x, int y) {
+ uint32_t lo = uint32_t(x) / uint32_t(y);
+ uint32_t hi = uint32_t(x) % uint32_t(y);
+ return (int64_t(hi) << 32) | lo;
+}
+}
+
+namespace js {
+namespace jit {
+
+// For decoding load-exclusive and store-exclusive instructions.
+namespace excl {
+
+// Bit positions.
+enum {
+ ExclusiveOpHi = 24, // Hi bit of opcode field
+ ExclusiveOpLo = 23, // Lo bit of opcode field
+ ExclusiveSizeHi = 22, // Hi bit of operand size field
+ ExclusiveSizeLo = 21, // Lo bit of operand size field
+ ExclusiveLoad = 20 // Bit indicating load
+};
+
+// Opcode bits for exclusive instructions.
+enum { ExclusiveOpcode = 3 };
+
+// Operand size, Bits(ExclusiveSizeHi,ExclusiveSizeLo).
+enum {
+ ExclusiveWord = 0,
+ ExclusiveDouble = 1,
+ ExclusiveByte = 2,
+ ExclusiveHalf = 3
+};
+
+} // namespace excl
+
+// Load/store multiple addressing mode.
+enum BlockAddrMode {
+ // Alias modes for comparison when writeback does not matter.
+ da_x = (0 | 0 | 0) << 21, // Decrement after.
+ ia_x = (0 | 4 | 0) << 21, // Increment after.
+ db_x = (8 | 0 | 0) << 21, // Decrement before.
+ ib_x = (8 | 4 | 0) << 21, // Increment before.
+};
+
+// Type of VFP register. Determines register encoding.
+enum VFPRegPrecision { kSinglePrecision = 0, kDoublePrecision = 1 };
+
+enum NeonListType { nlt_1 = 0x7, nlt_2 = 0xA, nlt_3 = 0x6, nlt_4 = 0x2 };
+
+// Supervisor Call (svc) specific support.
+
+// Special Software Interrupt codes when used in the presence of the ARM
+// simulator.
+// svc (formerly swi) provides a 24-bit immediate value. Bits 22:0 are used for
+// standard SoftwareInterruptCodes. Bit 23 is reserved for the stop feature.
+enum SoftwareInterruptCodes {
+ kCallRtRedirected = 0x10, // Transition to C code.
+ kBreakpoint = 0x20, // Breakpoint.
+ kStopCode = 1 << 23 // Stop.
+};
+
+const uint32_t kStopCodeMask = kStopCode - 1;
+const uint32_t kMaxStopCode = kStopCode - 1;
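+// As an illustrative sketch, an svc whose immediate has bit 23 set, e.g.
+// svc #((1 << 23) | 42), is treated as a stop with code 42 (the bits under
+// kStopCodeMask), while svc #kCallRtRedirected transfers control to the C++
+// redirection mechanism.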
+
+// -----------------------------------------------------------------------------
+// Instruction abstraction.
+
+// The class Instruction enables access to individual fields defined in the ARM
+// architecture instruction set encoding as described in figure A3-1.
+// Note that the Assembler uses typedef int32_t Instr.
+//
+// Example: Test whether the instruction at ptr does set the condition code
+// bits.
+//
+// bool InstructionSetsConditionCodes(byte* ptr) {
+// Instruction* instr = Instruction::At(ptr);
+// int type = instr->TypeValue();
+// return ((type == 0) || (type == 1)) && instr->hasS();
+// }
+//
+class SimInstruction {
+ public:
+ enum { kInstrSize = 4, kPCReadOffset = 8 };
+
+ // Get the raw instruction bits.
+ inline Instr instructionBits() const {
+ return *reinterpret_cast<const Instr*>(this);
+ }
+
+ // Set the raw instruction bits to value.
+ inline void setInstructionBits(Instr value) {
+ *reinterpret_cast<Instr*>(this) = value;
+ }
+
+ // Read one particular bit out of the instruction bits.
+ inline int bit(int nr) const { return (instructionBits() >> nr) & 1; }
+
+ // Read a bit field's value out of the instruction bits.
+ inline int bits(int hi, int lo) const {
+ return (instructionBits() >> lo) & ((2 << (hi - lo)) - 1);
+ }
+
+ // Read a bit field out of the instruction bits.
+ inline int bitField(int hi, int lo) const {
+ return instructionBits() & (((2 << (hi - lo)) - 1) << lo);
+ }
+
+ // Accessors for the different named fields used in the ARM encoding.
+  // The naming of these accessors corresponds to figure A3-1.
+  //
+  // Two kinds of accessors are declared:
+  // - <Name>Field() will return the raw field, i.e. the field's bits at their
+  //   original place in the instruction encoding.
+  //   e.g. if instr is the 'addgt r0, r1, r2' instruction, encoded as
+  //   0xC0810002, conditionField(instr) will return 0xC0000000.
+  // - <Name>Value() will return the field value, shifted back to bit 0.
+  //   e.g. if instr is the 'addgt r0, r1, r2' instruction, encoded as
+  //   0xC0810002, the corresponding <Name>Value() accessor will return 0xC.
+
+ // Generally applicable fields
+ inline Assembler::ARMCondition conditionField() const {
+ return static_cast<Assembler::ARMCondition>(bitField(31, 28));
+ }
+ inline int typeValue() const { return bits(27, 25); }
+ inline int specialValue() const { return bits(27, 23); }
+
+ inline int rnValue() const { return bits(19, 16); }
+ inline int rdValue() const { return bits(15, 12); }
+
+ inline int coprocessorValue() const { return bits(11, 8); }
+
+ // Support for VFP.
+ // Vn(19-16) | Vd(15-12) | Vm(3-0)
+ inline int vnValue() const { return bits(19, 16); }
+ inline int vmValue() const { return bits(3, 0); }
+ inline int vdValue() const { return bits(15, 12); }
+ inline int nValue() const { return bit(7); }
+ inline int mValue() const { return bit(5); }
+ inline int dValue() const { return bit(22); }
+ inline int rtValue() const { return bits(15, 12); }
+ inline int pValue() const { return bit(24); }
+ inline int uValue() const { return bit(23); }
+ inline int opc1Value() const { return (bit(23) << 2) | bits(21, 20); }
+ inline int opc2Value() const { return bits(19, 16); }
+ inline int opc3Value() const { return bits(7, 6); }
+ inline int szValue() const { return bit(8); }
+ inline int VLValue() const { return bit(20); }
+ inline int VCValue() const { return bit(8); }
+ inline int VAValue() const { return bits(23, 21); }
+ inline int VBValue() const { return bits(6, 5); }
+ inline int VFPNRegValue(VFPRegPrecision pre) {
+ return VFPGlueRegValue(pre, 16, 7);
+ }
+ inline int VFPMRegValue(VFPRegPrecision pre) {
+ return VFPGlueRegValue(pre, 0, 5);
+ }
+ inline int VFPDRegValue(VFPRegPrecision pre) {
+ return VFPGlueRegValue(pre, 12, 22);
+ }
+
+ // Fields used in Data processing instructions.
+ inline int opcodeValue() const { return static_cast<ALUOp>(bits(24, 21)); }
+ inline ALUOp opcodeField() const {
+ return static_cast<ALUOp>(bitField(24, 21));
+ }
+ inline int sValue() const { return bit(20); }
+
+ // With register.
+ inline int rmValue() const { return bits(3, 0); }
+ inline ShiftType shifttypeValue() const {
+ return static_cast<ShiftType>(bits(6, 5));
+ }
+ inline int rsValue() const { return bits(11, 8); }
+ inline int shiftAmountValue() const { return bits(11, 7); }
+
+ // With immediate.
+ inline int rotateValue() const { return bits(11, 8); }
+ inline int immed8Value() const { return bits(7, 0); }
+ inline int immed4Value() const { return bits(19, 16); }
+ inline int immedMovwMovtValue() const {
+ return immed4Value() << 12 | offset12Value();
+ }
+
+ // Fields used in Load/Store instructions.
+ inline int PUValue() const { return bits(24, 23); }
+ inline int PUField() const { return bitField(24, 23); }
+ inline int bValue() const { return bit(22); }
+ inline int wValue() const { return bit(21); }
+ inline int lValue() const { return bit(20); }
+
+  // The register-offset form uses the same fields as the Data processing
+  // instructions above. With an immediate offset:
+ inline int offset12Value() const { return bits(11, 0); }
+
+ // Multiple.
+ inline int rlistValue() const { return bits(15, 0); }
+
+ // Extra loads and stores.
+ inline int signValue() const { return bit(6); }
+ inline int hValue() const { return bit(5); }
+ inline int immedHValue() const { return bits(11, 8); }
+ inline int immedLValue() const { return bits(3, 0); }
+
+ // Fields used in Branch instructions.
+ inline int linkValue() const { return bit(24); }
+ inline int sImmed24Value() const { return ((instructionBits() << 8) >> 8); }
+
+ // Fields used in Software interrupt instructions.
+ inline SoftwareInterruptCodes svcValue() const {
+ return static_cast<SoftwareInterruptCodes>(bits(23, 0));
+ }
+
+ // Test for special encodings of type 0 instructions (extra loads and
+ // stores, as well as multiplications).
+ inline bool isSpecialType0() const { return (bit(7) == 1) && (bit(4) == 1); }
+
+ // Test for miscellaneous instructions encodings of type 0 instructions.
+ inline bool isMiscType0() const {
+ return bit(24) == 1 && bit(23) == 0 && bit(20) == 0 && (bit(7) == 0);
+ }
+
+ // Test for a nop instruction, which falls under type 1.
+ inline bool isNopType1() const { return bits(24, 0) == 0x0120F000; }
+
+  // Test for a csdb instruction, which also falls under type 1.
+ inline bool isCsdbType1() const { return bits(24, 0) == 0x0120F014; }
+
+ // Test for a stop instruction.
+ inline bool isStop() const {
+ return typeValue() == 7 && bit(24) == 1 && svcValue() >= kStopCode;
+ }
+
+ // Test for a udf instruction, which falls under type 3.
+ inline bool isUDF() const {
+ return (instructionBits() & 0xfff000f0) == 0xe7f000f0;
+ }
+
+ // Special accessors that test for existence of a value.
+ inline bool hasS() const { return sValue() == 1; }
+ inline bool hasB() const { return bValue() == 1; }
+ inline bool hasW() const { return wValue() == 1; }
+ inline bool hasL() const { return lValue() == 1; }
+ inline bool hasU() const { return uValue() == 1; }
+ inline bool hasSign() const { return signValue() == 1; }
+ inline bool hasH() const { return hValue() == 1; }
+ inline bool hasLink() const { return linkValue() == 1; }
+
+ // Decoding the double immediate in the vmov instruction.
+ double doubleImmedVmov() const;
+ // Decoding the float32 immediate in the vmov.f32 instruction.
+ float float32ImmedVmov() const;
+
+ private:
+ // Join split register codes, depending on single or double precision.
+ // four_bit is the position of the least-significant bit of the four
+ // bit specifier. one_bit is the position of the additional single bit
+ // specifier.
+ inline int VFPGlueRegValue(VFPRegPrecision pre, int four_bit, int one_bit) {
+ if (pre == kSinglePrecision) {
+ return (bits(four_bit + 3, four_bit) << 1) | bit(one_bit);
+ }
+ return (bit(one_bit) << 4) | bits(four_bit + 3, four_bit);
+ }
+
+ SimInstruction() = delete;
+ SimInstruction(const SimInstruction& other) = delete;
+ void operator=(const SimInstruction& other) = delete;
+};
+
+double SimInstruction::doubleImmedVmov() const {
+ // Reconstruct a double from the immediate encoded in the vmov instruction.
+ //
+ // instruction: [xxxxxxxx,xxxxabcd,xxxxxxxx,xxxxefgh]
+ // double: [aBbbbbbb,bbcdefgh,00000000,00000000,
+ // 00000000,00000000,00000000,00000000]
+ //
+ // where B = ~b. Only the high 16 bits are affected.
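+  //
+  // Worked example (for illustration): the encoding used for vmov.f64 dN, #1.0
+  // has abcd == 0b0111 and efgh == 0b0000, so high16 becomes 0x3ff0 and the
+  // reconstructed bit pattern is 0x3ff0000000000000, i.e. 1.0.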
+ uint64_t high16;
+ high16 = (bits(17, 16) << 4) | bits(3, 0); // xxxxxxxx,xxcdefgh.
+ high16 |= (0xff * bit(18)) << 6; // xxbbbbbb,bbxxxxxx.
+ high16 |= (bit(18) ^ 1) << 14; // xBxxxxxx,xxxxxxxx.
+ high16 |= bit(19) << 15; // axxxxxxx,xxxxxxxx.
+
+ uint64_t imm = high16 << 48;
+ return mozilla::BitwiseCast<double>(imm);
+}
+
+float SimInstruction::float32ImmedVmov() const {
+ // Reconstruct a float32 from the immediate encoded in the vmov instruction.
+ //
+ // instruction: [xxxxxxxx,xxxxabcd,xxxxxxxx,xxxxefgh]
+ // float32: [aBbbbbbc, defgh000, 00000000, 00000000]
+ //
+  // where B = ~b. Only the high 13 bits are affected.
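+  //
+  // Worked example (for illustration): the same abcd == 0b0111, efgh == 0b0000
+  // encoding under this expansion yields 0x3f800000, i.e. 1.0f.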
+ uint32_t imm;
+ imm = (bits(17, 16) << 23) | (bits(3, 0) << 19); // xxxxxxxc,defgh000.0.0
+ imm |= (0x1f * bit(18)) << 25; // xxbbbbbx,xxxxxxxx.0.0
+ imm |= (bit(18) ^ 1) << 30; // xBxxxxxx,xxxxxxxx.0.0
+ imm |= bit(19) << 31; // axxxxxxx,xxxxxxxx.0.0
+
+ return mozilla::BitwiseCast<float>(imm);
+}
+
+class CachePage {
+ public:
+ static const int LINE_VALID = 0;
+ static const int LINE_INVALID = 1;
+ static const int kPageShift = 12;
+ static const int kPageSize = 1 << kPageShift;
+ static const int kPageMask = kPageSize - 1;
+ static const int kLineShift = 2; // The cache line is only 4 bytes right now.
+ static const int kLineLength = 1 << kLineShift;
+ static const int kLineMask = kLineLength - 1;
+
+ CachePage() { memset(&validity_map_, LINE_INVALID, sizeof(validity_map_)); }
+ char* validityByte(int offset) {
+ return &validity_map_[offset >> kLineShift];
+ }
+ char* cachedData(int offset) { return &data_[offset]; }
+
+ private:
+ char data_[kPageSize]; // The cached data.
+ static const int kValidityMapSize = kPageSize >> kLineShift;
+ char validity_map_[kValidityMapSize]; // One byte per line.
+};
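+
+// With the constants above, a 4 KB code page is tracked by a 1024-byte
+// validity map: one validity byte per 4-byte cache line
+// (kPageSize >> kLineShift == 1024).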
+
+// Protects the icache() and redirection() properties of the
+// Simulator.
+class AutoLockSimulatorCache : public LockGuard<Mutex> {
+ using Base = LockGuard<Mutex>;
+
+ public:
+ explicit AutoLockSimulatorCache()
+ : Base(SimulatorProcess::singleton_->cacheLock_) {}
+};
+
+mozilla::Atomic<size_t, mozilla::ReleaseAcquire>
+ SimulatorProcess::ICacheCheckingDisableCount(
+ 1); // Checking is disabled by default.
+SimulatorProcess* SimulatorProcess::singleton_ = nullptr;
+
+int64_t Simulator::StopSimAt = -1L;
+
+Simulator* Simulator::Create() {
+ auto sim = MakeUnique<Simulator>();
+ if (!sim) {
+ return nullptr;
+ }
+
+ if (!sim->init()) {
+ return nullptr;
+ }
+
+ char* stopAtStr = getenv("ARM_SIM_STOP_AT");
+ int64_t stopAt;
+ if (stopAtStr && sscanf(stopAtStr, "%lld", &stopAt) == 1) {
+ fprintf(stderr, "\nStopping simulation at icount %lld\n", stopAt);
+ Simulator::StopSimAt = stopAt;
+ }
+
+ return sim.release();
+}
+
+void Simulator::Destroy(Simulator* sim) { js_delete(sim); }
+
+void Simulator::disassemble(SimInstruction* instr, size_t n) {
+#ifdef JS_DISASM_ARM
+ disasm::NameConverter converter;
+ disasm::Disassembler dasm(converter);
+ disasm::EmbeddedVector<char, disasm::ReasonableBufferSize> buffer;
+ while (n-- > 0) {
+ dasm.InstructionDecode(buffer, reinterpret_cast<uint8_t*>(instr));
+ fprintf(stderr, " 0x%08x %s\n", uint32_t(instr), buffer.start());
+ instr = reinterpret_cast<SimInstruction*>(
+ reinterpret_cast<uint8_t*>(instr) + 4);
+ }
+#endif
+}
+
+void Simulator::disasm(SimInstruction* instr) { disassemble(instr, 1); }
+
+void Simulator::disasm(SimInstruction* instr, size_t n) {
+ disassemble(instr, n);
+}
+
+void Simulator::disasm(SimInstruction* instr, size_t m, size_t n) {
+ disassemble(reinterpret_cast<SimInstruction*>(
+ reinterpret_cast<uint8_t*>(instr) - m * 4),
+ n);
+}
+
+// The ArmDebugger class is used by the simulator while debugging simulated ARM
+// code.
+class ArmDebugger {
+ public:
+ explicit ArmDebugger(Simulator* sim) : sim_(sim) {}
+
+ void stop(SimInstruction* instr);
+ void debug();
+
+ private:
+ static const Instr kBreakpointInstr =
+ (Assembler::AL | (7 * (1 << 25)) | (1 * (1 << 24)) | kBreakpoint);
+ static const Instr kNopInstr = (Assembler::AL | (13 * (1 << 21)));
+
+ Simulator* sim_;
+
+ int32_t getRegisterValue(int regnum);
+ double getRegisterPairDoubleValue(int regnum);
+ void getVFPDoubleRegisterValue(int regnum, double* value);
+ bool getValue(const char* desc, int32_t* value);
+ bool getVFPDoubleValue(const char* desc, double* value);
+
+ // Set or delete a breakpoint. Returns true if successful.
+ bool setBreakpoint(SimInstruction* breakpc);
+ bool deleteBreakpoint(SimInstruction* breakpc);
+
+ // Undo and redo all breakpoints. This is needed to bracket disassembly and
+ // execution to skip past breakpoints when run from the debugger.
+ void undoBreakpoints();
+ void redoBreakpoints();
+};
+
+void ArmDebugger::stop(SimInstruction* instr) {
+ // Get the stop code.
+ uint32_t code = instr->svcValue() & kStopCodeMask;
+ // Retrieve the encoded address, which comes just after this stop.
+ char* msg =
+ *reinterpret_cast<char**>(sim_->get_pc() + SimInstruction::kInstrSize);
+ // Update this stop description.
+ if (sim_->isWatchedStop(code) && !sim_->watched_stops_[code].desc) {
+ sim_->watched_stops_[code].desc = msg;
+ }
+ // Print the stop message and code if it is not the default code.
+ if (code != kMaxStopCode) {
+ printf("Simulator hit stop %u: %s\n", code, msg);
+ } else {
+ printf("Simulator hit %s\n", msg);
+ }
+ sim_->set_pc(sim_->get_pc() + 2 * SimInstruction::kInstrSize);
+ debug();
+}
+
+int32_t ArmDebugger::getRegisterValue(int regnum) {
+ if (regnum == Registers::pc) {
+ return sim_->get_pc();
+ }
+ return sim_->get_register(regnum);
+}
+
+double ArmDebugger::getRegisterPairDoubleValue(int regnum) {
+ return sim_->get_double_from_register_pair(regnum);
+}
+
+void ArmDebugger::getVFPDoubleRegisterValue(int regnum, double* out) {
+ sim_->get_double_from_d_register(regnum, out);
+}
+
+bool ArmDebugger::getValue(const char* desc, int32_t* value) {
+ Register reg = Register::FromName(desc);
+ if (reg != InvalidReg) {
+ *value = getRegisterValue(reg.code());
+ return true;
+ }
+ if (strncmp(desc, "0x", 2) == 0) {
+ return sscanf(desc + 2, "%x", reinterpret_cast<uint32_t*>(value)) == 1;
+ }
+ return sscanf(desc, "%u", reinterpret_cast<uint32_t*>(value)) == 1;
+}
+
+bool ArmDebugger::getVFPDoubleValue(const char* desc, double* value) {
+ FloatRegister reg = FloatRegister::FromCode(FloatRegister::FromName(desc));
+ if (reg.isInvalid()) {
+ return false;
+ }
+
+ if (reg.isSingle()) {
+ float fval;
+ sim_->get_float_from_s_register(reg.id(), &fval);
+ *value = fval;
+ return true;
+ }
+
+ sim_->get_double_from_d_register(reg.id(), value);
+ return true;
+}
+
+bool ArmDebugger::setBreakpoint(SimInstruction* breakpc) {
+ // Check if a breakpoint can be set. If not return without any side-effects.
+ if (sim_->break_pc_) {
+ return false;
+ }
+
+ // Set the breakpoint.
+ sim_->break_pc_ = breakpc;
+ sim_->break_instr_ = breakpc->instructionBits();
+  // The breakpoint instruction is not written into the code here; it will be
+  // set when the debugger shell continues.
+ return true;
+}
+
+bool ArmDebugger::deleteBreakpoint(SimInstruction* breakpc) {
+ if (sim_->break_pc_ != nullptr) {
+ sim_->break_pc_->setInstructionBits(sim_->break_instr_);
+ }
+
+ sim_->break_pc_ = nullptr;
+ sim_->break_instr_ = 0;
+ return true;
+}
+
+void ArmDebugger::undoBreakpoints() {
+ if (sim_->break_pc_) {
+ sim_->break_pc_->setInstructionBits(sim_->break_instr_);
+ }
+}
+
+void ArmDebugger::redoBreakpoints() {
+ if (sim_->break_pc_) {
+ sim_->break_pc_->setInstructionBits(kBreakpointInstr);
+ }
+}
+
+static char* ReadLine(const char* prompt) {
+ UniqueChars result;
+ char line_buf[256];
+ int offset = 0;
+ bool keep_going = true;
+ fprintf(stdout, "%s", prompt);
+ fflush(stdout);
+ while (keep_going) {
+ if (fgets(line_buf, sizeof(line_buf), stdin) == nullptr) {
+ // fgets got an error. Just give up.
+ return nullptr;
+ }
+ int len = strlen(line_buf);
+ if (len > 0 && line_buf[len - 1] == '\n') {
+      // We found the newline, so we are done reading this line. Exit the
+      // loop after copying this buffer into the result.
+ keep_going = false;
+ }
+ if (!result) {
+ // Allocate the initial result and make room for the terminating
+ // '\0'.
+ result.reset(js_pod_malloc<char>(len + 1));
+ if (!result) {
+ return nullptr;
+ }
+ } else {
+ // Allocate a new result with enough room for the new addition.
+ int new_len = offset + len + 1;
+ char* new_result = js_pod_malloc<char>(new_len);
+ if (!new_result) {
+ return nullptr;
+ }
+ // Copy the existing input into the new array and set the new
+ // array as the result.
+ memcpy(new_result, result.get(), offset * sizeof(char));
+ result.reset(new_result);
+ }
+ // Copy the newly read line into the result.
+ memcpy(result.get() + offset, line_buf, len * sizeof(char));
+ offset += len;
+ }
+
+ MOZ_ASSERT(result);
+ result[offset] = '\0';
+ return result.release();
+}
+
+void ArmDebugger::debug() {
+ intptr_t last_pc = -1;
+ bool done = false;
+
+#define COMMAND_SIZE 63
+#define ARG_SIZE 255
+
+#define STR(a) #a
+#define XSTR(a) STR(a)
+
+ char cmd[COMMAND_SIZE + 1];
+ char arg1[ARG_SIZE + 1];
+ char arg2[ARG_SIZE + 1];
+ char* argv[3] = {cmd, arg1, arg2};
+
+ // Make sure to have a proper terminating character if reaching the limit.
+ cmd[COMMAND_SIZE] = 0;
+ arg1[ARG_SIZE] = 0;
+ arg2[ARG_SIZE] = 0;
+
+ // Undo all set breakpoints while running in the debugger shell. This will
+ // make them invisible to all commands.
+ undoBreakpoints();
+
+#ifndef JS_DISASM_ARM
+ static bool disasm_warning_printed = false;
+ if (!disasm_warning_printed) {
+ printf(
+ " No ARM disassembler present. Enable JS_DISASM_ARM in "
+        "configure.in.\n");
+ disasm_warning_printed = true;
+ }
+#endif
+
+ while (!done && !sim_->has_bad_pc()) {
+ if (last_pc != sim_->get_pc()) {
+#ifdef JS_DISASM_ARM
+ disasm::NameConverter converter;
+ disasm::Disassembler dasm(converter);
+ disasm::EmbeddedVector<char, disasm::ReasonableBufferSize> buffer;
+ dasm.InstructionDecode(buffer,
+ reinterpret_cast<uint8_t*>(sim_->get_pc()));
+ printf(" 0x%08x %s\n", sim_->get_pc(), buffer.start());
+#endif
+ last_pc = sim_->get_pc();
+ }
+ char* line = ReadLine("sim> ");
+ if (line == nullptr) {
+ break;
+ } else {
+ char* last_input = sim_->lastDebuggerInput();
+ if (strcmp(line, "\n") == 0 && last_input != nullptr) {
+ line = last_input;
+ } else {
+        // Ownership is transferred to sim_.
+ sim_->setLastDebuggerInput(line);
+ }
+
+ // Use sscanf to parse the individual parts of the command line. At the
+ // moment no command expects more than two parameters.
+ int argc = sscanf(line,
+ "%" XSTR(COMMAND_SIZE) "s "
+ "%" XSTR(ARG_SIZE) "s "
+ "%" XSTR(ARG_SIZE) "s",
+ cmd, arg1, arg2);
+ if (argc < 0) {
+ continue;
+ } else if ((strcmp(cmd, "si") == 0) || (strcmp(cmd, "stepi") == 0)) {
+ sim_->instructionDecode(
+ reinterpret_cast<SimInstruction*>(sim_->get_pc()));
+ sim_->icount_++;
+ } else if ((strcmp(cmd, "skip") == 0)) {
+ sim_->set_pc(sim_->get_pc() + 4);
+ sim_->icount_++;
+ } else if ((strcmp(cmd, "c") == 0) || (strcmp(cmd, "cont") == 0)) {
+ // Execute the one instruction we broke at with breakpoints
+ // disabled.
+ sim_->instructionDecode(
+ reinterpret_cast<SimInstruction*>(sim_->get_pc()));
+ sim_->icount_++;
+ // Leave the debugger shell.
+ done = true;
+ } else if ((strcmp(cmd, "p") == 0) || (strcmp(cmd, "print") == 0)) {
+ if (argc == 2 || (argc == 3 && strcmp(arg2, "fp") == 0)) {
+ int32_t value;
+ double dvalue;
+ if (strcmp(arg1, "all") == 0) {
+ for (uint32_t i = 0; i < Registers::Total; i++) {
+ value = getRegisterValue(i);
+ printf("%3s: 0x%08x %10d", Registers::GetName(i), value, value);
+ if ((argc == 3 && strcmp(arg2, "fp") == 0) && i < 8 &&
+ (i % 2) == 0) {
+ dvalue = getRegisterPairDoubleValue(i);
+ printf(" (%.16g)\n", dvalue);
+ } else {
+ printf("\n");
+ }
+ }
+ for (uint32_t i = 0; i < FloatRegisters::TotalPhys; i++) {
+ getVFPDoubleRegisterValue(i, &dvalue);
+ uint64_t as_words = mozilla::BitwiseCast<uint64_t>(dvalue);
+ printf("%3s: %.16g 0x%08x %08x\n",
+ FloatRegister::FromCode(i).name(), dvalue,
+ static_cast<uint32_t>(as_words >> 32),
+ static_cast<uint32_t>(as_words & 0xffffffff));
+ }
+ } else {
+ if (getValue(arg1, &value)) {
+ printf("%s: 0x%08x %d \n", arg1, value, value);
+ } else if (getVFPDoubleValue(arg1, &dvalue)) {
+ uint64_t as_words = mozilla::BitwiseCast<uint64_t>(dvalue);
+ printf("%s: %.16g 0x%08x %08x\n", arg1, dvalue,
+ static_cast<uint32_t>(as_words >> 32),
+ static_cast<uint32_t>(as_words & 0xffffffff));
+ } else {
+ printf("%s unrecognized\n", arg1);
+ }
+ }
+ } else {
+ printf("print <register>\n");
+ }
+ } else if (strcmp(cmd, "stack") == 0 || strcmp(cmd, "mem") == 0) {
+ int32_t* cur = nullptr;
+ int32_t* end = nullptr;
+ int next_arg = 1;
+
+ if (strcmp(cmd, "stack") == 0) {
+ cur = reinterpret_cast<int32_t*>(sim_->get_register(Simulator::sp));
+ } else { // "mem"
+ int32_t value;
+ if (!getValue(arg1, &value)) {
+ printf("%s unrecognized\n", arg1);
+ continue;
+ }
+ cur = reinterpret_cast<int32_t*>(value);
+ next_arg++;
+ }
+
+ int32_t words;
+ if (argc == next_arg) {
+ words = 10;
+ } else {
+ if (!getValue(argv[next_arg], &words)) {
+ words = 10;
+ }
+ }
+ end = cur + words;
+
+ while (cur < end) {
+ printf(" %p: 0x%08x %10d", cur, *cur, *cur);
+ printf("\n");
+ cur++;
+ }
+ } else if (strcmp(cmd, "disasm") == 0 || strcmp(cmd, "di") == 0) {
+#ifdef JS_DISASM_ARM
+ uint8_t* prev = nullptr;
+ uint8_t* cur = nullptr;
+ uint8_t* end = nullptr;
+
+ if (argc == 1) {
+ cur = reinterpret_cast<uint8_t*>(sim_->get_pc());
+ end = cur + (10 * SimInstruction::kInstrSize);
+ } else if (argc == 2) {
+ Register reg = Register::FromName(arg1);
+ if (reg != InvalidReg || strncmp(arg1, "0x", 2) == 0) {
+ // The argument is an address or a register name.
+ int32_t value;
+ if (getValue(arg1, &value)) {
+ cur = reinterpret_cast<uint8_t*>(value);
+ // Disassemble 10 instructions at <arg1>.
+ end = cur + (10 * SimInstruction::kInstrSize);
+ }
+ } else {
+ // The argument is the number of instructions.
+ int32_t value;
+ if (getValue(arg1, &value)) {
+ cur = reinterpret_cast<uint8_t*>(sim_->get_pc());
+ // Disassemble <arg1> instructions.
+ end = cur + (value * SimInstruction::kInstrSize);
+ }
+ }
+ } else {
+ int32_t value1;
+ int32_t value2;
+ if (getValue(arg1, &value1) && getValue(arg2, &value2)) {
+ cur = reinterpret_cast<uint8_t*>(value1);
+ end = cur + (value2 * SimInstruction::kInstrSize);
+ }
+ }
+ while (cur < end) {
+ disasm::NameConverter converter;
+ disasm::Disassembler dasm(converter);
+ disasm::EmbeddedVector<char, disasm::ReasonableBufferSize> buffer;
+
+ prev = cur;
+ cur += dasm.InstructionDecode(buffer, cur);
+ printf(" 0x%08x %s\n", reinterpret_cast<uint32_t>(prev),
+ buffer.start());
+ }
+#endif
+ } else if (strcmp(cmd, "gdb") == 0) {
+ printf("relinquishing control to gdb\n");
+#ifdef _MSC_VER
+ __debugbreak();
+#else
+ asm("int $3");
+#endif
+ printf("regaining control from gdb\n");
+ } else if (strcmp(cmd, "break") == 0) {
+ if (argc == 2) {
+ int32_t value;
+ if (getValue(arg1, &value)) {
+ if (!setBreakpoint(reinterpret_cast<SimInstruction*>(value))) {
+ printf("setting breakpoint failed\n");
+ }
+ } else {
+ printf("%s unrecognized\n", arg1);
+ }
+ } else {
+ printf("break <address>\n");
+ }
+ } else if (strcmp(cmd, "del") == 0) {
+ if (!deleteBreakpoint(nullptr)) {
+ printf("deleting breakpoint failed\n");
+ }
+ } else if (strcmp(cmd, "flags") == 0) {
+ printf("N flag: %d; ", sim_->n_flag_);
+ printf("Z flag: %d; ", sim_->z_flag_);
+ printf("C flag: %d; ", sim_->c_flag_);
+ printf("V flag: %d\n", sim_->v_flag_);
+ printf("INVALID OP flag: %d; ", sim_->inv_op_vfp_flag_);
+ printf("DIV BY ZERO flag: %d; ", sim_->div_zero_vfp_flag_);
+ printf("OVERFLOW flag: %d; ", sim_->overflow_vfp_flag_);
+ printf("UNDERFLOW flag: %d; ", sim_->underflow_vfp_flag_);
+ printf("INEXACT flag: %d;\n", sim_->inexact_vfp_flag_);
+ } else if (strcmp(cmd, "stop") == 0) {
+ int32_t value;
+ intptr_t stop_pc = sim_->get_pc() - 2 * SimInstruction::kInstrSize;
+ SimInstruction* stop_instr = reinterpret_cast<SimInstruction*>(stop_pc);
+ SimInstruction* msg_address = reinterpret_cast<SimInstruction*>(
+ stop_pc + SimInstruction::kInstrSize);
+ if ((argc == 2) && (strcmp(arg1, "unstop") == 0)) {
+ // Remove the current stop.
+ if (sim_->isStopInstruction(stop_instr)) {
+ stop_instr->setInstructionBits(kNopInstr);
+ msg_address->setInstructionBits(kNopInstr);
+ } else {
+ printf("Not at debugger stop.\n");
+ }
+ } else if (argc == 3) {
+ // Print information about all/the specified breakpoint(s).
+ if (strcmp(arg1, "info") == 0) {
+ if (strcmp(arg2, "all") == 0) {
+ printf("Stop information:\n");
+ for (uint32_t i = 0; i < sim_->kNumOfWatchedStops; i++) {
+ sim_->printStopInfo(i);
+ }
+ } else if (getValue(arg2, &value)) {
+ sim_->printStopInfo(value);
+ } else {
+ printf("Unrecognized argument.\n");
+ }
+ } else if (strcmp(arg1, "enable") == 0) {
+ // Enable all/the specified breakpoint(s).
+ if (strcmp(arg2, "all") == 0) {
+ for (uint32_t i = 0; i < sim_->kNumOfWatchedStops; i++) {
+ sim_->enableStop(i);
+ }
+ } else if (getValue(arg2, &value)) {
+ sim_->enableStop(value);
+ } else {
+ printf("Unrecognized argument.\n");
+ }
+ } else if (strcmp(arg1, "disable") == 0) {
+ // Disable all/the specified breakpoint(s).
+ if (strcmp(arg2, "all") == 0) {
+ for (uint32_t i = 0; i < sim_->kNumOfWatchedStops; i++) {
+ sim_->disableStop(i);
+ }
+ } else if (getValue(arg2, &value)) {
+ sim_->disableStop(value);
+ } else {
+ printf("Unrecognized argument.\n");
+ }
+ }
+ } else {
+ printf("Wrong usage. Use help command for more information.\n");
+ }
+ } else if ((strcmp(cmd, "h") == 0) || (strcmp(cmd, "help") == 0)) {
+ printf("cont\n");
+ printf(" continue execution (alias 'c')\n");
+ printf("skip\n");
+ printf(" skip one instruction (set pc to next instruction)\n");
+ printf("stepi\n");
+ printf(" step one instruction (alias 'si')\n");
+ printf("print <register>\n");
+ printf(" print register content (alias 'p')\n");
+ printf(" use register name 'all' to print all registers\n");
+ printf(" add argument 'fp' to print register pair double values\n");
+ printf("flags\n");
+ printf(" print flags\n");
+ printf("stack [<words>]\n");
+        printf("  dump stack content, default is 10 words\n");
+ printf("mem <address> [<words>]\n");
+        printf("  dump memory content, default is 10 words\n");
+ printf("disasm [<instructions>]\n");
+ printf("disasm [<address/register>]\n");
+ printf("disasm [[<address/register>] <instructions>]\n");
+ printf(" disassemble code, default is 10 instructions\n");
+ printf(" from pc (alias 'di')\n");
+ printf("gdb\n");
+ printf(" enter gdb\n");
+ printf("break <address>\n");
+ printf(" set a break point on the address\n");
+ printf("del\n");
+ printf(" delete the breakpoint\n");
+ printf("stop feature:\n");
+ printf(" Description:\n");
+ printf(" Stops are debug instructions inserted by\n");
+ printf(" the Assembler::stop() function.\n");
+ printf(" When hitting a stop, the Simulator will\n");
+        printf("    stop and give control to the ArmDebugger.\n");
+ printf(" The first %d stop codes are watched:\n",
+ Simulator::kNumOfWatchedStops);
+ printf(" - They can be enabled / disabled: the Simulator\n");
+ printf(" will / won't stop when hitting them.\n");
+        printf("    - The Simulator keeps track of how many times they\n");
+        printf("      are hit. (See the info command.) Going over a\n");
+        printf("      disabled stop still increases its counter.\n");
+ printf(" Commands:\n");
+        printf("    stop info all/<code> : print info about number <code>\n");
+ printf(" or all stop(s).\n");
+ printf(" stop enable/disable all/<code> : enables / disables\n");
+ printf(" all or number <code> stop(s)\n");
+ printf(" stop unstop\n");
+ printf(" ignore the stop instruction at the current location\n");
+ printf(" from now on\n");
+ } else {
+ printf("Unknown command: %s\n", cmd);
+ }
+ }
+ }
+
+ // Add all the breakpoints back to stop execution and enter the debugger
+ // shell when hit.
+ redoBreakpoints();
+
+#undef COMMAND_SIZE
+#undef ARG_SIZE
+
+#undef STR
+#undef XSTR
+}
+
+static bool AllOnOnePage(uintptr_t start, int size) {
+ intptr_t start_page = (start & ~CachePage::kPageMask);
+ intptr_t end_page = ((start + size) & ~CachePage::kPageMask);
+ return start_page == end_page;
+}
+
+static CachePage* GetCachePageLocked(SimulatorProcess::ICacheMap& i_cache,
+ void* page) {
+ SimulatorProcess::ICacheMap::AddPtr p = i_cache.lookupForAdd(page);
+ if (p) {
+ return p->value();
+ }
+
+ AutoEnterOOMUnsafeRegion oomUnsafe;
+ CachePage* new_page = js_new<CachePage>();
+ if (!new_page || !i_cache.add(p, page, new_page)) {
+ oomUnsafe.crash("Simulator CachePage");
+ }
+
+ return new_page;
+}
+
+// Flush from start up to and not including start + size.
+static void FlushOnePageLocked(SimulatorProcess::ICacheMap& i_cache,
+ intptr_t start, int size) {
+ MOZ_ASSERT(size <= CachePage::kPageSize);
+ MOZ_ASSERT(AllOnOnePage(start, size - 1));
+ MOZ_ASSERT((start & CachePage::kLineMask) == 0);
+ MOZ_ASSERT((size & CachePage::kLineMask) == 0);
+
+ void* page = reinterpret_cast<void*>(start & (~CachePage::kPageMask));
+ int offset = (start & CachePage::kPageMask);
+ CachePage* cache_page = GetCachePageLocked(i_cache, page);
+ char* valid_bytemap = cache_page->validityByte(offset);
+ memset(valid_bytemap, CachePage::LINE_INVALID, size >> CachePage::kLineShift);
+}
+
+static void FlushICacheLocked(SimulatorProcess::ICacheMap& i_cache,
+ void* start_addr, size_t size) {
+ intptr_t start = reinterpret_cast<intptr_t>(start_addr);
+ int intra_line = (start & CachePage::kLineMask);
+ start -= intra_line;
+ size += intra_line;
+ size = ((size - 1) | CachePage::kLineMask) + 1;
+ int offset = (start & CachePage::kPageMask);
+ while (!AllOnOnePage(start, size - 1)) {
+ int bytes_to_flush = CachePage::kPageSize - offset;
+ FlushOnePageLocked(i_cache, start, bytes_to_flush);
+ start += bytes_to_flush;
+ size -= bytes_to_flush;
+ MOZ_ASSERT((start & CachePage::kPageMask) == 0);
+ offset = 0;
+ }
+ if (size != 0) {
+ FlushOnePageLocked(i_cache, start, size);
+ }
+}
+
+/* static */
+void SimulatorProcess::checkICacheLocked(SimInstruction* instr) {
+ intptr_t address = reinterpret_cast<intptr_t>(instr);
+ void* page = reinterpret_cast<void*>(address & (~CachePage::kPageMask));
+ void* line = reinterpret_cast<void*>(address & (~CachePage::kLineMask));
+ int offset = (address & CachePage::kPageMask);
+ CachePage* cache_page = GetCachePageLocked(icache(), page);
+ char* cache_valid_byte = cache_page->validityByte(offset);
+ bool cache_hit = (*cache_valid_byte == CachePage::LINE_VALID);
+ char* cached_line = cache_page->cachedData(offset & ~CachePage::kLineMask);
+
+ if (cache_hit) {
+ // Check that the data in memory matches the contents of the I-cache.
+ mozilla::DebugOnly<int> cmpret =
+ memcmp(reinterpret_cast<void*>(instr), cache_page->cachedData(offset),
+ SimInstruction::kInstrSize);
+ MOZ_ASSERT(cmpret == 0);
+ } else {
+ // Cache miss. Load memory into the cache.
+ memcpy(cached_line, line, CachePage::kLineLength);
+ *cache_valid_byte = CachePage::LINE_VALID;
+ }
+}
+
+HashNumber SimulatorProcess::ICacheHasher::hash(const Lookup& l) {
+ return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(l)) >> 2;
+}
+
+bool SimulatorProcess::ICacheHasher::match(const Key& k, const Lookup& l) {
+ MOZ_ASSERT((reinterpret_cast<intptr_t>(k) & CachePage::kPageMask) == 0);
+ MOZ_ASSERT((reinterpret_cast<intptr_t>(l) & CachePage::kPageMask) == 0);
+ return k == l;
+}
+
+void Simulator::setLastDebuggerInput(char* input) {
+ js_free(lastDebuggerInput_);
+ lastDebuggerInput_ = input;
+}
+
+/* static */
+void SimulatorProcess::FlushICache(void* start_addr, size_t size) {
+ JitSpewCont(JitSpew_CacheFlush, "[%p %zx]", start_addr, size);
+ if (!ICacheCheckingDisableCount) {
+ AutoLockSimulatorCache als;
+ js::jit::FlushICacheLocked(icache(), start_addr, size);
+ }
+}
+
+Simulator::Simulator() {
+ // Set up simulator support first. Some of this information is needed to
+ // setup the architecture state.
+
+ // Note, allocation and anything that depends on allocated memory is
+ // deferred until init(), in order to handle OOM properly.
+
+ stack_ = nullptr;
+ stackLimit_ = 0;
+ pc_modified_ = false;
+ icount_ = 0L;
+ break_pc_ = nullptr;
+ break_instr_ = 0;
+ single_stepping_ = false;
+ single_step_callback_ = nullptr;
+ single_step_callback_arg_ = nullptr;
+ skipCalleeSavedRegsCheck = false;
+
+ // Set up architecture state.
+ // All registers are initialized to zero to start with.
+ for (int i = 0; i < num_registers; i++) {
+ registers_[i] = 0;
+ }
+
+ n_flag_ = false;
+ z_flag_ = false;
+ c_flag_ = false;
+ v_flag_ = false;
+
+ for (int i = 0; i < num_d_registers * 2; i++) {
+ vfp_registers_[i] = 0;
+ }
+
+ n_flag_FPSCR_ = false;
+ z_flag_FPSCR_ = false;
+ c_flag_FPSCR_ = false;
+ v_flag_FPSCR_ = false;
+ FPSCR_rounding_mode_ = SimRZ;
+ FPSCR_default_NaN_mode_ = true;
+
+ inv_op_vfp_flag_ = false;
+ div_zero_vfp_flag_ = false;
+ overflow_vfp_flag_ = false;
+ underflow_vfp_flag_ = false;
+ inexact_vfp_flag_ = false;
+
+ // The lr and pc are initialized to a known bad value that will cause an
+ // access violation if the simulator ever tries to execute it.
+ registers_[pc] = bad_lr;
+ registers_[lr] = bad_lr;
+
+ lastDebuggerInput_ = nullptr;
+
+ exclusiveMonitorHeld_ = false;
+ exclusiveMonitor_ = 0;
+}
+
+bool Simulator::init() {
+ // Allocate 2MB for the stack. Note that we will only use 1MB, see below.
+ static const size_t stackSize = 2 * 1024 * 1024;
+ stack_ = js_pod_malloc<char>(stackSize);
+ if (!stack_) {
+ return false;
+ }
+
+ // Leave a safety margin of 1MB to prevent overrunning the stack when
+ // pushing values (total stack size is 2MB).
+ stackLimit_ = reinterpret_cast<uintptr_t>(stack_) + 1024 * 1024;
+
+ // The sp is initialized to point to the bottom (high address) of the
+ // allocated stack area. To be safe in potential stack underflows we leave
+ // some buffer below.
+ registers_[sp] = reinterpret_cast<int32_t>(stack_) + stackSize - 64;
+
+ return true;
+}
+
+// When the generated code calls a VM function (masm.callWithABI) we need to
+// call that function instead of trying to execute it with the simulator
+// (because it is native host code rather than ARM code). We do that by
+// redirecting the VM call to an svc (Supervisor Call) instruction that is
+// handled by the simulator. We write the original destination of the jump at a
+// known offset from the svc instruction so the simulator knows what to call.
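+//
+// Rough sketch of the round trip (as implemented elsewhere in this file): when
+// the simulator later decodes the svc with code kCallRtRedirected, it calls
+// Redirection::FromSwiInstruction() to recover the Redirection from the
+// address of the svc word and then invokes nativeFunction() with arguments
+// marshalled according to type().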
+class Redirection {
+ friend class SimulatorProcess;
+
+ // sim's lock must already be held.
+ Redirection(void* nativeFunction, ABIFunctionType type)
+ : nativeFunction_(nativeFunction),
+ swiInstruction_(Assembler::AL | (0xf * (1 << 24)) | kCallRtRedirected),
+ type_(type),
+ next_(nullptr) {
+ next_ = SimulatorProcess::redirection();
+ if (!SimulatorProcess::ICacheCheckingDisableCount) {
+ FlushICacheLocked(SimulatorProcess::icache(), addressOfSwiInstruction(),
+ SimInstruction::kInstrSize);
+ }
+ SimulatorProcess::setRedirection(this);
+ }
+
+ public:
+ void* addressOfSwiInstruction() { return &swiInstruction_; }
+ void* nativeFunction() const { return nativeFunction_; }
+ ABIFunctionType type() const { return type_; }
+
+ static Redirection* Get(void* nativeFunction, ABIFunctionType type) {
+ AutoLockSimulatorCache als;
+
+ Redirection* current = SimulatorProcess::redirection();
+ for (; current != nullptr; current = current->next_) {
+ if (current->nativeFunction_ == nativeFunction) {
+ MOZ_ASSERT(current->type() == type);
+ return current;
+ }
+ }
+
+ // Note: we can't use js_new here because the constructor is private.
+ AutoEnterOOMUnsafeRegion oomUnsafe;
+ Redirection* redir = js_pod_malloc<Redirection>(1);
+ if (!redir) {
+ oomUnsafe.crash("Simulator redirection");
+ }
+ new (redir) Redirection(nativeFunction, type);
+ return redir;
+ }
+
+ static Redirection* FromSwiInstruction(SimInstruction* swiInstruction) {
+ uint8_t* addrOfSwi = reinterpret_cast<uint8_t*>(swiInstruction);
+ uint8_t* addrOfRedirection =
+ addrOfSwi - offsetof(Redirection, swiInstruction_);
+ return reinterpret_cast<Redirection*>(addrOfRedirection);
+ }
+
+ private:
+ void* nativeFunction_;
+ uint32_t swiInstruction_;
+ ABIFunctionType type_;
+ Redirection* next_;
+};
+
+Simulator::~Simulator() { js_free(stack_); }
+
+SimulatorProcess::SimulatorProcess()
+ : cacheLock_(mutexid::SimulatorCacheLock), redirection_(nullptr) {
+ if (getenv("ARM_SIM_ICACHE_CHECKS")) {
+ ICacheCheckingDisableCount = 0;
+ }
+}
+
+SimulatorProcess::~SimulatorProcess() {
+ Redirection* r = redirection_;
+ while (r) {
+ Redirection* next = r->next_;
+ js_delete(r);
+ r = next;
+ }
+}
+
+/* static */
+void* Simulator::RedirectNativeFunction(void* nativeFunction,
+ ABIFunctionType type) {
+ Redirection* redirection = Redirection::Get(nativeFunction, type);
+ return redirection->addressOfSwiInstruction();
+}
+
+// Sets the register in the architecture state. It will also deal with updating
+// Simulator internal state for special registers such as PC.
+void Simulator::set_register(int reg, int32_t value) {
+ MOZ_ASSERT(reg >= 0 && reg < num_registers);
+ if (reg == pc) {
+ pc_modified_ = true;
+ }
+ registers_[reg] = value;
+}
+
+// Get the register from the architecture state. This function does handle the
+// special case of accessing the PC register.
+int32_t Simulator::get_register(int reg) const {
+ MOZ_ASSERT(reg >= 0 && reg < num_registers);
+ // Work around GCC bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43949
+ if (reg >= num_registers) return 0;
+ return registers_[reg] + ((reg == pc) ? SimInstruction::kPCReadOffset : 0);
+}
+
+double Simulator::get_double_from_register_pair(int reg) {
+ MOZ_ASSERT(reg >= 0 && reg < num_registers && (reg % 2) == 0);
+
+ // Read the bits from the unsigned integer register_[] array into the double
+ // precision floating point value and return it.
+ double dm_val = 0.0;
+ char buffer[2 * sizeof(vfp_registers_[0])];
+ memcpy(buffer, &registers_[reg], 2 * sizeof(registers_[0]));
+ memcpy(&dm_val, buffer, 2 * sizeof(registers_[0]));
+ return dm_val;
+}
+
+void Simulator::set_register_pair_from_double(int reg, double* value) {
+ MOZ_ASSERT(reg >= 0 && reg < num_registers && (reg % 2) == 0);
+ memcpy(registers_ + reg, value, sizeof(*value));
+}
+
+void Simulator::set_dw_register(int dreg, const int* dbl) {
+ MOZ_ASSERT(dreg >= 0 && dreg < num_d_registers);
+ registers_[dreg] = dbl[0];
+ registers_[dreg + 1] = dbl[1];
+}
+
+void Simulator::get_d_register(int dreg, uint64_t* value) {
+ MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::TotalPhys));
+ memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value));
+}
+
+void Simulator::set_d_register(int dreg, const uint64_t* value) {
+ MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::TotalPhys));
+ memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value));
+}
+
+void Simulator::get_d_register(int dreg, uint32_t* value) {
+ MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::TotalPhys));
+ memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2);
+}
+
+void Simulator::set_d_register(int dreg, const uint32_t* value) {
+ MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::TotalPhys));
+ memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2);
+}
+
+void Simulator::get_q_register(int qreg, uint64_t* value) {
+ MOZ_ASSERT(qreg >= 0 && qreg < num_q_registers);
+ memcpy(value, vfp_registers_ + qreg * 4, sizeof(*value) * 2);
+}
+
+void Simulator::set_q_register(int qreg, const uint64_t* value) {
+ MOZ_ASSERT(qreg >= 0 && qreg < num_q_registers);
+ memcpy(vfp_registers_ + qreg * 4, value, sizeof(*value) * 2);
+}
+
+void Simulator::get_q_register(int qreg, uint32_t* value) {
+ MOZ_ASSERT(qreg >= 0 && qreg < num_q_registers);
+ memcpy(value, vfp_registers_ + qreg * 4, sizeof(*value) * 4);
+}
+
+void Simulator::set_q_register(int qreg, const uint32_t* value) {
+ MOZ_ASSERT((qreg >= 0) && (qreg < num_q_registers));
+ memcpy(vfp_registers_ + qreg * 4, value, sizeof(*value) * 4);
+}
+
+void Simulator::set_pc(int32_t value) {
+ pc_modified_ = true;
+ registers_[pc] = value;
+}
+
+bool Simulator::has_bad_pc() const {
+ return registers_[pc] == bad_lr || registers_[pc] == end_sim_pc;
+}
+
+// Raw access to the PC register without the special adjustment when reading.
+int32_t Simulator::get_pc() const { return registers_[pc]; }
+
+void Simulator::set_s_register(int sreg, unsigned int value) {
+ MOZ_ASSERT(sreg >= 0 && sreg < num_s_registers);
+ vfp_registers_[sreg] = value;
+}
+
+unsigned Simulator::get_s_register(int sreg) const {
+ MOZ_ASSERT(sreg >= 0 && sreg < num_s_registers);
+ return vfp_registers_[sreg];
+}
+
+template <class InputType, int register_size>
+void Simulator::setVFPRegister(int reg_index, const InputType& value) {
+ MOZ_ASSERT(reg_index >= 0);
+ MOZ_ASSERT_IF(register_size == 1, reg_index < num_s_registers);
+ MOZ_ASSERT_IF(register_size == 2, reg_index < int(FloatRegisters::TotalPhys));
+
+ char buffer[register_size * sizeof(vfp_registers_[0])];
+ memcpy(buffer, &value, register_size * sizeof(vfp_registers_[0]));
+ memcpy(&vfp_registers_[reg_index * register_size], buffer,
+ register_size * sizeof(vfp_registers_[0]));
+}
+
+template <class ReturnType, int register_size>
+void Simulator::getFromVFPRegister(int reg_index, ReturnType* out) {
+ MOZ_ASSERT(reg_index >= 0);
+ MOZ_ASSERT_IF(register_size == 1, reg_index < num_s_registers);
+ MOZ_ASSERT_IF(register_size == 2, reg_index < int(FloatRegisters::TotalPhys));
+
+ char buffer[register_size * sizeof(vfp_registers_[0])];
+ memcpy(buffer, &vfp_registers_[register_size * reg_index],
+ register_size * sizeof(vfp_registers_[0]));
+ memcpy(out, buffer, register_size * sizeof(vfp_registers_[0]));
+}
+
+// These forced instantiations are for jsapi-tests; nothing else in this file
+// requires them to be instantiated.
+template void Simulator::getFromVFPRegister<double, 2>(int reg_index,
+ double* out);
+template void Simulator::getFromVFPRegister<float, 1>(int reg_index,
+ float* out);
+template void Simulator::setVFPRegister<double, 2>(int reg_index,
+ const double& value);
+template void Simulator::setVFPRegister<float, 1>(int reg_index,
+ const float& value);
+
+void Simulator::getFpArgs(double* x, double* y, int32_t* z) {
+ if (UseHardFpABI()) {
+ get_double_from_d_register(0, x);
+ get_double_from_d_register(1, y);
+ *z = get_register(0);
+ } else {
+ *x = get_double_from_register_pair(0);
+ *y = get_double_from_register_pair(2);
+ *z = get_register(2);
+ }
+}
+
+void Simulator::getFpFromStack(int32_t* stack, double* x) {
+ MOZ_ASSERT(stack && x);
+ char buffer[2 * sizeof(stack[0])];
+ memcpy(buffer, stack, 2 * sizeof(stack[0]));
+ memcpy(x, buffer, 2 * sizeof(stack[0]));
+}
+
+void Simulator::setCallResultDouble(double result) {
+ // The return value is either in r0/r1 or d0.
+ if (UseHardFpABI()) {
+ char buffer[2 * sizeof(vfp_registers_[0])];
+ memcpy(buffer, &result, sizeof(buffer));
+ // Copy result to d0.
+ memcpy(vfp_registers_, buffer, sizeof(buffer));
+ } else {
+ char buffer[2 * sizeof(registers_[0])];
+ memcpy(buffer, &result, sizeof(buffer));
+ // Copy result to r0 and r1.
+ memcpy(registers_, buffer, sizeof(buffer));
+ }
+}
+
+void Simulator::setCallResultFloat(float result) {
+ if (UseHardFpABI()) {
+ char buffer[sizeof(registers_[0])];
+ memcpy(buffer, &result, sizeof(buffer));
+ // Copy result to s0.
+ memcpy(vfp_registers_, buffer, sizeof(buffer));
+ } else {
+ char buffer[sizeof(registers_[0])];
+ memcpy(buffer, &result, sizeof(buffer));
+ // Copy result to r0.
+ memcpy(registers_, buffer, sizeof(buffer));
+ }
+}
+
+void Simulator::setCallResult(int64_t res) {
+ set_register(r0, static_cast<int32_t>(res));
+ set_register(r1, static_cast<int32_t>(res >> 32));
+}
+
+void Simulator::exclusiveMonitorSet(uint64_t value) {
+ exclusiveMonitor_ = value;
+ exclusiveMonitorHeld_ = true;
+}
+
+uint64_t Simulator::exclusiveMonitorGetAndClear(bool* held) {
+ *held = exclusiveMonitorHeld_;
+ exclusiveMonitorHeld_ = false;
+ return *held ? exclusiveMonitor_ : 0;
+}
+
+void Simulator::exclusiveMonitorClear() { exclusiveMonitorHeld_ = false; }
+
+JS::ProfilingFrameIterator::RegisterState Simulator::registerState() {
+ wasm::RegisterState state;
+ state.pc = (void*)get_pc();
+ state.fp = (void*)get_register(fp);
+ state.sp = (void*)get_register(sp);
+ state.lr = (void*)get_register(lr);
+ return state;
+}
+
+uint64_t Simulator::readQ(int32_t addr, SimInstruction* instr,
+ UnalignedPolicy f) {
+ if (handleWasmSegFault(addr, 8)) {
+ return UINT64_MAX;
+ }
+
+ if ((addr & 3) == 0 || (f == AllowUnaligned && !HasAlignmentFault())) {
+ uint64_t* ptr = reinterpret_cast<uint64_t*>(addr);
+ return *ptr;
+ }
+
+ // See the comments below in readW.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ uint64_t value;
+ memcpy(&value, ptr, sizeof(value));
+ return value;
+ }
+
+ printf("Unaligned read at 0x%08x, pc=%p\n", addr, instr);
+ MOZ_CRASH();
+}
+
+void Simulator::writeQ(int32_t addr, uint64_t value, SimInstruction* instr,
+ UnalignedPolicy f) {
+ if (handleWasmSegFault(addr, 8)) {
+ return;
+ }
+
+ if ((addr & 3) == 0 || (f == AllowUnaligned && !HasAlignmentFault())) {
+ uint64_t* ptr = reinterpret_cast<uint64_t*>(addr);
+ *ptr = value;
+ return;
+ }
+
+ // See the comments below in readW.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ memcpy(ptr, &value, sizeof(value));
+ return;
+ }
+
+ printf("Unaligned write at 0x%08x, pc=%p\n", addr, instr);
+ MOZ_CRASH();
+}
+
+int Simulator::readW(int32_t addr, SimInstruction* instr, UnalignedPolicy f) {
+ if (handleWasmSegFault(addr, 4)) {
+ return -1;
+ }
+
+ if ((addr & 3) == 0 || (f == AllowUnaligned && !HasAlignmentFault())) {
+ intptr_t* ptr = reinterpret_cast<intptr_t*>(addr);
+ return *ptr;
+ }
+
+ // In WebAssembly, we want unaligned accesses to either raise a signal or
+ // do the right thing. Making this simulator properly emulate the behavior
+ // of raising a signal is complex, so as a special-case, when in wasm code,
+ // we just do the right thing.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ int value;
+ memcpy(&value, ptr, sizeof(value));
+ return value;
+ }
+
+ printf("Unaligned read at 0x%08x, pc=%p\n", addr, instr);
+ MOZ_CRASH();
+}
+
+void Simulator::writeW(int32_t addr, int value, SimInstruction* instr,
+ UnalignedPolicy f) {
+ if (handleWasmSegFault(addr, 4)) {
+ return;
+ }
+
+ if ((addr & 3) == 0 || (f == AllowUnaligned && !HasAlignmentFault())) {
+ intptr_t* ptr = reinterpret_cast<intptr_t*>(addr);
+ *ptr = value;
+ return;
+ }
+
+ // See the comments above in readW.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ memcpy(ptr, &value, sizeof(value));
+ return;
+ }
+
+ printf("Unaligned write at 0x%08x, pc=%p\n", addr, instr);
+ MOZ_CRASH();
+}
+
+// For the time being, define Relaxed operations in terms of SeqCst
+// operations - we don't yet need Relaxed operations anywhere else in
+// the system, and the distinction is not important to the simulation
+// at the level where we're operating.
+
+template <typename T>
+static T loadRelaxed(SharedMem<T*> addr) {
+ return AtomicOperations::loadSeqCst(addr);
+}
+
+template <typename T>
+static T compareExchangeRelaxed(SharedMem<T*> addr, T oldval, T newval) {
+ return AtomicOperations::compareExchangeSeqCst(addr, oldval, newval);
+}
+
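+// The readEx*/writeEx* methods below model ldrex/strex: on success a writeEx*
+// store returns 0, and on a lost reservation or value mismatch it returns 1,
+// mirroring the status value that strex writes to its destination register.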
+int Simulator::readExW(int32_t addr, SimInstruction* instr) {
+ if (addr & 3) {
+ MOZ_CRASH("Unaligned exclusive read");
+ }
+
+ if (handleWasmSegFault(addr, 4)) {
+ return -1;
+ }
+
+ SharedMem<int32_t*> ptr =
+ SharedMem<int32_t*>::shared(reinterpret_cast<int32_t*>(addr));
+ int32_t value = loadRelaxed(ptr);
+ exclusiveMonitorSet(value);
+ return value;
+}
+
+int32_t Simulator::writeExW(int32_t addr, int value, SimInstruction* instr) {
+ if (addr & 3) {
+ MOZ_CRASH("Unaligned exclusive write");
+ }
+
+ if (handleWasmSegFault(addr, 4)) {
+ return -1;
+ }
+
+ SharedMem<int32_t*> ptr =
+ SharedMem<int32_t*>::shared(reinterpret_cast<int32_t*>(addr));
+ bool held;
+ int32_t expected = int32_t(exclusiveMonitorGetAndClear(&held));
+ if (!held) {
+ return 1;
+ }
+ int32_t old = compareExchangeRelaxed(ptr, expected, int32_t(value));
+ return old != expected;
+}
+
+uint16_t Simulator::readHU(int32_t addr, SimInstruction* instr) {
+ if (handleWasmSegFault(addr, 2)) {
+ return UINT16_MAX;
+ }
+
+ // The regexp engine emits unaligned loads, so we don't check for them here
+ // like most of the other methods do.
+ if ((addr & 1) == 0 || !HasAlignmentFault()) {
+ uint16_t* ptr = reinterpret_cast<uint16_t*>(addr);
+ return *ptr;
+ }
+
+ // See comments above in readW.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ uint16_t value;
+ memcpy(&value, ptr, sizeof(value));
+ return value;
+ }
+
+ printf("Unaligned unsigned halfword read at 0x%08x, pc=%p\n", addr, instr);
+ MOZ_CRASH();
+ return 0;
+}
+
+int16_t Simulator::readH(int32_t addr, SimInstruction* instr) {
+ if (handleWasmSegFault(addr, 2)) {
+ return -1;
+ }
+
+ if ((addr & 1) == 0 || !HasAlignmentFault()) {
+ int16_t* ptr = reinterpret_cast<int16_t*>(addr);
+ return *ptr;
+ }
+
+ // See comments above in readW.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ int16_t value;
+ memcpy(&value, ptr, sizeof(value));
+ return value;
+ }
+
+ printf("Unaligned signed halfword read at 0x%08x\n", addr);
+ MOZ_CRASH();
+ return 0;
+}
+
+void Simulator::writeH(int32_t addr, uint16_t value, SimInstruction* instr) {
+ if (handleWasmSegFault(addr, 2)) {
+ return;
+ }
+
+ if ((addr & 1) == 0 || !HasAlignmentFault()) {
+ uint16_t* ptr = reinterpret_cast<uint16_t*>(addr);
+ *ptr = value;
+ return;
+ }
+
+ // See the comments above in readW.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ memcpy(ptr, &value, sizeof(value));
+ return;
+ }
+
+ printf("Unaligned unsigned halfword write at 0x%08x, pc=%p\n", addr, instr);
+ MOZ_CRASH();
+}
+
+void Simulator::writeH(int32_t addr, int16_t value, SimInstruction* instr) {
+ if (handleWasmSegFault(addr, 2)) {
+ return;
+ }
+
+ if ((addr & 1) == 0 || !HasAlignmentFault()) {
+ int16_t* ptr = reinterpret_cast<int16_t*>(addr);
+ *ptr = value;
+ return;
+ }
+
+ // See the comments above in readW.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ memcpy(ptr, &value, sizeof(value));
+ return;
+ }
+
+ printf("Unaligned halfword write at 0x%08x, pc=%p\n", addr, instr);
+ MOZ_CRASH();
+}
+
+uint16_t Simulator::readExHU(int32_t addr, SimInstruction* instr) {
+ if (addr & 1) {
+ MOZ_CRASH("Unaligned exclusive read");
+ }
+
+ if (handleWasmSegFault(addr, 2)) {
+ return UINT16_MAX;
+ }
+
+ SharedMem<uint16_t*> ptr =
+ SharedMem<uint16_t*>::shared(reinterpret_cast<uint16_t*>(addr));
+ uint16_t value = loadRelaxed(ptr);
+ exclusiveMonitorSet(value);
+ return value;
+}
+
+int32_t Simulator::writeExH(int32_t addr, uint16_t value,
+ SimInstruction* instr) {
+ if (addr & 1) {
+ MOZ_CRASH("Unaligned exclusive write");
+ }
+
+ if (handleWasmSegFault(addr, 2)) {
+ return -1;
+ }
+
+ SharedMem<uint16_t*> ptr =
+ SharedMem<uint16_t*>::shared(reinterpret_cast<uint16_t*>(addr));
+ bool held;
+ uint16_t expected = uint16_t(exclusiveMonitorGetAndClear(&held));
+ if (!held) {
+ return 1;
+ }
+ uint16_t old = compareExchangeRelaxed(ptr, expected, value);
+ return old != expected;
+}
+
+uint8_t Simulator::readBU(int32_t addr) {
+ if (handleWasmSegFault(addr, 1)) {
+ return UINT8_MAX;
+ }
+
+ uint8_t* ptr = reinterpret_cast<uint8_t*>(addr);
+ return *ptr;
+}
+
+uint8_t Simulator::readExBU(int32_t addr) {
+ if (handleWasmSegFault(addr, 1)) {
+ return UINT8_MAX;
+ }
+
+ SharedMem<uint8_t*> ptr =
+ SharedMem<uint8_t*>::shared(reinterpret_cast<uint8_t*>(addr));
+ uint8_t value = loadRelaxed(ptr);
+ exclusiveMonitorSet(value);
+ return value;
+}
+
+int32_t Simulator::writeExB(int32_t addr, uint8_t value) {
+ if (handleWasmSegFault(addr, 1)) {
+ return -1;
+ }
+
+ SharedMem<uint8_t*> ptr =
+ SharedMem<uint8_t*>::shared(reinterpret_cast<uint8_t*>(addr));
+ bool held;
+ uint8_t expected = uint8_t(exclusiveMonitorGetAndClear(&held));
+ if (!held) {
+ return 1;
+ }
+ uint8_t old = compareExchangeRelaxed(ptr, expected, value);
+ return old != expected;
+}
+
+int8_t Simulator::readB(int32_t addr) {
+ if (handleWasmSegFault(addr, 1)) {
+ return -1;
+ }
+
+ int8_t* ptr = reinterpret_cast<int8_t*>(addr);
+ return *ptr;
+}
+
+void Simulator::writeB(int32_t addr, uint8_t value) {
+ if (handleWasmSegFault(addr, 1)) {
+ return;
+ }
+
+ uint8_t* ptr = reinterpret_cast<uint8_t*>(addr);
+ *ptr = value;
+}
+
+void Simulator::writeB(int32_t addr, int8_t value) {
+ if (handleWasmSegFault(addr, 1)) {
+ return;
+ }
+
+ int8_t* ptr = reinterpret_cast<int8_t*>(addr);
+ *ptr = value;
+}
+
+int32_t* Simulator::readDW(int32_t addr) {
+ if (handleWasmSegFault(addr, 8)) {
+ return nullptr;
+ }
+
+ if ((addr & 3) == 0) {
+ int32_t* ptr = reinterpret_cast<int32_t*>(addr);
+ return ptr;
+ }
+
+ printf("Unaligned read at 0x%08x\n", addr);
+ MOZ_CRASH();
+}
+
+void Simulator::writeDW(int32_t addr, int32_t value1, int32_t value2) {
+ if (handleWasmSegFault(addr, 8)) {
+ return;
+ }
+
+ if ((addr & 3) == 0) {
+ int32_t* ptr = reinterpret_cast<int32_t*>(addr);
+ *ptr++ = value1;
+ *ptr = value2;
+ return;
+ }
+
+ printf("Unaligned write at 0x%08x\n", addr);
+ MOZ_CRASH();
+}
+
+int32_t Simulator::readExDW(int32_t addr, int32_t* hibits) {
+ if (addr & 3) {
+ MOZ_CRASH("Unaligned exclusive read");
+ }
+
+ if (handleWasmSegFault(addr, 8)) {
+ return -1;
+ }
+
+ SharedMem<uint64_t*> ptr =
+ SharedMem<uint64_t*>::shared(reinterpret_cast<uint64_t*>(addr));
+ // The spec says that the low part of value shall be read from addr and
+ // the high part shall be read from addr+4. On a little-endian system
+ // where we read a 64-bit quadword the low part of the value will be in
+ // the low part of the quadword, and the high part of the value in the
+ // high part of the quadword.
+ uint64_t value = loadRelaxed(ptr);
+ exclusiveMonitorSet(value);
+ *hibits = int32_t(value >> 32);
+ return int32_t(value);
+}
+
+int32_t Simulator::writeExDW(int32_t addr, int32_t value1, int32_t value2) {
+ if (addr & 3) {
+ MOZ_CRASH("Unaligned exclusive write");
+ }
+
+ if (handleWasmSegFault(addr, 8)) {
+ return -1;
+ }
+
+ SharedMem<uint64_t*> ptr =
+ SharedMem<uint64_t*>::shared(reinterpret_cast<uint64_t*>(addr));
+ // The spec says that value1 shall be stored at addr and value2 at
+ // addr+4. On a little-endian system that means constructing a 64-bit
+ // value where value1 is in the low half of a 64-bit quadword and value2
+ // is in the high half of the quadword.
+ uint64_t value = (uint64_t(value2) << 32) | uint32_t(value1);
+ bool held;
+ uint64_t expected = exclusiveMonitorGetAndClear(&held);
+ if (!held) {
+ return 1;
+ }
+ uint64_t old = compareExchangeRelaxed(ptr, expected, value);
+ return old != expected;
+}
+
+uintptr_t Simulator::stackLimit() const { return stackLimit_; }
+
+uintptr_t* Simulator::addressOfStackLimit() { return &stackLimit_; }
+
+bool Simulator::overRecursed(uintptr_t newsp) const {
+ if (newsp == 0) {
+ newsp = get_register(sp);
+ }
+ return newsp <= stackLimit();
+}
+
+bool Simulator::overRecursedWithExtra(uint32_t extra) const {
+ uintptr_t newsp = get_register(sp) - extra;
+ return newsp <= stackLimit();
+}
+
+// Checks if the current instruction should be executed based on its condition
+// bits.
+bool Simulator::conditionallyExecute(SimInstruction* instr) {
+ switch (instr->conditionField()) {
+ case Assembler::EQ:
+ return z_flag_;
+ case Assembler::NE:
+ return !z_flag_;
+ case Assembler::CS:
+ return c_flag_;
+ case Assembler::CC:
+ return !c_flag_;
+ case Assembler::MI:
+ return n_flag_;
+ case Assembler::PL:
+ return !n_flag_;
+ case Assembler::VS:
+ return v_flag_;
+ case Assembler::VC:
+ return !v_flag_;
+ case Assembler::HI:
+ return c_flag_ && !z_flag_;
+ case Assembler::LS:
+ return !c_flag_ || z_flag_;
+ case Assembler::GE:
+ return n_flag_ == v_flag_;
+ case Assembler::LT:
+ return n_flag_ != v_flag_;
+ case Assembler::GT:
+ return !z_flag_ && (n_flag_ == v_flag_);
+ case Assembler::LE:
+ return z_flag_ || (n_flag_ != v_flag_);
+ case Assembler::AL:
+ return true;
+ default:
+ MOZ_CRASH();
+ }
+ return false;
+}
+
+// Calculate and set the Negative and Zero flags.
+void Simulator::setNZFlags(int32_t val) {
+ n_flag_ = (val < 0);
+ z_flag_ = (val == 0);
+}
+
+// Set the Carry flag.
+void Simulator::setCFlag(bool val) { c_flag_ = val; }
+
+// Set the oVerflow flag.
+void Simulator::setVFlag(bool val) { v_flag_ = val; }
+
+// Calculate C flag value for additions.
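+// For example, adding 0xffffffff and 1 wraps past 2^32, so carryFrom reports
+// true and an ARM adds of those operands would set the C flag.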
+bool Simulator::carryFrom(int32_t left, int32_t right, int32_t carry) {
+ uint32_t uleft = static_cast<uint32_t>(left);
+ uint32_t uright = static_cast<uint32_t>(right);
+ uint32_t urest = 0xffffffffU - uleft;
+ return (uright > urest) ||
+ (carry && (((uright + 1) > urest) || (uright > (urest - 1))));
+}
+
+// Calculate C flag value for subtractions.
+bool Simulator::borrowFrom(int32_t left, int32_t right) {
+ uint32_t uleft = static_cast<uint32_t>(left);
+ uint32_t uright = static_cast<uint32_t>(right);
+ return (uright > uleft);
+}
+
+// Calculate V flag value for additions and subtractions.
+bool Simulator::overflowFrom(int32_t alu_out, int32_t left, int32_t right,
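+// For example, 0x7fffffff + 1 gives alu_out 0x80000000: both operands are
+// non-negative but the result is negative, so the V flag is set.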
+ bool addition) {
+ bool overflow;
+ if (addition) {
+ // Operands have the same sign.
+ overflow = ((left >= 0 && right >= 0) || (left < 0 && right < 0))
+ // And operands and result have different sign.
+ && ((left < 0 && alu_out >= 0) || (left >= 0 && alu_out < 0));
+ } else {
+ // Operands have different signs.
+ overflow = ((left < 0 && right >= 0) || (left >= 0 && right < 0))
+ // And first operand and result have different signs.
+ && ((left < 0 && alu_out >= 0) || (left >= 0 && alu_out < 0));
+ }
+ return overflow;
+}
+
+// Support for VFP comparisons.
+void Simulator::compute_FPSCR_Flags(double val1, double val2) {
+ if (std::isnan(val1) || std::isnan(val2)) {
+ n_flag_FPSCR_ = false;
+ z_flag_FPSCR_ = false;
+ c_flag_FPSCR_ = true;
+ v_flag_FPSCR_ = true;
+ // All non-NaN cases.
+ } else if (val1 == val2) {
+ n_flag_FPSCR_ = false;
+ z_flag_FPSCR_ = true;
+ c_flag_FPSCR_ = true;
+ v_flag_FPSCR_ = false;
+ } else if (val1 < val2) {
+ n_flag_FPSCR_ = true;
+ z_flag_FPSCR_ = false;
+ c_flag_FPSCR_ = false;
+ v_flag_FPSCR_ = false;
+ } else {
+ // Case when (val1 > val2).
+ n_flag_FPSCR_ = false;
+ z_flag_FPSCR_ = false;
+ c_flag_FPSCR_ = true;
+ v_flag_FPSCR_ = false;
+ }
+}
+
+void Simulator::copy_FPSCR_to_APSR() {
+ n_flag_ = n_flag_FPSCR_;
+ z_flag_ = z_flag_FPSCR_;
+ c_flag_ = c_flag_FPSCR_;
+ v_flag_ = v_flag_FPSCR_;
+}
+
+// Addressing Mode 1 - Data-processing operands:
+// Get the value based on the shifter_operand with register.
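+//
+// For example, for the operand "r1, LSL #3" the shift type is LSL and
+// shift_amount is 3; the result is get_register(r1) << 3 and, when flags are
+// updated, the carry out is bit 29 of the original register value.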
+int32_t Simulator::getShiftRm(SimInstruction* instr, bool* carry_out) {
+ ShiftType shift = instr->shifttypeValue();
+ int shift_amount = instr->shiftAmountValue();
+ int32_t result = get_register(instr->rmValue());
+ if (instr->bit(4) == 0) {
+ // By immediate.
+ if (shift == ROR && shift_amount == 0) {
+ MOZ_CRASH("NYI");
+ return result;
+ }
+ if ((shift == LSR || shift == ASR) && shift_amount == 0) {
+ shift_amount = 32;
+ }
+ switch (shift) {
+ case ASR: {
+ if (shift_amount == 0) {
+ if (result < 0) {
+ result = 0xffffffff;
+ *carry_out = true;
+ } else {
+ result = 0;
+ *carry_out = false;
+ }
+ } else {
+ result >>= (shift_amount - 1);
+ *carry_out = (result & 1) == 1;
+ result >>= 1;
+ }
+ break;
+ }
+
+ case LSL: {
+ if (shift_amount == 0) {
+ *carry_out = c_flag_;
+ } else {
+ result <<= (shift_amount - 1);
+ *carry_out = (result < 0);
+ result <<= 1;
+ }
+ break;
+ }
+
+ case LSR: {
+ if (shift_amount == 0) {
+ result = 0;
+ *carry_out = c_flag_;
+ } else {
+ uint32_t uresult = static_cast<uint32_t>(result);
+ uresult >>= (shift_amount - 1);
+ *carry_out = (uresult & 1) == 1;
+ uresult >>= 1;
+ result = static_cast<int32_t>(uresult);
+ }
+ break;
+ }
+
+ case ROR: {
+ if (shift_amount == 0) {
+ *carry_out = c_flag_;
+ } else {
+ uint32_t left = static_cast<uint32_t>(result) >> shift_amount;
+ uint32_t right = static_cast<uint32_t>(result) << (32 - shift_amount);
+ result = right | left;
+ *carry_out = (static_cast<uint32_t>(result) >> 31) != 0;
+ }
+ break;
+ }
+
+ default:
+ MOZ_CRASH();
+ }
+ } else {
+ // By register.
+ int rs = instr->rsValue();
+ shift_amount = get_register(rs) & 0xff;
+ switch (shift) {
+ case ASR: {
+ if (shift_amount == 0) {
+ *carry_out = c_flag_;
+ } else if (shift_amount < 32) {
+ result >>= (shift_amount - 1);
+ *carry_out = (result & 1) == 1;
+ result >>= 1;
+ } else {
+ MOZ_ASSERT(shift_amount >= 32);
+ if (result < 0) {
+ *carry_out = true;
+ result = 0xffffffff;
+ } else {
+ *carry_out = false;
+ result = 0;
+ }
+ }
+ break;
+ }
+
+ case LSL: {
+ if (shift_amount == 0) {
+ *carry_out = c_flag_;
+ } else if (shift_amount < 32) {
+ result <<= (shift_amount - 1);
+ *carry_out = (result < 0);
+ result <<= 1;
+ } else if (shift_amount == 32) {
+ *carry_out = (result & 1) == 1;
+ result = 0;
+ } else {
+ MOZ_ASSERT(shift_amount > 32);
+ *carry_out = false;
+ result = 0;
+ }
+ break;
+ }
+
+ case LSR: {
+ if (shift_amount == 0) {
+ *carry_out = c_flag_;
+ } else if (shift_amount < 32) {
+ uint32_t uresult = static_cast<uint32_t>(result);
+ uresult >>= (shift_amount - 1);
+ *carry_out = (uresult & 1) == 1;
+ uresult >>= 1;
+ result = static_cast<int32_t>(uresult);
+ } else if (shift_amount == 32) {
+ *carry_out = (result < 0);
+ result = 0;
+ } else {
+ *carry_out = false;
+ result = 0;
+ }
+ break;
+ }
+
+ case ROR: {
+ if (shift_amount == 0) {
+ *carry_out = c_flag_;
+ } else {
+ uint32_t left = static_cast<uint32_t>(result) >> shift_amount;
+ uint32_t right = static_cast<uint32_t>(result) << (32 - shift_amount);
+ result = right | left;
+ *carry_out = (static_cast<uint32_t>(result) >> 31) != 0;
+ }
+ break;
+ }
+
+ default:
+ MOZ_CRASH();
+ }
+ }
+ return result;
+}
+
+// Addressing Mode 1 - Data-processing operands:
+// Get the value based on the shifter_operand with immediate.
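+//
+// For example, a rotate field of 4 with immed8 == 0xff rotates 0xff right by
+// 8 bits, producing 0xff000000; since bit 31 of the result is set, carry_out
+// is true.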
+int32_t Simulator::getImm(SimInstruction* instr, bool* carry_out) {
+ int rotate = instr->rotateValue() * 2;
+ int immed8 = instr->immed8Value();
+ int imm = (immed8 >> rotate) | (immed8 << (32 - rotate));
+ *carry_out = (rotate == 0) ? c_flag_ : (imm < 0);
+ return imm;
+}
+
+int32_t Simulator::processPU(SimInstruction* instr, int num_regs, int reg_size,
+ intptr_t* start_address, intptr_t* end_address) {
+ int rn = instr->rnValue();
+ int32_t rn_val = get_register(rn);
+ switch (instr->PUField()) {
+ case da_x:
+ MOZ_CRASH();
+ break;
+ case ia_x:
+ *start_address = rn_val;
+ *end_address = rn_val + (num_regs * reg_size) - reg_size;
+ rn_val = rn_val + (num_regs * reg_size);
+ break;
+ case db_x:
+ *start_address = rn_val - (num_regs * reg_size);
+ *end_address = rn_val - reg_size;
+ rn_val = *start_address;
+ break;
+ case ib_x:
+ *start_address = rn_val + reg_size;
+ *end_address = rn_val + (num_regs * reg_size);
+ rn_val = *end_address;
+ break;
+ default:
+ MOZ_CRASH();
+ }
+ return rn_val;
+}
+
+// Addressing Mode 4 - Load and Store Multiple
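+// For example, an stmdb with writeback of three registers (db_x) gets
+// start_address == rn - 12 and end_address == rn - 4 from processPU, stores
+// the registers in ascending register order from start_address, and then
+// writes start_address back to rn because the W bit is set.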
+void Simulator::handleRList(SimInstruction* instr, bool load) {
+ int rlist = instr->rlistValue();
+ int num_regs = mozilla::CountPopulation32(rlist);
+
+ intptr_t start_address = 0;
+ intptr_t end_address = 0;
+ int32_t rn_val =
+ processPU(instr, num_regs, sizeof(void*), &start_address, &end_address);
+ intptr_t* address = reinterpret_cast<intptr_t*>(start_address);
+
+ // Catch null pointers a little earlier.
+ MOZ_ASSERT(start_address > 8191 || start_address < 0);
+
+ int reg = 0;
+ while (rlist != 0) {
+ if ((rlist & 1) != 0) {
+ if (load) {
+ set_register(reg, *address);
+ } else {
+ *address = get_register(reg);
+ }
+ address += 1;
+ }
+ reg++;
+ rlist >>= 1;
+ }
+ MOZ_ASSERT(end_address == ((intptr_t)address) - 4);
+ if (instr->hasW()) {
+ set_register(instr->rnValue(), rn_val);
+ }
+}
+
+// Addressing Mode 6 - Load and Store Multiple Coprocessor registers.
+void Simulator::handleVList(SimInstruction* instr) {
+ VFPRegPrecision precision =
+ (instr->szValue() == 0) ? kSinglePrecision : kDoublePrecision;
+ int operand_size = (precision == kSinglePrecision) ? 4 : 8;
+ bool load = (instr->VLValue() == 0x1);
+
+ int vd;
+ int num_regs;
+ vd = instr->VFPDRegValue(precision);
+ if (precision == kSinglePrecision) {
+ num_regs = instr->immed8Value();
+ } else {
+ num_regs = instr->immed8Value() / 2;
+ }
+
+ intptr_t start_address = 0;
+ intptr_t end_address = 0;
+ int32_t rn_val =
+ processPU(instr, num_regs, operand_size, &start_address, &end_address);
+
+ intptr_t* address = reinterpret_cast<intptr_t*>(start_address);
+ for (int reg = vd; reg < vd + num_regs; reg++) {
+ if (precision == kSinglePrecision) {
+ if (load) {
+ set_s_register_from_sinteger(
+ reg, readW(reinterpret_cast<int32_t>(address), instr));
+ } else {
+ writeW(reinterpret_cast<int32_t>(address),
+ get_sinteger_from_s_register(reg), instr);
+ }
+ address += 1;
+ } else {
+ if (load) {
+ int32_t data[] = {readW(reinterpret_cast<int32_t>(address), instr),
+ readW(reinterpret_cast<int32_t>(address + 1), instr)};
+ double d;
+ memcpy(&d, data, 8);
+ set_d_register_from_double(reg, d);
+ } else {
+ int32_t data[2];
+ double d;
+ get_double_from_d_register(reg, &d);
+ memcpy(data, &d, 8);
+ writeW(reinterpret_cast<int32_t>(address), data[0], instr);
+ writeW(reinterpret_cast<int32_t>(address + 1), data[1], instr);
+ }
+ address += 2;
+ }
+ }
+ MOZ_ASSERT(reinterpret_cast<intptr_t>(address) - operand_size == end_address);
+ if (instr->hasW()) {
+ set_register(instr->rnValue(), rn_val);
+ }
+}
+
+// Note: With the code below we assume that all runtime calls return a 64-bit
+// result. If they don't, the r1 result register contains a bogus value, which
+// is fine because it is caller-saved.
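+// These prototypes correspond to the Args_* signatures dispatched in
+// softwareInterrupt() below; 64-bit integer arguments are rebuilt from two
+// 32-bit halves with MakeInt64().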
+typedef int64_t (*Prototype_General0)();
+typedef int64_t (*Prototype_General1)(int32_t arg0);
+typedef int64_t (*Prototype_General2)(int32_t arg0, int32_t arg1);
+typedef int64_t (*Prototype_General3)(int32_t arg0, int32_t arg1, int32_t arg2);
+typedef int64_t (*Prototype_General4)(int32_t arg0, int32_t arg1, int32_t arg2,
+ int32_t arg3);
+typedef int64_t (*Prototype_General5)(int32_t arg0, int32_t arg1, int32_t arg2,
+ int32_t arg3, int32_t arg4);
+typedef int64_t (*Prototype_General6)(int32_t arg0, int32_t arg1, int32_t arg2,
+ int32_t arg3, int32_t arg4, int32_t arg5);
+typedef int64_t (*Prototype_General7)(int32_t arg0, int32_t arg1, int32_t arg2,
+ int32_t arg3, int32_t arg4, int32_t arg5,
+ int32_t arg6);
+typedef int64_t (*Prototype_General8)(int32_t arg0, int32_t arg1, int32_t arg2,
+ int32_t arg3, int32_t arg4, int32_t arg5,
+ int32_t arg6, int32_t arg7);
+typedef int64_t (*Prototype_GeneralGeneralGeneralInt64)(int32_t arg0,
+ int32_t arg1,
+ int32_t arg2,
+ int64_t arg3);
+typedef int64_t (*Prototype_GeneralGeneralInt64Int64)(int32_t arg0,
+ int32_t arg1,
+ int64_t arg2,
+ int64_t arg3);
+
+typedef double (*Prototype_Double_None)();
+typedef double (*Prototype_Double_Double)(double arg0);
+typedef double (*Prototype_Double_Int)(int32_t arg0);
+typedef double (*Prototype_Double_IntInt)(int32_t arg0, int32_t arg1);
+typedef int32_t (*Prototype_Int_Double)(double arg0);
+typedef int64_t (*Prototype_Int64_Double)(double arg0);
+typedef int32_t (*Prototype_Int_DoubleIntInt)(double arg0, int32_t arg1,
+ int32_t arg2);
+typedef int32_t (*Prototype_Int_IntDoubleIntInt)(int32_t arg0, double arg1,
+ int32_t arg2, int32_t arg3);
+
+typedef int32_t (*Prototype_Int_Float32)(float arg0);
+typedef float (*Prototype_Float32_Float32)(float arg0);
+typedef float (*Prototype_Float32_Float32Float32)(float arg0, float arg1);
+typedef float (*Prototype_Float32_IntInt)(int arg0, int arg1);
+
+typedef double (*Prototype_Double_DoubleInt)(double arg0, int32_t arg1);
+typedef double (*Prototype_Double_IntDouble)(int32_t arg0, double arg1);
+typedef double (*Prototype_Double_DoubleDouble)(double arg0, double arg1);
+typedef int32_t (*Prototype_Int_IntDouble)(int32_t arg0, double arg1);
+typedef int32_t (*Prototype_Int_DoubleInt)(double arg0, int32_t arg1);
+
+typedef double (*Prototype_Double_DoubleDoubleDouble)(double arg0, double arg1,
+ double arg2);
+typedef double (*Prototype_Double_DoubleDoubleDoubleDouble)(double arg0,
+ double arg1,
+ double arg2,
+ double arg3);
+
+typedef int32_t (*Prototype_Int32_General)(int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32)(int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32)(int32_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32)(int32_t, int32_t,
+ int32_t, int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32Int32)(
+ int32_t, int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32General)(
+ int32_t, int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32Int32Int32General)(
+ int32_t, int32_t, int32_t, int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (
+ *Prototype_Int32_GeneralInt32Float32Float32Int32Int32Int32General)(
+ int32_t, int32_t, float, float, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (
+ *Prototype_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General)(
+ int32_t, int32_t, float, float, float, float, int32_t, int32_t, int32_t,
+ int32_t, int32_t);
+typedef int32_t (
+ *Prototype_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General)(
+ int32_t, int32_t, float, float, int32_t, float, float, int32_t, float,
+ int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32General)(
+ int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int64)(int32_t, int32_t,
+ int32_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32General)(int32_t, int32_t,
+ int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int64Int64)(int32_t, int32_t,
+ int64_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32GeneralInt32)(int32_t, int32_t,
+ int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32GeneralInt32Int32)(
+ int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneral)(int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralGeneral)(int32_t, int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralInt32Int32)(int32_t, int32_t,
+ int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32Int32Int32)(int32_t, int64_t,
+ int32_t, int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32)(int32_t, int64_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32Int64)(int32_t, int64_t,
+ int32_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32Int64General)(
+ int32_t, int64_t, int32_t, int64_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int64Int64)(int32_t, int64_t,
+ int64_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int64General)(int32_t, int64_t,
+ int64_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int64Int64General)(
+ int32_t, int64_t, int64_t, int64_t, int32_t);
+typedef int32_t (*Prototype_General_GeneralInt32)(int32_t, int32_t);
+typedef int32_t (*Prototype_General_GeneralInt32Int32)(int32_t, int32_t,
+ int32_t);
+typedef int32_t (*Prototype_General_GeneralInt32General)(int32_t, int32_t,
+ int32_t);
+typedef int32_t (*Prototype_General_GeneralInt32Int32GeneralInt32)(
+ int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralInt32General)(int32_t, int32_t,
+ int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralInt32GeneralInt32Int32Int32)(
+ int32_t, int32_t, int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int64_t (*Prototype_Int64_General)(int32_t);
+typedef int64_t (*Prototype_Int64_GeneralInt64)(int32_t, int64_t);
+
+// Fill the volatile registers with scratch values.
+//
+// Some of the ABI calls assume that the float registers are not scratched,
+// even though the ABI defines them as volatile; this is a performance
+// optimization. These are all calls passing operands in integer registers,
+// so for now the simulator does not scratch any float registers for such
+// calls. We should try to narrow this further in the future.
+//
+void Simulator::scratchVolatileRegisters(bool scratchFloat) {
+ int32_t scratch_value = 0xa5a5a5a5 ^ uint32_t(icount_);
+ set_register(r0, scratch_value);
+ set_register(r1, scratch_value);
+ set_register(r2, scratch_value);
+ set_register(r3, scratch_value);
+ set_register(r12, scratch_value); // Intra-Procedure-call scratch register.
+ set_register(r14, scratch_value); // Link register.
+
+ if (scratchFloat) {
+ uint64_t scratch_value_d =
+ 0x5a5a5a5a5a5a5a5aLU ^ uint64_t(icount_) ^ (uint64_t(icount_) << 30);
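+    // d8-d15 are callee-saved in the ARM ABI, so only d0-d7 and d16 and up
+    // are scratched here.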
+ for (uint32_t i = d0; i < d8; i++) {
+ set_d_register(i, &scratch_value_d);
+ }
+ for (uint32_t i = d16; i < FloatRegisters::TotalPhys; i++) {
+ set_d_register(i, &scratch_value_d);
+ }
+ }
+}
+
+static int64_t MakeInt64(int32_t first, int32_t second) {
+ // Little-endian order.
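+  // e.g. first = 0x89abcdef (low half), second = 0x01234567 (high half)
+  // yields 0x0123456789abcdef.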
+ return ((int64_t)second << 32) | (uint32_t)first;
+}
+
+// Software interrupt instructions are used by the simulator to call into C++.
+void Simulator::softwareInterrupt(SimInstruction* instr) {
+ int svc = instr->svcValue();
+ switch (svc) {
+ case kCallRtRedirected: {
+ Redirection* redirection = Redirection::FromSwiInstruction(instr);
+ int32_t arg0 = get_register(r0);
+ int32_t arg1 = get_register(r1);
+ int32_t arg2 = get_register(r2);
+ int32_t arg3 = get_register(r3);
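+      // Per the AAPCS, only the first four integer arguments are passed in
+      // r0-r3; the remaining arguments are read from the stack.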
+ int32_t* stack_pointer = reinterpret_cast<int32_t*>(get_register(sp));
+ int32_t arg4 = stack_pointer[0];
+ int32_t arg5 = stack_pointer[1];
+ int32_t arg6 = stack_pointer[2];
+ int32_t arg7 = stack_pointer[3];
+ int32_t arg8 = stack_pointer[4];
+ int32_t arg9 = stack_pointer[5];
+ int32_t arg10 = stack_pointer[6];
+ int32_t arg11 = stack_pointer[7];
+ int32_t arg12 = stack_pointer[8];
+ int32_t arg13 = stack_pointer[9];
+
+ int32_t saved_lr = get_register(lr);
+ intptr_t external =
+ reinterpret_cast<intptr_t>(redirection->nativeFunction());
+
+ bool stack_aligned = (get_register(sp) & (ABIStackAlignment - 1)) == 0;
+ if (!stack_aligned) {
+ fprintf(stderr, "Runtime call with unaligned stack!\n");
+ MOZ_CRASH();
+ }
+
+ if (single_stepping_) {
+ single_step_callback_(single_step_callback_arg_, this, nullptr);
+ }
+
+ switch (redirection->type()) {
+ case Args_General0: {
+ Prototype_General0 target =
+ reinterpret_cast<Prototype_General0>(external);
+ int64_t result = target();
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General1: {
+ Prototype_General1 target =
+ reinterpret_cast<Prototype_General1>(external);
+ int64_t result = target(arg0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General2: {
+ Prototype_General2 target =
+ reinterpret_cast<Prototype_General2>(external);
+ int64_t result = target(arg0, arg1);
+ // The ARM backend makes calls to __aeabi_idivmod and
+ // __aeabi_uidivmod assuming that the float registers are
+ // non-volatile as a performance optimization, so the float
+ // registers must not be scratch when calling these.
+ bool scratchFloat =
+ target != __aeabi_idivmod && target != __aeabi_uidivmod;
+ scratchVolatileRegisters(/* scratchFloat = */ scratchFloat);
+ setCallResult(result);
+ break;
+ }
+ case Args_General3: {
+ Prototype_General3 target =
+ reinterpret_cast<Prototype_General3>(external);
+ int64_t result = target(arg0, arg1, arg2);
+          scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General4: {
+ Prototype_General4 target =
+ reinterpret_cast<Prototype_General4>(external);
+ int64_t result = target(arg0, arg1, arg2, arg3);
+          scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General5: {
+ Prototype_General5 target =
+ reinterpret_cast<Prototype_General5>(external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General6: {
+ Prototype_General6 target =
+ reinterpret_cast<Prototype_General6>(external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4, arg5);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General7: {
+ Prototype_General7 target =
+ reinterpret_cast<Prototype_General7>(external);
+ int32_t arg6 = stack_pointer[2];
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4, arg5, arg6);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General8: {
+ Prototype_General8 target =
+ reinterpret_cast<Prototype_General8>(external);
+ int32_t arg6 = stack_pointer[2];
+ int32_t arg7 = stack_pointer[3];
+ int64_t result =
+ target(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int_GeneralGeneralGeneralInt64: {
+ Prototype_GeneralGeneralGeneralInt64 target =
+ reinterpret_cast<Prototype_GeneralGeneralGeneralInt64>(external);
+          // The int64 arg is not split between a register and the stack: the
+          // EABI requires an aligned pair, so it is passed entirely on the stack.
+ int64_t result = target(arg0, arg1, arg2, MakeInt64(arg4, arg5));
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int_GeneralGeneralInt64Int64: {
+ Prototype_GeneralGeneralInt64Int64 target =
+ reinterpret_cast<Prototype_GeneralGeneralInt64Int64>(external);
+ int64_t result =
+ target(arg0, arg1, MakeInt64(arg2, arg3), MakeInt64(arg4, arg5));
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int64_Double: {
+ double dval0, dval1;
+ int32_t ival;
+ getFpArgs(&dval0, &dval1, &ival);
+ Prototype_Int64_Double target =
+ reinterpret_cast<Prototype_Int64_Double>(external);
+ int64_t result = target(dval0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Double_None: {
+ Prototype_Double_None target =
+ reinterpret_cast<Prototype_Double_None>(external);
+ double dresult = target();
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Int_Double: {
+ double dval0, dval1;
+ int32_t ival;
+ getFpArgs(&dval0, &dval1, &ival);
+ Prototype_Int_Double target =
+ reinterpret_cast<Prototype_Int_Double>(external);
+ int32_t res = target(dval0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ set_register(r0, res);
+ break;
+ }
+ case Args_Int_Float32: {
+ float fval0;
+ if (UseHardFpABI()) {
+ get_float_from_s_register(0, &fval0);
+ } else {
+ fval0 = mozilla::BitwiseCast<float>(arg0);
+ }
+ auto target = reinterpret_cast<Prototype_Int_Float32>(external);
+ int32_t res = target(fval0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ set_register(r0, res);
+ break;
+ }
+ case Args_Double_Double: {
+ double dval0, dval1;
+ int32_t ival;
+ getFpArgs(&dval0, &dval1, &ival);
+ Prototype_Double_Double target =
+ reinterpret_cast<Prototype_Double_Double>(external);
+ double dresult = target(dval0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Float32_Float32: {
+ float fval0;
+ if (UseHardFpABI()) {
+ get_float_from_s_register(0, &fval0);
+ } else {
+ fval0 = mozilla::BitwiseCast<float>(arg0);
+ }
+ Prototype_Float32_Float32 target =
+ reinterpret_cast<Prototype_Float32_Float32>(external);
+ float fresult = target(fval0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultFloat(fresult);
+ break;
+ }
+ case Args_Float32_Float32Float32: {
+ float fval0, fval1;
+ if (UseHardFpABI()) {
+ get_float_from_s_register(0, &fval0);
+ get_float_from_s_register(1, &fval1);
+ } else {
+ fval0 = mozilla::BitwiseCast<float>(arg0);
+ fval1 = mozilla::BitwiseCast<float>(arg1);
+ }
+ Prototype_Float32_Float32Float32 target =
+ reinterpret_cast<Prototype_Float32_Float32Float32>(external);
+ float fresult = target(fval0, fval1);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultFloat(fresult);
+ break;
+ }
+ case Args_Float32_IntInt: {
+ Prototype_Float32_IntInt target =
+ reinterpret_cast<Prototype_Float32_IntInt>(external);
+ float fresult = target(arg0, arg1);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultFloat(fresult);
+ break;
+ }
+ case Args_Double_Int: {
+ Prototype_Double_Int target =
+ reinterpret_cast<Prototype_Double_Int>(external);
+ double dresult = target(arg0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Double_IntInt: {
+ Prototype_Double_IntInt target =
+ reinterpret_cast<Prototype_Double_IntInt>(external);
+ double dresult = target(arg0, arg1);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Double_DoubleInt: {
+ double dval0, dval1;
+ int32_t ival;
+ getFpArgs(&dval0, &dval1, &ival);
+ Prototype_Double_DoubleInt target =
+ reinterpret_cast<Prototype_Double_DoubleInt>(external);
+ double dresult = target(dval0, ival);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Double_DoubleDouble: {
+ double dval0, dval1;
+ int32_t ival;
+ getFpArgs(&dval0, &dval1, &ival);
+ Prototype_Double_DoubleDouble target =
+ reinterpret_cast<Prototype_Double_DoubleDouble>(external);
+ double dresult = target(dval0, dval1);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Double_IntDouble: {
+ int32_t ival = get_register(0);
+ double dval0;
+ if (UseHardFpABI()) {
+ get_double_from_d_register(0, &dval0);
+ } else {
+ dval0 = get_double_from_register_pair(2);
+ }
+ Prototype_Double_IntDouble target =
+ reinterpret_cast<Prototype_Double_IntDouble>(external);
+ double dresult = target(ival, dval0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Int_IntDouble: {
+ int32_t ival = get_register(0);
+ double dval0;
+ if (UseHardFpABI()) {
+ get_double_from_d_register(0, &dval0);
+ } else {
+ dval0 = get_double_from_register_pair(2);
+ }
+ Prototype_Int_IntDouble target =
+ reinterpret_cast<Prototype_Int_IntDouble>(external);
+ int32_t result = target(ival, dval0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ set_register(r0, result);
+ break;
+ }
+ case Args_Int_DoubleInt: {
+ double dval;
+ int32_t result;
+ Prototype_Int_DoubleInt target =
+ reinterpret_cast<Prototype_Int_DoubleInt>(external);
+ if (UseHardFpABI()) {
+ get_double_from_d_register(0, &dval);
+ result = target(dval, arg0);
+ } else {
+ dval = get_double_from_register_pair(0);
+ result = target(dval, arg2);
+ }
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ set_register(r0, result);
+ break;
+ }
+ case Args_Int_DoubleIntInt: {
+ double dval;
+ int32_t result;
+ Prototype_Int_DoubleIntInt target =
+ reinterpret_cast<Prototype_Int_DoubleIntInt>(external);
+ if (UseHardFpABI()) {
+ get_double_from_d_register(0, &dval);
+ result = target(dval, arg0, arg1);
+ } else {
+ dval = get_double_from_register_pair(0);
+ result = target(dval, arg2, arg3);
+ }
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ set_register(r0, result);
+ break;
+ }
+ case Args_Int_IntDoubleIntInt: {
+ double dval;
+ int32_t result;
+ Prototype_Int_IntDoubleIntInt target =
+ reinterpret_cast<Prototype_Int_IntDoubleIntInt>(external);
+ if (UseHardFpABI()) {
+ get_double_from_d_register(0, &dval);
+ result = target(arg0, dval, arg1, arg2);
+ } else {
+ dval = get_double_from_register_pair(2);
+ result = target(arg0, dval, arg4, arg5);
+ }
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ set_register(r0, result);
+ break;
+ }
+ case Args_Double_DoubleDoubleDouble: {
+ double dval0, dval1, dval2;
+ int32_t ival;
+ getFpArgs(&dval0, &dval1, &ival);
+          // The last argument is passed on the stack.
+ getFpFromStack(stack_pointer, &dval2);
+ Prototype_Double_DoubleDoubleDouble target =
+ reinterpret_cast<Prototype_Double_DoubleDoubleDouble>(external);
+ double dresult = target(dval0, dval1, dval2);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Double_DoubleDoubleDoubleDouble: {
+ double dval0, dval1, dval2, dval3;
+ int32_t ival;
+ getFpArgs(&dval0, &dval1, &ival);
+          // The last two arguments are passed on the stack.
+ getFpFromStack(stack_pointer, &dval2);
+ getFpFromStack(stack_pointer + 2, &dval3);
+ Prototype_Double_DoubleDoubleDoubleDouble target =
+ reinterpret_cast<Prototype_Double_DoubleDoubleDoubleDouble>(
+ external);
+ double dresult = target(dval0, dval1, dval2, dval3);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+
+ case Args_Int32_General: {
+ Prototype_Int32_General target =
+ reinterpret_cast<Prototype_Int32_General>(external);
+ int64_t result = target(arg0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32: {
+ Prototype_Int32_GeneralInt32 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32>(external);
+ int64_t result = target(arg0, arg1);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32: {
+ Prototype_Int32_GeneralInt32Int32 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32>(external);
+ int64_t result = target(arg0, arg1, arg2);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32Int32Int32: {
+ Prototype_Int32_GeneralInt32Int32Int32Int32 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32Int32>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32Int32Int32Int32: {
+ Prototype_Int32_GeneralInt32Int32Int32Int32Int32 target =
+ reinterpret_cast<
+ Prototype_Int32_GeneralInt32Int32Int32Int32Int32>(external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4, arg5);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32Int32Int32General: {
+ Prototype_Int32_GeneralInt32Int32Int32Int32General target =
+ reinterpret_cast<
+ Prototype_Int32_GeneralInt32Int32Int32Int32General>(external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4, arg5);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32Int32Int32Int32Int32General: {
+ Prototype_Int32_GeneralInt32Int32Int32Int32Int32Int32General target =
+ reinterpret_cast<
+ Prototype_Int32_GeneralInt32Int32Int32Int32Int32Int32General>(
+ external);
+ int64_t result =
+ target(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Float32Float32Int32Int32Int32General: {
+ float fval0, fval1;
+ if (UseHardFpABI()) {
+ get_float_from_s_register(2, &fval0);
+ get_float_from_s_register(3, &fval1);
+ } else {
+ fval0 = mozilla::BitwiseCast<float>(arg2);
+ fval1 = mozilla::BitwiseCast<float>(arg3);
+ }
+ Prototype_Int32_GeneralInt32Float32Float32Int32Int32Int32General
+ target = reinterpret_cast<
+ Prototype_Int32_GeneralInt32Float32Float32Int32Int32Int32General>(
+ external);
+ int64_t result =
+ target(arg0, arg1, fval0, fval1, arg4, arg5, arg6, arg7);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General: {
+ float fval0, fval1, fval2, fval3;
+ if (UseHardFpABI()) {
+ get_float_from_s_register(2, &fval0);
+ get_float_from_s_register(3, &fval1);
+ get_float_from_s_register(4, &fval2);
+ get_float_from_s_register(5, &fval3);
+ } else {
+ fval0 = mozilla::BitwiseCast<float>(arg2);
+ fval1 = mozilla::BitwiseCast<float>(arg3);
+ fval2 = mozilla::BitwiseCast<float>(arg4);
+ fval3 = mozilla::BitwiseCast<float>(arg5);
+ }
+ Prototype_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General
+ target = reinterpret_cast<
+ Prototype_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General>(
+ external);
+ int64_t result = target(arg0, arg1, fval0, fval1, fval2, fval3, arg6,
+ arg7, arg8, arg9, arg10);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General: {
+ float fval0, fval1, fval2, fval3, fval4;
+ if (UseHardFpABI()) {
+ get_float_from_s_register(2, &fval0);
+ get_float_from_s_register(3, &fval1);
+ get_float_from_s_register(5, &fval2);
+ get_float_from_s_register(6, &fval3);
+ get_float_from_s_register(8, &fval4);
+ } else {
+ fval0 = mozilla::BitwiseCast<float>(arg2);
+ fval1 = mozilla::BitwiseCast<float>(arg3);
+ fval2 = mozilla::BitwiseCast<float>(arg5);
+ fval3 = mozilla::BitwiseCast<float>(arg6);
+ fval4 = mozilla::BitwiseCast<float>(arg8);
+ }
+ Prototype_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General
+ target = reinterpret_cast<
+ Prototype_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General>(
+ external);
+ int64_t result =
+ target(arg0, arg1, fval0, fval1, arg4, fval2, fval3, arg7, fval4,
+ arg9, arg10, arg11, arg12, arg13);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32Int32General: {
+ Prototype_Int32_GeneralInt32Int32Int32General target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32General>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32Int64: {
+ Prototype_Int32_GeneralInt32Int32Int64 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int64>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, MakeInt64(arg3, arg4));
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32General: {
+ Prototype_Int32_GeneralInt32Int32General target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32General>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int64Int64: {
+ Prototype_Int32_GeneralInt32Int64Int64 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int64Int64>(
+ external);
+ int64_t result =
+ target(arg0, arg1, MakeInt64(arg2, arg3), MakeInt64(arg4, arg5));
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32GeneralInt32: {
+ Prototype_Int32_GeneralInt32GeneralInt32 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32GeneralInt32>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32GeneralInt32Int32: {
+ Prototype_Int32_GeneralInt32GeneralInt32Int32 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32GeneralInt32Int32>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralGeneral: {
+ Prototype_Int32_GeneralGeneral target =
+ reinterpret_cast<Prototype_Int32_GeneralGeneral>(external);
+ int64_t result = target(arg0, arg1);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralGeneralGeneral: {
+ Prototype_Int32_GeneralGeneralGeneral target =
+ reinterpret_cast<Prototype_Int32_GeneralGeneralGeneral>(external);
+ int64_t result = target(arg0, arg1, arg2);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralGeneralInt32Int32: {
+ Prototype_Int32_GeneralGeneralInt32Int32 target =
+ reinterpret_cast<Prototype_Int32_GeneralGeneralInt32Int32>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt64Int32Int32Int32: {
+ Prototype_Int32_GeneralInt64Int32Int32Int32 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt64Int32Int32Int32>(
+ external);
+ int64_t result =
+ target(arg0, MakeInt64(arg2, arg3), arg4, arg5, arg6);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt64Int32: {
+ Prototype_Int32_GeneralInt64Int32 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt64Int32>(external);
+ int64_t result = target(arg0, MakeInt64(arg2, arg3), arg4);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt64Int32Int64: {
+ Prototype_Int32_GeneralInt64Int32Int64 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt64Int32Int64>(
+ external);
+ int64_t result =
+ target(arg0, MakeInt64(arg2, arg3), arg4, MakeInt64(arg6, arg7));
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt64Int32Int64General: {
+ Prototype_Int32_GeneralInt64Int32Int64General target =
+ reinterpret_cast<Prototype_Int32_GeneralInt64Int32Int64General>(
+ external);
+ int64_t result = target(arg0, MakeInt64(arg2, arg3), arg4,
+ MakeInt64(arg6, arg7), arg8);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt64Int64Int64: {
+ Prototype_Int32_GeneralInt64Int64Int64 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt64Int64Int64>(
+ external);
+ int64_t result = target(arg0, MakeInt64(arg2, arg3),
+ MakeInt64(arg4, arg5), MakeInt64(arg6, arg7));
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt64Int64General: {
+ Prototype_Int32_GeneralInt64Int64General target =
+ reinterpret_cast<Prototype_Int32_GeneralInt64Int64General>(
+ external);
+ int64_t result =
+ target(arg0, MakeInt64(arg2, arg3), MakeInt64(arg4, arg5), arg6);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt64Int64Int64General: {
+ Prototype_Int32_GeneralInt64Int64Int64General target =
+ reinterpret_cast<Prototype_Int32_GeneralInt64Int64Int64General>(
+ external);
+ int64_t result =
+ target(arg0, MakeInt64(arg2, arg3), MakeInt64(arg4, arg5),
+ MakeInt64(arg6, arg7), arg8);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General_GeneralInt32: {
+ Prototype_General_GeneralInt32 target =
+ reinterpret_cast<Prototype_General_GeneralInt32>(external);
+ int64_t result = target(arg0, arg1);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General_GeneralInt32Int32: {
+ Prototype_General_GeneralInt32Int32 target =
+ reinterpret_cast<Prototype_General_GeneralInt32Int32>(external);
+ int64_t result = target(arg0, arg1, arg2);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General_GeneralInt32General: {
+ Prototype_General_GeneralInt32General target =
+ reinterpret_cast<Prototype_General_GeneralInt32General>(external);
+ int64_t result = target(arg0, arg1, arg2);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case js::jit::Args_General_GeneralInt32Int32GeneralInt32: {
+ Prototype_General_GeneralInt32Int32GeneralInt32 target =
+ reinterpret_cast<Prototype_General_GeneralInt32Int32GeneralInt32>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralGeneralInt32General: {
+ Prototype_Int32_GeneralGeneralInt32General target =
+ reinterpret_cast<Prototype_Int32_GeneralGeneralInt32General>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralGeneralInt32GeneralInt32Int32Int32: {
+ Prototype_Int32_GeneralGeneralInt32GeneralInt32Int32Int32 target =
+ reinterpret_cast<
+ Prototype_Int32_GeneralGeneralInt32GeneralInt32Int32Int32>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4, arg5, arg6);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int64_General: {
+ Prototype_Int64_General target =
+ reinterpret_cast<Prototype_Int64_General>(external);
+ int64_t result = target(arg0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int64_GeneralInt64: {
+ Prototype_Int64_GeneralInt64 target =
+ reinterpret_cast<Prototype_Int64_GeneralInt64>(external);
+ int64_t result = target(arg0, MakeInt64(arg2, arg3));
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+
+ default:
+ MOZ_CRASH("call");
+ }
+
+ if (single_stepping_) {
+ single_step_callback_(single_step_callback_arg_, this, nullptr);
+ }
+
+ set_register(lr, saved_lr);
+ set_pc(get_register(lr));
+ break;
+ }
+ case kBreakpoint: {
+ ArmDebugger dbg(this);
+ dbg.debug();
+ break;
+ }
+    default: {  // Stop uses all codes at or above 1 << 23.
+ if (svc >= (1 << 23)) {
+ uint32_t code = svc & kStopCodeMask;
+ if (isWatchedStop(code)) {
+ increaseStopCounter(code);
+ }
+
+ // Stop if it is enabled, otherwise go on jumping over the stop and
+ // the message address.
+ if (isEnabledStop(code)) {
+ ArmDebugger dbg(this);
+ dbg.stop(instr);
+ } else {
+ set_pc(get_pc() + 2 * SimInstruction::kInstrSize);
+ }
+ } else {
+ // This is not a valid svc code.
+ MOZ_CRASH();
+ break;
+ }
+ }
+ }
+}
+
+void Simulator::canonicalizeNaN(double* value) {
+ if (!wasm::CodeExists && !wasm::LookupCodeSegment(get_pc_as<void*>()) &&
+ FPSCR_default_NaN_mode_) {
+ *value = JS::CanonicalizeNaN(*value);
+ }
+}
+
+void Simulator::canonicalizeNaN(float* value) {
+ if (!wasm::CodeExists && !wasm::LookupCodeSegment(get_pc_as<void*>()) &&
+ FPSCR_default_NaN_mode_) {
+ *value = JS::CanonicalizeNaN(*value);
+ }
+}
+
+// Stop helper functions.
+bool Simulator::isStopInstruction(SimInstruction* instr) {
+ return (instr->bits(27, 24) == 0xF) && (instr->svcValue() >= kStopCode);
+}
+
+bool Simulator::isWatchedStop(uint32_t code) {
+ MOZ_ASSERT(code <= kMaxStopCode);
+ return code < kNumOfWatchedStops;
+}
+
+bool Simulator::isEnabledStop(uint32_t code) {
+ MOZ_ASSERT(code <= kMaxStopCode);
+ // Unwatched stops are always enabled.
+ return !isWatchedStop(code) ||
+ !(watched_stops_[code].count & kStopDisabledBit);
+}
+
+void Simulator::enableStop(uint32_t code) {
+ MOZ_ASSERT(isWatchedStop(code));
+ if (!isEnabledStop(code)) {
+ watched_stops_[code].count &= ~kStopDisabledBit;
+ }
+}
+
+void Simulator::disableStop(uint32_t code) {
+ MOZ_ASSERT(isWatchedStop(code));
+ if (isEnabledStop(code)) {
+ watched_stops_[code].count |= kStopDisabledBit;
+ }
+}
+
+void Simulator::increaseStopCounter(uint32_t code) {
+ MOZ_ASSERT(code <= kMaxStopCode);
+ MOZ_ASSERT(isWatchedStop(code));
+ if ((watched_stops_[code].count & ~(1 << 31)) == 0x7fffffff) {
+ printf(
+ "Stop counter for code %i has overflowed.\n"
+ "Enabling this code and reseting the counter to 0.\n",
+ code);
+ watched_stops_[code].count = 0;
+ enableStop(code);
+ } else {
+ watched_stops_[code].count++;
+ }
+}
+
+// Print a stop status.
+void Simulator::printStopInfo(uint32_t code) {
+ MOZ_ASSERT(code <= kMaxStopCode);
+ if (!isWatchedStop(code)) {
+ printf("Stop not watched.");
+ } else {
+ const char* state = isEnabledStop(code) ? "Enabled" : "Disabled";
+ int32_t count = watched_stops_[code].count & ~kStopDisabledBit;
+ // Don't print the state of unused breakpoints.
+ if (count != 0) {
+ if (watched_stops_[code].desc) {
+ printf("stop %i - 0x%x: \t%s, \tcounter = %i, \t%s\n", code, code,
+ state, count, watched_stops_[code].desc);
+ } else {
+ printf("stop %i - 0x%x: \t%s, \tcounter = %i\n", code, code, state,
+ count);
+ }
+ }
+ }
+}
+
+// Instruction types 0 and 1 are both rolled into one function because they only
+// differ in the handling of the shifter_operand.
+void Simulator::decodeType01(SimInstruction* instr) {
+ int type = instr->typeValue();
+ if (type == 0 && instr->isSpecialType0()) {
+ // Multiply instruction or extra loads and stores.
+ if (instr->bits(7, 4) == 9) {
+ if (instr->bit(24) == 0) {
+ // Raw field decoding here. Multiply instructions have their Rd
+ // in funny places.
+ int rn = instr->rnValue();
+ int rm = instr->rmValue();
+ int rs = instr->rsValue();
+ int32_t rs_val = get_register(rs);
+ int32_t rm_val = get_register(rm);
+ if (instr->bit(23) == 0) {
+ if (instr->bit(21) == 0) {
+ // The MUL instruction description (A 4.1.33) refers to
+ // Rd as being the destination for the operation, but it
+ // confusingly uses the Rn field to encode it.
+ int rd = rn; // Remap the rn field to the Rd register.
+ int32_t alu_out = rm_val * rs_val;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ }
+ } else {
+ int rd = instr->rdValue();
+ int32_t acc_value = get_register(rd);
+ if (instr->bit(22) == 0) {
+ // The MLA instruction description (A 4.1.28) refers
+ // to the order of registers as "Rd, Rm, Rs,
+ // Rn". But confusingly it uses the Rn field to
+ // encode the Rd register and the Rd field to encode
+ // the Rn register.
+ int32_t mul_out = rm_val * rs_val;
+ int32_t result = acc_value + mul_out;
+ set_register(rn, result);
+ } else {
+ int32_t mul_out = rm_val * rs_val;
+ int32_t result = acc_value - mul_out;
+ set_register(rn, result);
+ }
+ }
+ } else {
+ // The signed/long multiply instructions use the terms RdHi
+ // and RdLo when referring to the target registers. They are
+ // mapped to the Rn and Rd fields as follows:
+ // RdLo == Rd
+        //  RdHi == Rn (This is confusingly stored in variable rd_hi here
+ // because the mul instruction from above uses the
+ // Rn field to encode the Rd register. Good luck figuring
+ // this out without reading the ARM instruction manual
+ // at a very detailed level.)
+ int rd_hi = rn; // Remap the rn field to the RdHi register.
+ int rd_lo = instr->rdValue();
+ int32_t hi_res = 0;
+ int32_t lo_res = 0;
+ if (instr->bit(22) == 1) {
+ int64_t left_op = static_cast<int32_t>(rm_val);
+ int64_t right_op = static_cast<int32_t>(rs_val);
+ uint64_t result = left_op * right_op;
+ hi_res = static_cast<int32_t>(result >> 32);
+ lo_res = static_cast<int32_t>(result & 0xffffffff);
+ } else {
+ // Unsigned multiply.
+ uint64_t left_op = static_cast<uint32_t>(rm_val);
+ uint64_t right_op = static_cast<uint32_t>(rs_val);
+ uint64_t result = left_op * right_op;
+ hi_res = static_cast<int32_t>(result >> 32);
+ lo_res = static_cast<int32_t>(result & 0xffffffff);
+ }
+ set_register(rd_lo, lo_res);
+ set_register(rd_hi, hi_res);
+ if (instr->hasS()) {
+ MOZ_CRASH();
+ }
+ }
+ } else {
+ if (instr->bits(excl::ExclusiveOpHi, excl::ExclusiveOpLo) ==
+ excl::ExclusiveOpcode) {
+ // Load-exclusive / store-exclusive.
+ if (instr->bit(excl::ExclusiveLoad)) {
+ int rn = instr->rnValue();
+ int rt = instr->rtValue();
+ int32_t address = get_register(rn);
+ switch (instr->bits(excl::ExclusiveSizeHi, excl::ExclusiveSizeLo)) {
+ case excl::ExclusiveWord:
+ set_register(rt, readExW(address, instr));
+ break;
+ case excl::ExclusiveDouble: {
+ MOZ_ASSERT((rt % 2) == 0);
+ int32_t hibits;
+ int32_t lobits = readExDW(address, &hibits);
+ set_register(rt, lobits);
+ set_register(rt + 1, hibits);
+ break;
+ }
+ case excl::ExclusiveByte:
+ set_register(rt, readExBU(address));
+ break;
+ case excl::ExclusiveHalf:
+ set_register(rt, readExHU(address, instr));
+ break;
+ }
+ } else {
+ int rn = instr->rnValue();
+ int rd = instr->rdValue();
+ int rt = instr->bits(3, 0);
+ int32_t address = get_register(rn);
+ int32_t value = get_register(rt);
+ int32_t result = 0;
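+          // As with hardware strex, the result register receives 0 when the
+          // store succeeds and a non-zero value when it fails.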
+ switch (instr->bits(excl::ExclusiveSizeHi, excl::ExclusiveSizeLo)) {
+ case excl::ExclusiveWord:
+ result = writeExW(address, value, instr);
+ break;
+ case excl::ExclusiveDouble: {
+ MOZ_ASSERT((rt % 2) == 0);
+ int32_t value2 = get_register(rt + 1);
+ result = writeExDW(address, value, value2);
+ break;
+ }
+ case excl::ExclusiveByte:
+ result = writeExB(address, (uint8_t)value);
+ break;
+ case excl::ExclusiveHalf:
+ result = writeExH(address, (uint16_t)value, instr);
+ break;
+ }
+ set_register(rd, result);
+ }
+ } else {
+        MOZ_CRASH();  // Not currently used.
+ }
+ }
+ } else {
+ // Extra load/store instructions.
+ int rd = instr->rdValue();
+ int rn = instr->rnValue();
+ int32_t rn_val = get_register(rn);
+ int32_t addr = 0;
+ if (instr->bit(22) == 0) {
+ int rm = instr->rmValue();
+ int32_t rm_val = get_register(rm);
+ switch (instr->PUField()) {
+ case da_x:
+ MOZ_ASSERT(!instr->hasW());
+ addr = rn_val;
+ rn_val -= rm_val;
+ set_register(rn, rn_val);
+ break;
+ case ia_x:
+ MOZ_ASSERT(!instr->hasW());
+ addr = rn_val;
+ rn_val += rm_val;
+ set_register(rn, rn_val);
+ break;
+ case db_x:
+ rn_val -= rm_val;
+ addr = rn_val;
+ if (instr->hasW()) {
+ set_register(rn, rn_val);
+ }
+ break;
+ case ib_x:
+ rn_val += rm_val;
+ addr = rn_val;
+ if (instr->hasW()) {
+ set_register(rn, rn_val);
+ }
+ break;
+ default:
+ // The PU field is a 2-bit field.
+ MOZ_CRASH();
+ break;
+ }
+ } else {
+ int32_t imm_val = (instr->immedHValue() << 4) | instr->immedLValue();
+ switch (instr->PUField()) {
+ case da_x:
+ MOZ_ASSERT(!instr->hasW());
+ addr = rn_val;
+ rn_val -= imm_val;
+ set_register(rn, rn_val);
+ break;
+ case ia_x:
+ MOZ_ASSERT(!instr->hasW());
+ addr = rn_val;
+ rn_val += imm_val;
+ set_register(rn, rn_val);
+ break;
+ case db_x:
+ rn_val -= imm_val;
+ addr = rn_val;
+ if (instr->hasW()) {
+ set_register(rn, rn_val);
+ }
+ break;
+ case ib_x:
+ rn_val += imm_val;
+ addr = rn_val;
+ if (instr->hasW()) {
+ set_register(rn, rn_val);
+ }
+ break;
+ default:
+ // The PU field is a 2-bit field.
+ MOZ_CRASH();
+ break;
+ }
+ }
+ if ((instr->bits(7, 4) & 0xd) == 0xd && instr->bit(20) == 0) {
+ MOZ_ASSERT((rd % 2) == 0);
+ if (instr->hasH()) {
+ // The strd instruction.
+ int32_t value1 = get_register(rd);
+ int32_t value2 = get_register(rd + 1);
+ writeDW(addr, value1, value2);
+ } else {
+ // The ldrd instruction.
+ int* rn_data = readDW(addr);
+ if (rn_data) {
+ set_dw_register(rd, rn_data);
+ }
+ }
+ } else if (instr->hasH()) {
+ if (instr->hasSign()) {
+ if (instr->hasL()) {
+ int16_t val = readH(addr, instr);
+ set_register(rd, val);
+ } else {
+ int16_t val = get_register(rd);
+ writeH(addr, val, instr);
+ }
+ } else {
+ if (instr->hasL()) {
+ uint16_t val = readHU(addr, instr);
+ set_register(rd, val);
+ } else {
+ uint16_t val = get_register(rd);
+ writeH(addr, val, instr);
+ }
+ }
+ } else {
+ // Signed byte loads.
+ MOZ_ASSERT(instr->hasSign());
+ MOZ_ASSERT(instr->hasL());
+ int8_t val = readB(addr);
+ set_register(rd, val);
+ }
+ return;
+ }
+ } else if ((type == 0) && instr->isMiscType0()) {
+ if (instr->bits(7, 4) == 0) {
+ if (instr->bit(21) == 0) {
+ // mrs
+ int rd = instr->rdValue();
+ uint32_t flags;
+ if (instr->bit(22) == 0) {
+ // CPSR. Note: The Q flag is not yet implemented!
+ flags = (n_flag_ << 31) | (z_flag_ << 30) | (c_flag_ << 29) |
+ (v_flag_ << 28);
+ } else {
+ // SPSR
+ MOZ_CRASH();
+ }
+ set_register(rd, flags);
+ } else {
+ // msr
+ if (instr->bits(27, 23) == 2) {
+ // Register operand. For now we only emit mask 0b1100.
+ int rm = instr->rmValue();
+ mozilla::DebugOnly<uint32_t> mask = instr->bits(19, 16);
+ MOZ_ASSERT(mask == (3 << 2));
+
+ uint32_t flags = get_register(rm);
+ n_flag_ = (flags >> 31) & 1;
+ z_flag_ = (flags >> 30) & 1;
+ c_flag_ = (flags >> 29) & 1;
+ v_flag_ = (flags >> 28) & 1;
+ } else {
+ MOZ_CRASH();
+ }
+ }
+ } else if (instr->bits(22, 21) == 1) {
+ int rm = instr->rmValue();
+ switch (instr->bits(7, 4)) {
+ case 1: // BX
+ set_pc(get_register(rm));
+ break;
+ case 3: { // BLX
+ uint32_t old_pc = get_pc();
+ set_pc(get_register(rm));
+ set_register(lr, old_pc + SimInstruction::kInstrSize);
+ break;
+ }
+ case 7: { // BKPT
+ fprintf(stderr, "Simulator hit BKPT.\n");
+ if (getenv("ARM_SIM_DEBUGGER")) {
+ ArmDebugger dbg(this);
+ dbg.debug();
+ } else {
+ fprintf(stderr,
+ "Use ARM_SIM_DEBUGGER=1 to enter the builtin debugger.\n");
+ MOZ_CRASH("ARM simulator breakpoint");
+ }
+ break;
+ }
+ default:
+ MOZ_CRASH();
+ }
+ } else if (instr->bits(22, 21) == 3) {
+ int rm = instr->rmValue();
+ int rd = instr->rdValue();
+ switch (instr->bits(7, 4)) {
+ case 1: { // CLZ
+ uint32_t bits = get_register(rm);
+ int leading_zeros = 0;
+ if (bits == 0) {
+ leading_zeros = 32;
+ } else {
+ leading_zeros = mozilla::CountLeadingZeroes32(bits);
+ }
+ set_register(rd, leading_zeros);
+ break;
+ }
+ default:
+ MOZ_CRASH();
+ break;
+ }
+ } else {
+ printf("%08x\n", instr->instructionBits());
+ MOZ_CRASH();
+ }
+ } else if ((type == 1) && instr->isNopType1()) {
+ // NOP.
+ } else if ((type == 1) && instr->isCsdbType1()) {
+ // Speculation barrier. (No-op for the simulator)
+ } else {
+ int rd = instr->rdValue();
+ int rn = instr->rnValue();
+ int32_t rn_val = get_register(rn);
+ int32_t shifter_operand = 0;
+    bool shifter_carry_out = false;
+ if (type == 0) {
+ shifter_operand = getShiftRm(instr, &shifter_carry_out);
+ } else {
+ MOZ_ASSERT(instr->typeValue() == 1);
+ shifter_operand = getImm(instr, &shifter_carry_out);
+ }
+ int32_t alu_out;
+ switch (instr->opcodeField()) {
+ case OpAnd:
+ alu_out = rn_val & shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ }
+ break;
+ case OpEor:
+ alu_out = rn_val ^ shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ }
+ break;
+ case OpSub:
+ alu_out = rn_val - shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(!borrowFrom(rn_val, shifter_operand));
+ setVFlag(overflowFrom(alu_out, rn_val, shifter_operand, false));
+ }
+ break;
+ case OpRsb:
+ alu_out = shifter_operand - rn_val;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(!borrowFrom(shifter_operand, rn_val));
+ setVFlag(overflowFrom(alu_out, shifter_operand, rn_val, false));
+ }
+ break;
+ case OpAdd:
+ alu_out = rn_val + shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(carryFrom(rn_val, shifter_operand));
+ setVFlag(overflowFrom(alu_out, rn_val, shifter_operand, true));
+ }
+ break;
+ case OpAdc:
+ alu_out = rn_val + shifter_operand + getCarry();
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(carryFrom(rn_val, shifter_operand, getCarry()));
+ setVFlag(overflowFrom(alu_out, rn_val, shifter_operand, true));
+ }
+ break;
+ case OpSbc:
+ alu_out = rn_val - shifter_operand - (getCarry() == 0 ? 1 : 0);
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ MOZ_CRASH();
+ }
+ break;
+ case OpRsc:
+ alu_out = shifter_operand - rn_val - (getCarry() == 0 ? 1 : 0);
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ MOZ_CRASH();
+ }
+ break;
+ case OpTst:
+ if (instr->hasS()) {
+ alu_out = rn_val & shifter_operand;
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ } else {
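+          // With the S bit clear, this encoding is movw: load a 16-bit
+          // immediate into rd.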
+ alu_out = instr->immedMovwMovtValue();
+ set_register(rd, alu_out);
+ }
+ break;
+ case OpTeq:
+ if (instr->hasS()) {
+ alu_out = rn_val ^ shifter_operand;
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ } else {
+ // Other instructions matching this pattern are handled in the
+ // miscellaneous instructions part above.
+ MOZ_CRASH();
+ }
+ break;
+ case OpCmp:
+ if (instr->hasS()) {
+ alu_out = rn_val - shifter_operand;
+ setNZFlags(alu_out);
+ setCFlag(!borrowFrom(rn_val, shifter_operand));
+ setVFlag(overflowFrom(alu_out, rn_val, shifter_operand, false));
+ } else {
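+          // With the S bit clear, this encoding is movt: replace the upper 16
+          // bits of rd with the immediate.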
+ alu_out =
+ (get_register(rd) & 0xffff) | (instr->immedMovwMovtValue() << 16);
+ set_register(rd, alu_out);
+ }
+ break;
+ case OpCmn:
+ if (instr->hasS()) {
+ alu_out = rn_val + shifter_operand;
+ setNZFlags(alu_out);
+ setCFlag(carryFrom(rn_val, shifter_operand));
+ setVFlag(overflowFrom(alu_out, rn_val, shifter_operand, true));
+ } else {
+ // Other instructions matching this pattern are handled in the
+ // miscellaneous instructions part above.
+ MOZ_CRASH();
+ }
+ break;
+ case OpOrr:
+ alu_out = rn_val | shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ }
+ break;
+ case OpMov:
+ alu_out = shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ }
+ break;
+ case OpBic:
+ alu_out = rn_val & ~shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ }
+ break;
+ case OpMvn:
+ alu_out = ~shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ }
+ break;
+ default:
+ MOZ_CRASH();
+ break;
+ }
+ }
+}
+
+void Simulator::decodeType2(SimInstruction* instr) {
+ int rd = instr->rdValue();
+ int rn = instr->rnValue();
+ int32_t rn_val = get_register(rn);
+ int32_t im_val = instr->offset12Value();
+ int32_t addr = 0;
+ switch (instr->PUField()) {
+ case da_x:
+ MOZ_ASSERT(!instr->hasW());
+ addr = rn_val;
+ rn_val -= im_val;
+ set_register(rn, rn_val);
+ break;
+ case ia_x:
+ MOZ_ASSERT(!instr->hasW());
+ addr = rn_val;
+ rn_val += im_val;
+ set_register(rn, rn_val);
+ break;
+ case db_x:
+ rn_val -= im_val;
+ addr = rn_val;
+ if (instr->hasW()) {
+ set_register(rn, rn_val);
+ }
+ break;
+ case ib_x:
+ rn_val += im_val;
+ addr = rn_val;
+ if (instr->hasW()) {
+ set_register(rn, rn_val);
+ }
+ break;
+ default:
+ MOZ_CRASH();
+ break;
+ }
+ if (instr->hasB()) {
+ if (instr->hasL()) {
+ uint8_t val = readBU(addr);
+ set_register(rd, val);
+ } else {
+ uint8_t val = get_register(rd);
+ writeB(addr, val);
+ }
+ } else {
+ if (instr->hasL()) {
+ set_register(rd, readW(addr, instr, AllowUnaligned));
+ } else {
+ writeW(addr, get_register(rd), instr, AllowUnaligned);
+ }
+ }
+}
+
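+// Byte-rotation helper used by the byte/halfword extend instructions below
+// (sxtb, sxth, uxtb, uxtb16, uxth and their accumulate forms): the two-bit
+// rotate field selects a right-rotation of 0, 8, 16 or 24 bits applied to the
+// source register before extension.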
+static uint32_t rotateBytes(uint32_t val, int32_t rotate) {
+ switch (rotate) {
+ default:
+ return val;
+ case 1:
+ return (val >> 8) | (val << 24);
+ case 2:
+ return (val >> 16) | (val << 16);
+ case 3:
+ return (val >> 24) | (val << 8);
+ }
+}
+
+void Simulator::decodeType3(SimInstruction* instr) {
+ if (MOZ_UNLIKELY(instr->isUDF())) {
+ uint8_t* newPC;
+ if (wasm::HandleIllegalInstruction(registerState(), &newPC)) {
+ set_pc((int32_t)newPC);
+ return;
+ }
+ MOZ_CRASH("illegal instruction encountered");
+ }
+
+ int rd = instr->rdValue();
+ int rn = instr->rnValue();
+ int32_t rn_val = get_register(rn);
+  bool shifter_carry_out = false;
+ int32_t shifter_operand = getShiftRm(instr, &shifter_carry_out);
+ int32_t addr = 0;
+ switch (instr->PUField()) {
+ case da_x:
+ MOZ_ASSERT(!instr->hasW());
+ MOZ_CRASH();
+ break;
+ case ia_x: {
+ if (instr->bit(4) == 0) {
+ // Memop.
+ } else {
+ if (instr->bit(5) == 0) {
+ switch (instr->bits(22, 21)) {
+ case 0:
+ if (instr->bit(20) == 0) {
+ if (instr->bit(6) == 0) {
+ // Pkhbt.
+ uint32_t rn_val = get_register(rn);
+ uint32_t rm_val = get_register(instr->rmValue());
+ int32_t shift = instr->bits(11, 7);
+ rm_val <<= shift;
+ set_register(rd, (rn_val & 0xFFFF) | (rm_val & 0xFFFF0000U));
+ } else {
+ // Pkhtb.
+ uint32_t rn_val = get_register(rn);
+ int32_t rm_val = get_register(instr->rmValue());
+ int32_t shift = instr->bits(11, 7);
+ if (shift == 0) {
+ shift = 32;
+ }
+ rm_val >>= shift;
+ set_register(rd, (rn_val & 0xFFFF0000U) | (rm_val & 0xFFFF));
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 1:
+ MOZ_CRASH();
+ break;
+ case 2:
+ MOZ_CRASH();
+ break;
+ case 3: {
+ // Usat.
+ int32_t sat_pos = instr->bits(20, 16);
+ int32_t sat_val = (1 << sat_pos) - 1;
+ int32_t shift = instr->bits(11, 7);
+ int32_t shift_type = instr->bit(6);
+ int32_t rm_val = get_register(instr->rmValue());
+ if (shift_type == 0) { // LSL
+ rm_val <<= shift;
+ } else { // ASR
+ rm_val >>= shift;
+ }
+
+            // If saturation occurs, the Q flag should be set in the CPSR.
+            // The simulator does not model the Q flag yet (the mrs handling
+            // above omits it as well).
+ if (rm_val > sat_val) {
+ rm_val = sat_val;
+ } else if (rm_val < 0) {
+ rm_val = 0;
+ }
+ set_register(rd, rm_val);
+ break;
+ }
+ }
+ } else {
+ switch (instr->bits(22, 21)) {
+ case 0:
+ MOZ_CRASH();
+ break;
+ case 1:
+ if (instr->bits(7, 4) == 7 && instr->bits(19, 16) == 15) {
+ uint32_t rm_val = rotateBytes(get_register(instr->rmValue()),
+ instr->bits(11, 10));
+ if (instr->bit(20)) {
+ // Sxth.
+ set_register(rd, (int32_t)(int16_t)(rm_val & 0xFFFF));
+ } else {
+ // Sxtb.
+ set_register(rd, (int32_t)(int8_t)(rm_val & 0xFF));
+ }
+ } else if (instr->bits(20, 16) == 0b1'1111 &&
+ instr->bits(11, 4) == 0b1111'0011) {
+ // Rev
+ uint32_t rm_val = get_register(instr->rmValue());
+
+ static_assert(MOZ_LITTLE_ENDIAN());
+ set_register(rd,
+ mozilla::NativeEndian::swapToBigEndian(rm_val));
+ } else if (instr->bits(20, 16) == 0b1'1111 &&
+ instr->bits(11, 4) == 0b1111'1011) {
+ // Rev16
+ uint32_t rm_val = get_register(instr->rmValue());
+
+ static_assert(MOZ_LITTLE_ENDIAN());
+ uint32_t hi = mozilla::NativeEndian::swapToBigEndian(
+ uint16_t(rm_val >> 16));
+ uint32_t lo =
+ mozilla::NativeEndian::swapToBigEndian(uint16_t(rm_val));
+ set_register(rd, (hi << 16) | lo);
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 2:
+ if ((instr->bit(20) == 0) && (instr->bits(9, 6) == 1)) {
+ if (instr->bits(19, 16) == 0xF) {
+ // Uxtb16.
+ uint32_t rm_val = rotateBytes(get_register(instr->rmValue()),
+ instr->bits(11, 10));
+ set_register(rd, (rm_val & 0xFF) | (rm_val & 0xFF0000));
+ } else {
+ MOZ_CRASH();
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 3:
+ if ((instr->bit(20) == 0) && (instr->bits(9, 6) == 1)) {
+ if (instr->bits(19, 16) == 0xF) {
+ // Uxtb.
+ uint32_t rm_val = rotateBytes(get_register(instr->rmValue()),
+ instr->bits(11, 10));
+ set_register(rd, (rm_val & 0xFF));
+ } else {
+ // Uxtab.
+ uint32_t rn_val = get_register(rn);
+ uint32_t rm_val = rotateBytes(get_register(instr->rmValue()),
+ instr->bits(11, 10));
+ set_register(rd, rn_val + (rm_val & 0xFF));
+ }
+ } else if ((instr->bit(20) == 1) && (instr->bits(9, 6) == 1)) {
+ if (instr->bits(19, 16) == 0xF) {
+ // Uxth.
+ uint32_t rm_val = rotateBytes(get_register(instr->rmValue()),
+ instr->bits(11, 10));
+ set_register(rd, (rm_val & 0xFFFF));
+ } else {
+ // Uxtah.
+ uint32_t rn_val = get_register(rn);
+ uint32_t rm_val = rotateBytes(get_register(instr->rmValue()),
+ instr->bits(11, 10));
+ set_register(rd, rn_val + (rm_val & 0xFFFF));
+ }
+ } else if (instr->bits(20, 16) == 0b1'1111 &&
+ instr->bits(11, 4) == 0b1111'1011) {
+ // Revsh
+ uint32_t rm_val = get_register(instr->rmValue());
+
+ static_assert(MOZ_LITTLE_ENDIAN());
+ set_register(
+ rd, int32_t(int16_t(mozilla::NativeEndian::swapToBigEndian(
+ uint16_t(rm_val)))));
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ }
+ }
+ return;
+ }
+ break;
+ }
+ case db_x: { // sudiv
+ if (instr->bit(22) == 0x0 && instr->bit(20) == 0x1 &&
+ instr->bits(15, 12) == 0x0f && instr->bits(7, 4) == 0x1) {
+ if (!instr->hasW()) {
+ // sdiv (in V8 notation matching ARM ISA format) rn = rm/rs.
+ int rm = instr->rmValue();
+ int32_t rm_val = get_register(rm);
+ int rs = instr->rsValue();
+ int32_t rs_val = get_register(rs);
+ int32_t ret_val = 0;
+ MOZ_ASSERT(rs_val != 0);
+ if ((rm_val == INT32_MIN) && (rs_val == -1)) {
+ ret_val = INT32_MIN;
+ } else {
+ ret_val = rm_val / rs_val;
+ }
+ set_register(rn, ret_val);
+ return;
+ } else {
+ // udiv (in V8 notation matching ARM ISA format) rn = rm/rs.
+ int rm = instr->rmValue();
+ uint32_t rm_val = get_register(rm);
+ int rs = instr->rsValue();
+ uint32_t rs_val = get_register(rs);
+ uint32_t ret_val = 0;
+ MOZ_ASSERT(rs_val != 0);
+ ret_val = rm_val / rs_val;
+ set_register(rn, ret_val);
+ return;
+ }
+ }
+
+ addr = rn_val - shifter_operand;
+ if (instr->hasW()) {
+ set_register(rn, addr);
+ }
+ break;
+ }
+ case ib_x: {
+ if (instr->hasW() && (instr->bits(6, 4) == 0x5)) {
+ uint32_t widthminus1 = static_cast<uint32_t>(instr->bits(20, 16));
+ uint32_t lsbit = static_cast<uint32_t>(instr->bits(11, 7));
+ uint32_t msbit = widthminus1 + lsbit;
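+        // UBFX/SBFX encode the field as <lsb> (bits 11:7) and <width>-1
+        // (bits 20:16). The field is extracted by shifting it to the top of
+        // the word and back down, so the signedness of the right shift gives
+        // zero- or sign-extension.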
+ if (msbit <= 31) {
+ if (instr->bit(22)) {
+ // ubfx - unsigned bitfield extract.
+ uint32_t rm_val =
+ static_cast<uint32_t>(get_register(instr->rmValue()));
+ uint32_t extr_val = rm_val << (31 - msbit);
+ extr_val = extr_val >> (31 - widthminus1);
+ set_register(instr->rdValue(), extr_val);
+ } else {
+ // sbfx - signed bitfield extract.
+ int32_t rm_val = get_register(instr->rmValue());
+ int32_t extr_val = rm_val << (31 - msbit);
+ extr_val = extr_val >> (31 - widthminus1);
+ set_register(instr->rdValue(), extr_val);
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ return;
+ } else if (!instr->hasW() && (instr->bits(6, 4) == 0x1)) {
+ uint32_t lsbit = static_cast<uint32_t>(instr->bits(11, 7));
+ uint32_t msbit = static_cast<uint32_t>(instr->bits(20, 16));
+ if (msbit >= lsbit) {
+ // bfc or bfi - bitfield clear/insert.
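+          // The destination field [lsbit, msbit] is cleared first; BFI then
+          // ORs in the low bits of Rm, while BFC (encoded with Rm == 15)
+          // leaves the field cleared.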
+ uint32_t rd_val =
+ static_cast<uint32_t>(get_register(instr->rdValue()));
+ uint32_t bitcount = msbit - lsbit + 1;
+ uint32_t mask = (1 << bitcount) - 1;
+ rd_val &= ~(mask << lsbit);
+ if (instr->rmValue() != 15) {
+ // bfi - bitfield insert.
+ uint32_t rm_val =
+ static_cast<uint32_t>(get_register(instr->rmValue()));
+ rm_val &= mask;
+ rd_val |= rm_val << lsbit;
+ }
+ set_register(instr->rdValue(), rd_val);
+ } else {
+ MOZ_CRASH();
+ }
+ return;
+ } else {
+ addr = rn_val + shifter_operand;
+ if (instr->hasW()) {
+ set_register(rn, addr);
+ }
+ }
+ break;
+ }
+ default:
+ MOZ_CRASH();
+ break;
+ }
+ if (instr->hasB()) {
+ if (instr->hasL()) {
+ uint8_t byte = readB(addr);
+ set_register(rd, byte);
+ } else {
+ uint8_t byte = get_register(rd);
+ writeB(addr, byte);
+ }
+ } else {
+ if (instr->hasL()) {
+ set_register(rd, readW(addr, instr, AllowUnaligned));
+ } else {
+ writeW(addr, get_register(rd), instr, AllowUnaligned);
+ }
+ }
+}
+
+void Simulator::decodeType4(SimInstruction* instr) {
+ // Only allowed to be set in privileged mode.
+ MOZ_ASSERT(instr->bit(22) == 0);
+ bool load = instr->hasL();
+ handleRList(instr, load);
+}
+
+void Simulator::decodeType5(SimInstruction* instr) {
+ int off = instr->sImmed24Value() << 2;
+ intptr_t pc_address = get_pc();
+ if (instr->hasLink()) {
+ set_register(lr, pc_address + SimInstruction::kInstrSize);
+ }
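+  // Reading the PC through get_register includes the architectural +8 read
+  // offset, so the branch target is PC+8 plus the shifted 24-bit immediate.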
+ int pc_reg = get_register(pc);
+ set_pc(pc_reg + off);
+}
+
+void Simulator::decodeType6(SimInstruction* instr) {
+ decodeType6CoprocessorIns(instr);
+}
+
+void Simulator::decodeType7(SimInstruction* instr) {
+ if (instr->bit(24) == 1) {
+ softwareInterrupt(instr);
+ } else if (instr->bit(4) == 1 && instr->bits(11, 9) != 5) {
+ decodeType7CoprocessorIns(instr);
+ } else {
+ decodeTypeVFP(instr);
+ }
+}
+
+void Simulator::decodeType7CoprocessorIns(SimInstruction* instr) {
+ if (instr->bit(20) == 0) {
+ // MCR, MCR2
+ if (instr->coprocessorValue() == 15) {
+ int opc1 = instr->bits(23, 21);
+ int opc2 = instr->bits(7, 5);
+ int CRn = instr->bits(19, 16);
+ int CRm = instr->bits(3, 0);
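+      // These are the legacy CP15 barrier encodings:
+      //   MCR p15, 0, Rt, c7, c10, 4  (DSB)
+      //   MCR p15, 0, Rt, c7, c10, 5  (DMB)
+      //   MCR p15, 0, Rt, c7, c5, 4   (ISB)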
+ if (opc1 == 0 && opc2 == 4 && CRn == 7 && CRm == 10) {
+ // ARMv6 DSB instruction. We do not use DSB.
+ MOZ_CRASH("DSB not implemented");
+ } else if (opc1 == 0 && opc2 == 5 && CRn == 7 && CRm == 10) {
+ // ARMv6 DMB instruction.
+ AtomicOperations::fenceSeqCst();
+ } else if (opc1 == 0 && opc2 == 4 && CRn == 7 && CRm == 5) {
+ // ARMv6 ISB instruction. We do not use ISB.
+ MOZ_CRASH("ISB not implemented");
+ } else {
+ MOZ_CRASH();
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ } else {
+ // MRC, MRC2
+ MOZ_CRASH();
+ }
+}
+
+void Simulator::decodeTypeVFP(SimInstruction* instr) {
+ MOZ_ASSERT(instr->typeValue() == 7 && instr->bit(24) == 0);
+ MOZ_ASSERT(instr->bits(11, 9) == 0x5);
+
+  // Obtain the VFP register codes for the selected precision.
+ VFPRegPrecision precision =
+ (instr->szValue() == 1) ? kDoublePrecision : kSinglePrecision;
+ int vm = instr->VFPMRegValue(precision);
+ int vd = instr->VFPDRegValue(precision);
+ int vn = instr->VFPNRegValue(precision);
+
+ if (instr->bit(4) == 0) {
+ if (instr->opc1Value() == 0x7) {
+ // Other data processing instructions.
+ if ((instr->opc2Value() == 0x0) && (instr->opc3Value() == 0x1)) {
+ // vmov register to register.
+ if (instr->szValue() == 0x1) {
+ int m = instr->VFPMRegValue(kDoublePrecision);
+ int d = instr->VFPDRegValue(kDoublePrecision);
+ double temp;
+ get_double_from_d_register(m, &temp);
+ set_d_register_from_double(d, temp);
+ } else {
+ int m = instr->VFPMRegValue(kSinglePrecision);
+ int d = instr->VFPDRegValue(kSinglePrecision);
+ float temp;
+ get_float_from_s_register(m, &temp);
+ set_s_register_from_float(d, temp);
+ }
+ } else if ((instr->opc2Value() == 0x0) && (instr->opc3Value() == 0x3)) {
+ // vabs
+ if (instr->szValue() == 0x1) {
+ union {
+ double f64;
+ uint64_t u64;
+ } u;
+ get_double_from_d_register(vm, &u.f64);
+ u.u64 &= 0x7fffffffffffffffu;
+ double dd_value = u.f64;
+ canonicalizeNaN(&dd_value);
+ set_d_register_from_double(vd, dd_value);
+ } else {
+ union {
+ float f32;
+ uint32_t u32;
+ } u;
+ get_float_from_s_register(vm, &u.f32);
+ u.u32 &= 0x7fffffffu;
+ float fd_value = u.f32;
+ canonicalizeNaN(&fd_value);
+ set_s_register_from_float(vd, fd_value);
+ }
+ } else if ((instr->opc2Value() == 0x1) && (instr->opc3Value() == 0x1)) {
+ // vneg
+ if (instr->szValue() == 0x1) {
+ double dm_value;
+ get_double_from_d_register(vm, &dm_value);
+ double dd_value = -dm_value;
+ canonicalizeNaN(&dd_value);
+ set_d_register_from_double(vd, dd_value);
+ } else {
+ float fm_value;
+ get_float_from_s_register(vm, &fm_value);
+ float fd_value = -fm_value;
+ canonicalizeNaN(&fd_value);
+ set_s_register_from_float(vd, fd_value);
+ }
+ } else if ((instr->opc2Value() == 0x7) && (instr->opc3Value() == 0x3)) {
+ decodeVCVTBetweenDoubleAndSingle(instr);
+ } else if ((instr->opc2Value() == 0x8) && (instr->opc3Value() & 0x1)) {
+ decodeVCVTBetweenFloatingPointAndInteger(instr);
+ } else if ((instr->opc2Value() == 0xA) && (instr->opc3Value() == 0x3) &&
+ (instr->bit(8) == 1)) {
+ // vcvt.f64.s32 Dd, Dd, #<fbits>.
+ int fraction_bits = 32 - ((instr->bits(3, 0) << 1) | instr->bit(5));
+ int fixed_value = get_sinteger_from_s_register(vd * 2);
+ double divide = 1 << fraction_bits;
+ set_d_register_from_double(vd, fixed_value / divide);
+ } else if (((instr->opc2Value() >> 1) == 0x6) &&
+ (instr->opc3Value() & 0x1)) {
+ decodeVCVTBetweenFloatingPointAndInteger(instr);
+ } else if (((instr->opc2Value() == 0x4) || (instr->opc2Value() == 0x5)) &&
+ (instr->opc3Value() & 0x1)) {
+ decodeVCMP(instr);
+ } else if (((instr->opc2Value() == 0x1)) && (instr->opc3Value() == 0x3)) {
+ // vsqrt
+ if (instr->szValue() == 0x1) {
+ double dm_value;
+ get_double_from_d_register(vm, &dm_value);
+ double dd_value = std::sqrt(dm_value);
+ canonicalizeNaN(&dd_value);
+ set_d_register_from_double(vd, dd_value);
+ } else {
+ float fm_value;
+ get_float_from_s_register(vm, &fm_value);
+ float fd_value = std::sqrt(fm_value);
+ canonicalizeNaN(&fd_value);
+ set_s_register_from_float(vd, fd_value);
+ }
+ } else if (instr->opc3Value() == 0x0) {
+ // vmov immediate.
+ if (instr->szValue() == 0x1) {
+ set_d_register_from_double(vd, instr->doubleImmedVmov());
+ } else {
+ // vmov.f32 immediate.
+ set_s_register_from_float(vd, instr->float32ImmedVmov());
+ }
+ } else {
+ decodeVCVTBetweenFloatingPointAndIntegerFrac(instr);
+ }
+ } else if (instr->opc1Value() == 0x3) {
+ if (instr->szValue() != 0x1) {
+ if (instr->opc3Value() & 0x1) {
+ // vsub
+ float fn_value;
+ get_float_from_s_register(vn, &fn_value);
+ float fm_value;
+ get_float_from_s_register(vm, &fm_value);
+ float fd_value = fn_value - fm_value;
+ canonicalizeNaN(&fd_value);
+ set_s_register_from_float(vd, fd_value);
+ } else {
+ // vadd
+ float fn_value;
+ get_float_from_s_register(vn, &fn_value);
+ float fm_value;
+ get_float_from_s_register(vm, &fm_value);
+ float fd_value = fn_value + fm_value;
+ canonicalizeNaN(&fd_value);
+ set_s_register_from_float(vd, fd_value);
+ }
+ } else {
+ if (instr->opc3Value() & 0x1) {
+ // vsub
+ double dn_value;
+ get_double_from_d_register(vn, &dn_value);
+ double dm_value;
+ get_double_from_d_register(vm, &dm_value);
+ double dd_value = dn_value - dm_value;
+ canonicalizeNaN(&dd_value);
+ set_d_register_from_double(vd, dd_value);
+ } else {
+ // vadd
+ double dn_value;
+ get_double_from_d_register(vn, &dn_value);
+ double dm_value;
+ get_double_from_d_register(vm, &dm_value);
+ double dd_value = dn_value + dm_value;
+ canonicalizeNaN(&dd_value);
+ set_d_register_from_double(vd, dd_value);
+ }
+ }
+ } else if ((instr->opc1Value() == 0x2) && !(instr->opc3Value() & 0x1)) {
+ // vmul
+ if (instr->szValue() != 0x1) {
+ float fn_value;
+ get_float_from_s_register(vn, &fn_value);
+ float fm_value;
+ get_float_from_s_register(vm, &fm_value);
+ float fd_value = fn_value * fm_value;
+ canonicalizeNaN(&fd_value);
+ set_s_register_from_float(vd, fd_value);
+ } else {
+ double dn_value;
+ get_double_from_d_register(vn, &dn_value);
+ double dm_value;
+ get_double_from_d_register(vm, &dm_value);
+ double dd_value = dn_value * dm_value;
+ canonicalizeNaN(&dd_value);
+ set_d_register_from_double(vd, dd_value);
+ }
+ } else if ((instr->opc1Value() == 0x0)) {
+ // vmla, vmls
+ const bool is_vmls = (instr->opc3Value() & 0x1);
+
+ if (instr->szValue() != 0x1) {
+ MOZ_CRASH("Not used by V8.");
+ }
+
+ double dd_val;
+ get_double_from_d_register(vd, &dd_val);
+ double dn_val;
+ get_double_from_d_register(vn, &dn_val);
+ double dm_val;
+ get_double_from_d_register(vm, &dm_val);
+
+ // Note: we do the mul and add/sub in separate steps to avoid
+ // getting a result with too high precision.
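+    // VMLA/VMLS are not fused operations, so the product must be rounded to
+    // double precision before the add/sub; going through the register forces
+    // that rounding.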
+ set_d_register_from_double(vd, dn_val * dm_val);
+ double temp;
+ get_double_from_d_register(vd, &temp);
+ if (is_vmls) {
+ temp = dd_val - temp;
+ } else {
+ temp = dd_val + temp;
+ }
+ canonicalizeNaN(&temp);
+ set_d_register_from_double(vd, temp);
+ } else if ((instr->opc1Value() == 0x4) && !(instr->opc3Value() & 0x1)) {
+ // vdiv
+ if (instr->szValue() != 0x1) {
+ float fn_value;
+ get_float_from_s_register(vn, &fn_value);
+ float fm_value;
+ get_float_from_s_register(vm, &fm_value);
+ float fd_value = fn_value / fm_value;
+ div_zero_vfp_flag_ = (fm_value == 0);
+ canonicalizeNaN(&fd_value);
+ set_s_register_from_float(vd, fd_value);
+ } else {
+ double dn_value;
+ get_double_from_d_register(vn, &dn_value);
+ double dm_value;
+ get_double_from_d_register(vm, &dm_value);
+ double dd_value = dn_value / dm_value;
+ div_zero_vfp_flag_ = (dm_value == 0);
+ canonicalizeNaN(&dd_value);
+ set_d_register_from_double(vd, dd_value);
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ } else {
+ if (instr->VCValue() == 0x0 && instr->VAValue() == 0x0) {
+ decodeVMOVBetweenCoreAndSinglePrecisionRegisters(instr);
+ } else if ((instr->VLValue() == 0x0) && (instr->VCValue() == 0x1) &&
+ (instr->bit(23) == 0x0)) {
+ // vmov (ARM core register to scalar).
+ int vd = instr->bits(19, 16) | (instr->bit(7) << 4);
+ double dd_value;
+ get_double_from_d_register(vd, &dd_value);
+ int32_t data[2];
+ memcpy(data, &dd_value, 8);
+ data[instr->bit(21)] = get_register(instr->rtValue());
+ memcpy(&dd_value, data, 8);
+ set_d_register_from_double(vd, dd_value);
+ } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1) &&
+ (instr->bit(23) == 0x0)) {
+ // vmov (scalar to ARM core register).
+ int vn = instr->bits(19, 16) | (instr->bit(7) << 4);
+ double dn_value;
+ get_double_from_d_register(vn, &dn_value);
+ int32_t data[2];
+ memcpy(data, &dn_value, 8);
+ set_register(instr->rtValue(), data[instr->bit(21)]);
+ } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x0) &&
+ (instr->VAValue() == 0x7) && (instr->bits(19, 16) == 0x1)) {
+ // vmrs
+ uint32_t rt = instr->rtValue();
+ if (rt == 0xF) {
+ copy_FPSCR_to_APSR();
+ } else {
+ // Emulate FPSCR from the Simulator flags.
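+        // FPSCR layout: N/Z/C/V in bits 31-28, default-NaN (DN) in bit 25,
+        // the rounding mode in bits 23-22, and the cumulative exception flags
+        // (IXC, UFC, OFC, DZC, IOC) in bits 4-0.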
+ uint32_t fpscr = (n_flag_FPSCR_ << 31) | (z_flag_FPSCR_ << 30) |
+ (c_flag_FPSCR_ << 29) | (v_flag_FPSCR_ << 28) |
+ (FPSCR_default_NaN_mode_ << 25) |
+ (inexact_vfp_flag_ << 4) | (underflow_vfp_flag_ << 3) |
+ (overflow_vfp_flag_ << 2) | (div_zero_vfp_flag_ << 1) |
+ (inv_op_vfp_flag_ << 0) | (FPSCR_rounding_mode_);
+ set_register(rt, fpscr);
+ }
+ } else if ((instr->VLValue() == 0x0) && (instr->VCValue() == 0x0) &&
+ (instr->VAValue() == 0x7) && (instr->bits(19, 16) == 0x1)) {
+ // vmsr
+ uint32_t rt = instr->rtValue();
+ if (rt == pc) {
+ MOZ_CRASH();
+ } else {
+ uint32_t rt_value = get_register(rt);
+ n_flag_FPSCR_ = (rt_value >> 31) & 1;
+ z_flag_FPSCR_ = (rt_value >> 30) & 1;
+ c_flag_FPSCR_ = (rt_value >> 29) & 1;
+ v_flag_FPSCR_ = (rt_value >> 28) & 1;
+ FPSCR_default_NaN_mode_ = (rt_value >> 25) & 1;
+ inexact_vfp_flag_ = (rt_value >> 4) & 1;
+ underflow_vfp_flag_ = (rt_value >> 3) & 1;
+ overflow_vfp_flag_ = (rt_value >> 2) & 1;
+ div_zero_vfp_flag_ = (rt_value >> 1) & 1;
+ inv_op_vfp_flag_ = (rt_value >> 0) & 1;
+ FPSCR_rounding_mode_ =
+ static_cast<VFPRoundingMode>((rt_value)&kVFPRoundingModeMask);
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ }
+}
+
+void Simulator::decodeVMOVBetweenCoreAndSinglePrecisionRegisters(
+ SimInstruction* instr) {
+ MOZ_ASSERT(instr->bit(4) == 1 && instr->VCValue() == 0x0 &&
+ instr->VAValue() == 0x0);
+
+ int t = instr->rtValue();
+ int n = instr->VFPNRegValue(kSinglePrecision);
+ bool to_arm_register = (instr->VLValue() == 0x1);
+ if (to_arm_register) {
+ int32_t int_value = get_sinteger_from_s_register(n);
+ set_register(t, int_value);
+ } else {
+ int32_t rs_val = get_register(t);
+ set_s_register_from_sinteger(n, rs_val);
+ }
+}
+
+void Simulator::decodeVCMP(SimInstruction* instr) {
+ MOZ_ASSERT((instr->bit(4) == 0) && (instr->opc1Value() == 0x7));
+ MOZ_ASSERT(((instr->opc2Value() == 0x4) || (instr->opc2Value() == 0x5)) &&
+ (instr->opc3Value() & 0x1));
+ // Comparison.
+
+ VFPRegPrecision precision = kSinglePrecision;
+ if (instr->szValue() == 1) {
+ precision = kDoublePrecision;
+ }
+
+ int d = instr->VFPDRegValue(precision);
+ int m = 0;
+ if (instr->opc2Value() == 0x4) {
+ m = instr->VFPMRegValue(precision);
+ }
+
+ if (precision == kDoublePrecision) {
+ double dd_value;
+ get_double_from_d_register(d, &dd_value);
+ double dm_value = 0.0;
+ if (instr->opc2Value() == 0x4) {
+ get_double_from_d_register(m, &dm_value);
+ }
+
+ // Raise exceptions for quiet NaNs if necessary.
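+    // Bit 7 selects the VCMPE form, which signals Invalid Operation for any
+    // NaN operand.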
+ if (instr->bit(7) == 1) {
+ if (std::isnan(dd_value)) {
+ inv_op_vfp_flag_ = true;
+ }
+ }
+ compute_FPSCR_Flags(dd_value, dm_value);
+ } else {
+ float fd_value;
+ get_float_from_s_register(d, &fd_value);
+ float fm_value = 0.0;
+ if (instr->opc2Value() == 0x4) {
+ get_float_from_s_register(m, &fm_value);
+ }
+
+ // Raise exceptions for quiet NaNs if necessary.
+ if (instr->bit(7) == 1) {
+ if (std::isnan(fd_value)) {
+ inv_op_vfp_flag_ = true;
+ }
+ }
+ compute_FPSCR_Flags(fd_value, fm_value);
+ }
+}
+
+void Simulator::decodeVCVTBetweenDoubleAndSingle(SimInstruction* instr) {
+ MOZ_ASSERT(instr->bit(4) == 0 && instr->opc1Value() == 0x7);
+ MOZ_ASSERT(instr->opc2Value() == 0x7 && instr->opc3Value() == 0x3);
+
+ VFPRegPrecision dst_precision = kDoublePrecision;
+ VFPRegPrecision src_precision = kSinglePrecision;
+ if (instr->szValue() == 1) {
+ dst_precision = kSinglePrecision;
+ src_precision = kDoublePrecision;
+ }
+
+ int dst = instr->VFPDRegValue(dst_precision);
+ int src = instr->VFPMRegValue(src_precision);
+
+ if (dst_precision == kSinglePrecision) {
+ double val;
+ get_double_from_d_register(src, &val);
+ set_s_register_from_float(dst, static_cast<float>(val));
+ } else {
+ float val;
+ get_float_from_s_register(src, &val);
+ set_d_register_from_double(dst, static_cast<double>(val));
+ }
+}
+
+static bool get_inv_op_vfp_flag(VFPRoundingMode mode, double val,
+ bool unsigned_) {
+ MOZ_ASSERT(mode == SimRN || mode == SimRM || mode == SimRZ);
+ double max_uint = static_cast<double>(0xffffffffu);
+ double max_int = static_cast<double>(INT32_MAX);
+ double min_int = static_cast<double>(INT32_MIN);
+
+ // Check for NaN.
+ if (val != val) {
+ return true;
+ }
+
+  // Check for overflow. This code works because 32-bit integers can be
+  // exactly represented by IEEE-754 64-bit floating-point values.
+ switch (mode) {
+ case SimRN:
+ return unsigned_ ? (val >= (max_uint + 0.5)) || (val < -0.5)
+ : (val >= (max_int + 0.5)) || (val < (min_int - 0.5));
+ case SimRM:
+ return unsigned_ ? (val >= (max_uint + 1.0)) || (val < 0)
+ : (val >= (max_int + 1.0)) || (val < min_int);
+ case SimRZ:
+ return unsigned_ ? (val >= (max_uint + 1.0)) || (val <= -1)
+ : (val >= (max_int + 1.0)) || (val <= (min_int - 1.0));
+ default:
+ MOZ_CRASH();
+ return true;
+ }
+}
+
+// We call this function only if we had a vfp invalid exception.
+// It returns the correct saturated value.
+static int VFPConversionSaturate(double val, bool unsigned_res) {
+ if (val != val) { // NaN.
+ return 0;
+ }
+ if (unsigned_res) {
+ return (val < 0) ? 0 : 0xffffffffu;
+ }
+ return (val < 0) ? INT32_MIN : INT32_MAX;
+}
+
+void Simulator::decodeVCVTBetweenFloatingPointAndInteger(
+ SimInstruction* instr) {
+ MOZ_ASSERT((instr->bit(4) == 0) && (instr->opc1Value() == 0x7) &&
+ (instr->bits(27, 23) == 0x1D));
+ MOZ_ASSERT(
+ ((instr->opc2Value() == 0x8) && (instr->opc3Value() & 0x1)) ||
+ (((instr->opc2Value() >> 1) == 0x6) && (instr->opc3Value() & 0x1)));
+
+ // Conversion between floating-point and integer.
+ bool to_integer = (instr->bit(18) == 1);
+
+ VFPRegPrecision src_precision =
+ (instr->szValue() == 1) ? kDoublePrecision : kSinglePrecision;
+
+ if (to_integer) {
+ // We are playing with code close to the C++ standard's limits below,
+ // hence the very simple code and heavy checks.
+ //
+ // Note: C++ defines default type casting from floating point to integer
+ // as (close to) rounding toward zero ("fractional part discarded").
+
+ int dst = instr->VFPDRegValue(kSinglePrecision);
+ int src = instr->VFPMRegValue(src_precision);
+
+ // Bit 7 in vcvt instructions indicates if we should use the FPSCR
+ // rounding mode or the default Round to Zero mode.
+ VFPRoundingMode mode = (instr->bit(7) != 1) ? FPSCR_rounding_mode_ : SimRZ;
+ MOZ_ASSERT(mode == SimRM || mode == SimRZ || mode == SimRN);
+
+ bool unsigned_integer = (instr->bit(16) == 0);
+ bool double_precision = (src_precision == kDoublePrecision);
+
+ double val;
+ if (double_precision) {
+ get_double_from_d_register(src, &val);
+ } else {
+ float fval;
+ get_float_from_s_register(src, &fval);
+ val = double(fval);
+ }
+
+ int temp = unsigned_integer ? static_cast<uint32_t>(val)
+ : static_cast<int32_t>(val);
+
+ inv_op_vfp_flag_ = get_inv_op_vfp_flag(mode, val, unsigned_integer);
+
+ double abs_diff = unsigned_integer
+ ? std::fabs(val - static_cast<uint32_t>(temp))
+ : std::fabs(val - temp);
+
+ inexact_vfp_flag_ = (abs_diff != 0);
+
+ if (inv_op_vfp_flag_) {
+ temp = VFPConversionSaturate(val, unsigned_integer);
+ } else {
+ switch (mode) {
+ case SimRN: {
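+          // Round to nearest, ties to even: e.g. 2.5 truncates to 2 and stays
+          // 2 (even), while 3.5 truncates to 3 and is then bumped to 4.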
+ int val_sign = (val > 0) ? 1 : -1;
+ if (abs_diff > 0.5) {
+ temp += val_sign;
+ } else if (abs_diff == 0.5) {
+ // Round to even if exactly halfway.
+ temp = ((temp % 2) == 0) ? temp : temp + val_sign;
+ }
+ break;
+ }
+
+ case SimRM:
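+          // The cast truncated toward zero; for a negative value with a
+          // fractional part the result is one too high, so step down to
+          // round toward minus infinity.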
+ temp = temp > val ? temp - 1 : temp;
+ break;
+
+ case SimRZ:
+ // Nothing to do.
+ break;
+
+ default:
+ MOZ_CRASH();
+ }
+ }
+
+ // Update the destination register.
+ set_s_register_from_sinteger(dst, temp);
+ } else {
+ bool unsigned_integer = (instr->bit(7) == 0);
+ int dst = instr->VFPDRegValue(src_precision);
+ int src = instr->VFPMRegValue(kSinglePrecision);
+
+ int val = get_sinteger_from_s_register(src);
+
+ if (src_precision == kDoublePrecision) {
+ if (unsigned_integer) {
+ set_d_register_from_double(
+ dst, static_cast<double>(static_cast<uint32_t>(val)));
+ } else {
+ set_d_register_from_double(dst, static_cast<double>(val));
+ }
+ } else {
+ if (unsigned_integer) {
+ set_s_register_from_float(
+ dst, static_cast<float>(static_cast<uint32_t>(val)));
+ } else {
+ set_s_register_from_float(dst, static_cast<float>(val));
+ }
+ }
+ }
+}
+
+// A VFPv3 specific instruction.
+void Simulator::decodeVCVTBetweenFloatingPointAndIntegerFrac(
+ SimInstruction* instr) {
+ MOZ_ASSERT(instr->bits(27, 24) == 0xE && instr->opc1Value() == 0x7 &&
+ instr->bit(19) == 1 && instr->bit(17) == 1 &&
+ instr->bits(11, 9) == 0x5 && instr->bit(6) == 1 &&
+ instr->bit(4) == 0);
+
+ int size = (instr->bit(7) == 1) ? 32 : 16;
+
+ int fraction_bits = size - ((instr->bits(3, 0) << 1) | instr->bit(5));
+ double mult = 1 << fraction_bits;
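+  // The fixed-point operand has 'fraction_bits' fractional bits, so scaling
+  // by 2^fraction_bits converts the floating-point value to that fixed-point
+  // representation before truncation.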
+
+ MOZ_ASSERT(size == 32); // Only handling size == 32 for now.
+
+ // Conversion between floating-point and integer.
+ bool to_fixed = (instr->bit(18) == 1);
+
+ VFPRegPrecision precision =
+ (instr->szValue() == 1) ? kDoublePrecision : kSinglePrecision;
+
+ if (to_fixed) {
+ // We are playing with code close to the C++ standard's limits below,
+ // hence the very simple code and heavy checks.
+ //
+ // Note: C++ defines default type casting from floating point to integer
+ // as (close to) rounding toward zero ("fractional part discarded").
+
+ int dst = instr->VFPDRegValue(precision);
+
+ bool unsigned_integer = (instr->bit(16) == 1);
+ bool double_precision = (precision == kDoublePrecision);
+
+ double val;
+ if (double_precision) {
+ get_double_from_d_register(dst, &val);
+ } else {
+ float fval;
+ get_float_from_s_register(dst, &fval);
+ val = double(fval);
+ }
+
+ // Scale value by specified number of fraction bits.
+ val *= mult;
+
+    // Truncate toward zero. There is no need to account for rounding error,
+    // as this instruction always rounds toward zero (see the SimRZ use below).
+ int temp = unsigned_integer ? static_cast<uint32_t>(val)
+ : static_cast<int32_t>(val);
+
+ inv_op_vfp_flag_ = get_inv_op_vfp_flag(SimRZ, val, unsigned_integer);
+
+ double abs_diff = unsigned_integer
+ ? std::fabs(val - static_cast<uint32_t>(temp))
+ : std::fabs(val - temp);
+
+ inexact_vfp_flag_ = (abs_diff != 0);
+
+ if (inv_op_vfp_flag_) {
+ temp = VFPConversionSaturate(val, unsigned_integer);
+ }
+
+ // Update the destination register.
+ if (double_precision) {
+ uint32_t dbl[2];
+ dbl[0] = temp;
+ dbl[1] = 0;
+ set_d_register(dst, dbl);
+ } else {
+ set_s_register_from_sinteger(dst, temp);
+ }
+ } else {
+ MOZ_CRASH(); // Not implemented, fixed to float.
+ }
+}
+
+void Simulator::decodeType6CoprocessorIns(SimInstruction* instr) {
+ MOZ_ASSERT(instr->typeValue() == 6);
+
+ if (instr->coprocessorValue() == 0xA) {
+ switch (instr->opcodeValue()) {
+ case 0x8:
+ case 0xA:
+ case 0xC:
+ case 0xE: { // Load and store single precision float to memory.
+ int rn = instr->rnValue();
+ int vd = instr->VFPDRegValue(kSinglePrecision);
+ int offset = instr->immed8Value();
+ if (!instr->hasU()) {
+ offset = -offset;
+ }
+
+ int32_t address = get_register(rn) + 4 * offset;
+ if (instr->hasL()) {
+          // Load single-precision float from memory: vldr.
+ set_s_register_from_sinteger(vd, readW(address, instr));
+ } else {
+          // Store single-precision float to memory: vstr.
+ writeW(address, get_sinteger_from_s_register(vd), instr);
+ }
+ break;
+ }
+ case 0x4:
+ case 0x5:
+ case 0x6:
+ case 0x7:
+ case 0x9:
+ case 0xB:
+        // Load/store multiple single-precision registers: vldm/vstm.
+ handleVList(instr);
+ break;
+ default:
+ MOZ_CRASH();
+ }
+ } else if (instr->coprocessorValue() == 0xB) {
+ switch (instr->opcodeValue()) {
+ case 0x2:
+        // Transfer a double between a D register and two GP registers: vmov.
+ if (instr->bits(7, 6) != 0 || instr->bit(4) != 1) {
+ MOZ_CRASH(); // Not used atm.
+ } else {
+ int rt = instr->rtValue();
+ int rn = instr->rnValue();
+ int vm = instr->VFPMRegValue(kDoublePrecision);
+ if (instr->hasL()) {
+ int32_t data[2];
+ double d;
+ get_double_from_d_register(vm, &d);
+ memcpy(data, &d, 8);
+ set_register(rt, data[0]);
+ set_register(rn, data[1]);
+ } else {
+ int32_t data[] = {get_register(rt), get_register(rn)};
+ double d;
+ memcpy(&d, data, 8);
+ set_d_register_from_double(vm, d);
+ }
+ }
+ break;
+ case 0x8:
+ case 0xA:
+ case 0xC:
+ case 0xE: { // Load and store double to memory.
+ int rn = instr->rnValue();
+ int vd = instr->VFPDRegValue(kDoublePrecision);
+ int offset = instr->immed8Value();
+ if (!instr->hasU()) {
+ offset = -offset;
+ }
+ int32_t address = get_register(rn) + 4 * offset;
+ if (instr->hasL()) {
+ // Load double from memory: vldr.
+ uint64_t data = readQ(address, instr);
+ double val;
+ memcpy(&val, &data, 8);
+ set_d_register_from_double(vd, val);
+ } else {
+ // Store double to memory: vstr.
+ uint64_t data;
+ double val;
+ get_double_from_d_register(vd, &val);
+ memcpy(&data, &val, 8);
+ writeQ(address, data, instr);
+ }
+ break;
+ }
+ case 0x4:
+ case 0x5:
+ case 0x6:
+ case 0x7:
+ case 0x9:
+ case 0xB:
+        // Load/store multiple double-precision registers: vldm/vstm.
+ handleVList(instr);
+ break;
+ default:
+ MOZ_CRASH();
+ }
+ } else {
+ MOZ_CRASH();
+ }
+}
+
+void Simulator::decodeSpecialCondition(SimInstruction* instr) {
+ switch (instr->specialValue()) {
+ case 5:
+ if (instr->bits(18, 16) == 0 && instr->bits(11, 6) == 0x28 &&
+ instr->bit(4) == 1) {
+ // vmovl signed
+ if ((instr->vdValue() & 1) != 0) {
+ MOZ_CRASH("Undefined behavior");
+ }
+ int Vd = (instr->bit(22) << 3) | (instr->vdValue() >> 1);
+ int Vm = (instr->bit(5) << 4) | instr->vmValue();
+ int imm3 = instr->bits(21, 19);
+ if (imm3 != 1 && imm3 != 2 && imm3 != 4) {
+ MOZ_CRASH();
+ }
+ int esize = 8 * imm3;
+ int elements = 64 / esize;
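+        // vmovl widens each lane of the 64-bit D register to twice its size
+        // in the 128-bit Q register; note that the buffers below are typed
+        // for the 8-bit to 16-bit widening.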
+ int8_t from[8];
+ get_d_register(Vm, reinterpret_cast<uint64_t*>(from));
+ int16_t to[8];
+ int e = 0;
+ while (e < elements) {
+ to[e] = from[e];
+ e++;
+ }
+ set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 7:
+ if (instr->bits(18, 16) == 0 && instr->bits(11, 6) == 0x28 &&
+ instr->bit(4) == 1) {
+ // vmovl unsigned.
+ if ((instr->vdValue() & 1) != 0) {
+ MOZ_CRASH("Undefined behavior");
+ }
+ int Vd = (instr->bit(22) << 3) | (instr->vdValue() >> 1);
+ int Vm = (instr->bit(5) << 4) | instr->vmValue();
+ int imm3 = instr->bits(21, 19);
+ if (imm3 != 1 && imm3 != 2 && imm3 != 4) {
+ MOZ_CRASH();
+ }
+ int esize = 8 * imm3;
+ int elements = 64 / esize;
+ uint8_t from[8];
+ get_d_register(Vm, reinterpret_cast<uint64_t*>(from));
+ uint16_t to[8];
+ int e = 0;
+ while (e < elements) {
+ to[e] = from[e];
+ e++;
+ }
+ set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 8:
+ if (instr->bits(21, 20) == 0) {
+ // vst1
+ int Vd = (instr->bit(22) << 4) | instr->vdValue();
+ int Rn = instr->vnValue();
+ int type = instr->bits(11, 8);
+ int Rm = instr->vmValue();
+ int32_t address = get_register(Rn);
+ int regs = 0;
+ switch (type) {
+ case nlt_1:
+ regs = 1;
+ break;
+ case nlt_2:
+ regs = 2;
+ break;
+ case nlt_3:
+ regs = 3;
+ break;
+ case nlt_4:
+ regs = 4;
+ break;
+ default:
+ MOZ_CRASH();
+ break;
+ }
+ int r = 0;
+ while (r < regs) {
+ uint32_t data[2];
+ get_d_register(Vd + r, data);
+ // TODO: We should AllowUnaligned here only if the alignment attribute
+ // of the instruction calls for default alignment.
+ //
+ // Use writeQ to get handling of traps right. (The spec says to
+ // perform two individual word writes, but let's not worry about
+ // that.)
+ writeQ(address, (uint64_t(data[1]) << 32) | uint64_t(data[0]), instr,
+ AllowUnaligned);
+ address += 8;
+ r++;
+ }
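+        // Post-indexing: Rm == 15 means no writeback, Rm == 13 writes back
+        // the incremented address, and any other Rm adds that register to Rn.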
+ if (Rm != 15) {
+ if (Rm == 13) {
+ set_register(Rn, address);
+ } else {
+ set_register(Rn, get_register(Rn) + get_register(Rm));
+ }
+ }
+ } else if (instr->bits(21, 20) == 2) {
+ // vld1
+ int Vd = (instr->bit(22) << 4) | instr->vdValue();
+ int Rn = instr->vnValue();
+ int type = instr->bits(11, 8);
+ int Rm = instr->vmValue();
+ int32_t address = get_register(Rn);
+ int regs = 0;
+ switch (type) {
+ case nlt_1:
+ regs = 1;
+ break;
+ case nlt_2:
+ regs = 2;
+ break;
+ case nlt_3:
+ regs = 3;
+ break;
+ case nlt_4:
+ regs = 4;
+ break;
+ default:
+ MOZ_CRASH();
+ break;
+ }
+ int r = 0;
+ while (r < regs) {
+ uint32_t data[2];
+ // TODO: We should AllowUnaligned here only if the alignment attribute
+ // of the instruction calls for default alignment.
+ //
+ // Use readQ to get handling of traps right. (The spec says to
+ // perform two individual word reads, but let's not worry about that.)
+ uint64_t tmp = readQ(address, instr, AllowUnaligned);
+ data[0] = tmp;
+ data[1] = tmp >> 32;
+ set_d_register(Vd + r, data);
+ address += 8;
+ r++;
+ }
+ if (Rm != 15) {
+ if (Rm == 13) {
+ set_register(Rn, address);
+ } else {
+ set_register(Rn, get_register(Rn) + get_register(Rm));
+ }
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 9:
+ if (instr->bits(9, 8) == 0) {
+ int Vd = (instr->bit(22) << 4) | instr->vdValue();
+ int Rn = instr->vnValue();
+ int size = instr->bits(11, 10);
+ int Rm = instr->vmValue();
+ int index = instr->bits(7, 5);
+ int align = instr->bit(4);
+ int32_t address = get_register(Rn);
+ if (size != 2 || align) {
+ MOZ_CRASH("NYI");
+ }
+ int a = instr->bits(5, 4);
+ if (a != 0 && a != 3) {
+ MOZ_CRASH("Unspecified");
+ }
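+        // A D register holds two 32-bit lanes, so lane indices 2 and 3 refer
+        // to the following D register.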
+ if (index > 1) {
+ Vd++;
+ index -= 2;
+ }
+ uint32_t data[2];
+ get_d_register(Vd, data);
+ switch (instr->bits(21, 20)) {
+ case 0:
+ // vst1 single element from one lane
+ writeW(address, data[index], instr, AllowUnaligned);
+ break;
+ case 2:
+ // vld1 single element to one lane
+ data[index] = readW(address, instr, AllowUnaligned);
+ set_d_register(Vd, data);
+ break;
+ default:
+ MOZ_CRASH("NYI");
+ }
+ address += 4;
+ if (Rm != 15) {
+ if (Rm == 13) {
+ set_register(Rn, address);
+ } else {
+ set_register(Rn, get_register(Rn) + get_register(Rm));
+ }
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 0xA:
+ if (instr->bits(31, 20) == 0xf57) {
+ switch (instr->bits(7, 4)) {
+ case 1: // CLREX
+ exclusiveMonitorClear();
+ break;
+ case 5: // DMB
+ AtomicOperations::fenceSeqCst();
+ break;
+ case 4: // DSB
+ // We do not use DSB.
+ MOZ_CRASH("DSB unimplemented");
+ case 6: // ISB
+ // We do not use ISB.
+ MOZ_CRASH("ISB unimplemented");
+ default:
+ MOZ_CRASH();
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 0xB:
+ if (instr->bits(22, 20) == 5 && instr->bits(15, 12) == 0xf) {
+ // pld: ignore instruction.
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 0x1C:
+ case 0x1D:
+ if (instr->bit(4) == 1 && instr->bits(11, 9) != 5) {
+ // MCR, MCR2, MRC, MRC2 with cond == 15
+ decodeType7CoprocessorIns(instr);
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ default:
+ MOZ_CRASH();
+ }
+}
+
+// Executes the current instruction.
+void Simulator::instructionDecode(SimInstruction* instr) {
+ if (!SimulatorProcess::ICacheCheckingDisableCount) {
+ AutoLockSimulatorCache als;
+ SimulatorProcess::checkICacheLocked(instr);
+ }
+
+ pc_modified_ = false;
+
+ static const uint32_t kSpecialCondition = 15 << 28;
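+  // A condition field of 0b1111 selects the unconditional (special)
+  // instruction space.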
+ if (instr->conditionField() == kSpecialCondition) {
+ decodeSpecialCondition(instr);
+ } else if (conditionallyExecute(instr)) {
+ switch (instr->typeValue()) {
+ case 0:
+ case 1:
+ decodeType01(instr);
+ break;
+ case 2:
+ decodeType2(instr);
+ break;
+ case 3:
+ decodeType3(instr);
+ break;
+ case 4:
+ decodeType4(instr);
+ break;
+ case 5:
+ decodeType5(instr);
+ break;
+ case 6:
+ decodeType6(instr);
+ break;
+ case 7:
+ decodeType7(instr);
+ break;
+ default:
+ MOZ_CRASH();
+ break;
+ }
+ // If the instruction is a non taken conditional stop, we need to skip
+ // the inlined message address.
+ } else if (instr->isStop()) {
+ set_pc(get_pc() + 2 * SimInstruction::kInstrSize);
+ }
+ if (!pc_modified_) {
+ set_register(pc,
+ reinterpret_cast<int32_t>(instr) + SimInstruction::kInstrSize);
+ }
+}
+
+void Simulator::enable_single_stepping(SingleStepCallback cb, void* arg) {
+ single_stepping_ = true;
+ single_step_callback_ = cb;
+ single_step_callback_arg_ = arg;
+ single_step_callback_(single_step_callback_arg_, this, (void*)get_pc());
+}
+
+void Simulator::disable_single_stepping() {
+ if (!single_stepping_) {
+ return;
+ }
+ single_step_callback_(single_step_callback_arg_, this, (void*)get_pc());
+ single_stepping_ = false;
+ single_step_callback_ = nullptr;
+ single_step_callback_arg_ = nullptr;
+}
+
+template <bool EnableStopSimAt>
+void Simulator::execute() {
+ if (single_stepping_) {
+ single_step_callback_(single_step_callback_arg_, this, nullptr);
+ }
+
+ // Get the PC to simulate. Cannot use the accessor here as we need the raw
+ // PC value and not the one used as input to arithmetic instructions.
+ int program_counter = get_pc();
+
+ while (program_counter != end_sim_pc) {
+ if (EnableStopSimAt && (icount_ == Simulator::StopSimAt)) {
+ fprintf(stderr, "\nStopped simulation at icount %lld\n", icount_);
+ ArmDebugger dbg(this);
+ dbg.debug();
+ } else {
+ if (single_stepping_) {
+ single_step_callback_(single_step_callback_arg_, this,
+ (void*)program_counter);
+ }
+ SimInstruction* instr =
+ reinterpret_cast<SimInstruction*>(program_counter);
+ instructionDecode(instr);
+ icount_++;
+ }
+ program_counter = get_pc();
+ }
+
+ if (single_stepping_) {
+ single_step_callback_(single_step_callback_arg_, this, nullptr);
+ }
+}
+
+void Simulator::callInternal(uint8_t* entry) {
+ // Prepare to execute the code at entry.
+ set_register(pc, reinterpret_cast<int32_t>(entry));
+
+  // Put down a marker for the end of simulation. The simulator will stop
+  // simulating when the PC reaches this value. By placing the "end simulation"
+  // value in the LR, the simulation stops when the called code returns to
+  // this call point.
+ set_register(lr, end_sim_pc);
+
+ // Remember the values of callee-saved registers. The code below assumes
+ // that r9 is not used as sb (static base) in simulator code and therefore
+ // is regarded as a callee-saved register.
+ int32_t r4_val = get_register(r4);
+ int32_t r5_val = get_register(r5);
+ int32_t r6_val = get_register(r6);
+ int32_t r7_val = get_register(r7);
+ int32_t r8_val = get_register(r8);
+ int32_t r9_val = get_register(r9);
+ int32_t r10_val = get_register(r10);
+ int32_t r11_val = get_register(r11);
+
+ // Remember d8 to d15 which are callee-saved.
+ uint64_t d8_val;
+ get_d_register(d8, &d8_val);
+ uint64_t d9_val;
+ get_d_register(d9, &d9_val);
+ uint64_t d10_val;
+ get_d_register(d10, &d10_val);
+ uint64_t d11_val;
+ get_d_register(d11, &d11_val);
+ uint64_t d12_val;
+ get_d_register(d12, &d12_val);
+ uint64_t d13_val;
+ get_d_register(d13, &d13_val);
+ uint64_t d14_val;
+ get_d_register(d14, &d14_val);
+ uint64_t d15_val;
+ get_d_register(d15, &d15_val);
+
+  // Set up the callee-saved registers with a known value so that we can check
+  // that they are preserved properly across JS execution.
+ int32_t callee_saved_value = uint32_t(icount_);
+ uint64_t callee_saved_value_d = uint64_t(icount_);
+
+ if (!skipCalleeSavedRegsCheck) {
+ set_register(r4, callee_saved_value);
+ set_register(r5, callee_saved_value);
+ set_register(r6, callee_saved_value);
+ set_register(r7, callee_saved_value);
+ set_register(r8, callee_saved_value);
+ set_register(r9, callee_saved_value);
+ set_register(r10, callee_saved_value);
+ set_register(r11, callee_saved_value);
+
+ set_d_register(d8, &callee_saved_value_d);
+ set_d_register(d9, &callee_saved_value_d);
+ set_d_register(d10, &callee_saved_value_d);
+ set_d_register(d11, &callee_saved_value_d);
+ set_d_register(d12, &callee_saved_value_d);
+ set_d_register(d13, &callee_saved_value_d);
+ set_d_register(d14, &callee_saved_value_d);
+ set_d_register(d15, &callee_saved_value_d);
+ }
+ // Start the simulation.
+ if (Simulator::StopSimAt != -1L) {
+ execute<true>();
+ } else {
+ execute<false>();
+ }
+
+ if (!skipCalleeSavedRegsCheck) {
+ // Check that the callee-saved registers have been preserved.
+ MOZ_ASSERT(callee_saved_value == get_register(r4));
+ MOZ_ASSERT(callee_saved_value == get_register(r5));
+ MOZ_ASSERT(callee_saved_value == get_register(r6));
+ MOZ_ASSERT(callee_saved_value == get_register(r7));
+ MOZ_ASSERT(callee_saved_value == get_register(r8));
+ MOZ_ASSERT(callee_saved_value == get_register(r9));
+ MOZ_ASSERT(callee_saved_value == get_register(r10));
+ MOZ_ASSERT(callee_saved_value == get_register(r11));
+
+ uint64_t value;
+ get_d_register(d8, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+ get_d_register(d9, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+ get_d_register(d10, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+ get_d_register(d11, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+ get_d_register(d12, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+ get_d_register(d13, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+ get_d_register(d14, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+ get_d_register(d15, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+
+ // Restore callee-saved registers with the original value.
+ set_register(r4, r4_val);
+ set_register(r5, r5_val);
+ set_register(r6, r6_val);
+ set_register(r7, r7_val);
+ set_register(r8, r8_val);
+ set_register(r9, r9_val);
+ set_register(r10, r10_val);
+ set_register(r11, r11_val);
+
+ set_d_register(d8, &d8_val);
+ set_d_register(d9, &d9_val);
+ set_d_register(d10, &d10_val);
+ set_d_register(d11, &d11_val);
+ set_d_register(d12, &d12_val);
+ set_d_register(d13, &d13_val);
+ set_d_register(d14, &d14_val);
+ set_d_register(d15, &d15_val);
+ }
+}
+
+int32_t Simulator::call(uint8_t* entry, int argument_count, ...) {
+ va_list parameters;
+ va_start(parameters, argument_count);
+
+ // First four arguments passed in registers.
+ if (argument_count >= 1) {
+ set_register(r0, va_arg(parameters, int32_t));
+ }
+ if (argument_count >= 2) {
+ set_register(r1, va_arg(parameters, int32_t));
+ }
+ if (argument_count >= 3) {
+ set_register(r2, va_arg(parameters, int32_t));
+ }
+ if (argument_count >= 4) {
+ set_register(r3, va_arg(parameters, int32_t));
+ }
+
+ // Remaining arguments passed on stack.
+ int original_stack = get_register(sp);
+ int entry_stack = original_stack;
+ if (argument_count >= 4) {
+ entry_stack -= (argument_count - 4) * sizeof(int32_t);
+ }
+
+ entry_stack &= ~ABIStackAlignment;
+
+ // Store remaining arguments on stack, from low to high memory.
+ intptr_t* stack_argument = reinterpret_cast<intptr_t*>(entry_stack);
+ for (int i = 4; i < argument_count; i++) {
+ stack_argument[i - 4] = va_arg(parameters, int32_t);
+ }
+ va_end(parameters);
+ set_register(sp, entry_stack);
+
+ callInternal(entry);
+
+ // Pop stack passed arguments.
+ MOZ_ASSERT(entry_stack == get_register(sp));
+ set_register(sp, original_stack);
+
+ int32_t result = get_register(r0);
+ return result;
+}
+
+Simulator* Simulator::Current() {
+ JSContext* cx = TlsContext.get();
+ MOZ_ASSERT(CurrentThreadCanAccessRuntime(cx->runtime()));
+ return cx->simulator();
+}
+
+} // namespace jit
+} // namespace js
+
+js::jit::Simulator* JSContext::simulator() const { return simulator_; }
diff --git a/js/src/jit/arm/Simulator-arm.h b/js/src/jit/arm/Simulator-arm.h
new file mode 100644
index 0000000000..fba0f8ce5e
--- /dev/null
+++ b/js/src/jit/arm/Simulator-arm.h
@@ -0,0 +1,632 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef jit_arm_Simulator_arm_h
+#define jit_arm_Simulator_arm_h
+
+#ifdef JS_SIMULATOR_ARM
+
+# include "mozilla/Atomics.h"
+
+# include "jit/arm/Architecture-arm.h"
+# include "jit/arm/disasm/Disasm-arm.h"
+# include "jit/IonTypes.h"
+# include "js/AllocPolicy.h"
+# include "js/ProfilingFrameIterator.h"
+# include "threading/Thread.h"
+# include "vm/MutexIDs.h"
+# include "wasm/WasmSignalHandlers.h"
+
+namespace js {
+namespace jit {
+
+class JitActivation;
+class Simulator;
+class Redirection;
+class CachePage;
+class AutoLockSimulator;
+
+// When the SingleStepCallback is called, the simulator is about to execute
+// sim->get_pc() and the current machine state represents the completed
+// execution of the previous pc.
+typedef void (*SingleStepCallback)(void* arg, Simulator* sim, void* pc);
+
+// VFP rounding modes. See ARM DDI 0406B Page A2-29.
+enum VFPRoundingMode {
+ SimRN = 0 << 22, // Round to Nearest.
+ SimRP = 1 << 22, // Round towards Plus Infinity.
+ SimRM = 2 << 22, // Round towards Minus Infinity.
+ SimRZ = 3 << 22, // Round towards zero.
+
+ // Aliases.
+ kRoundToNearest = SimRN,
+ kRoundToPlusInf = SimRP,
+ kRoundToMinusInf = SimRM,
+ kRoundToZero = SimRZ
+};
+
+const uint32_t kVFPRoundingModeMask = 3 << 22;
+
+typedef int32_t Instr;
+class SimInstruction;
+
+// Per thread simulator state.
+class Simulator {
+ public:
+ friend class ArmDebugger;
+ enum Register {
+ no_reg = -1,
+ r0 = 0,
+ r1,
+ r2,
+ r3,
+ r4,
+ r5,
+ r6,
+ r7,
+ r8,
+ r9,
+ r10,
+ r11,
+ r12,
+ r13,
+ r14,
+ r15,
+ num_registers,
+ fp = 11,
+ ip = 12,
+ sp = 13,
+ lr = 14,
+ pc = 15,
+ s0 = 0,
+ s1,
+ s2,
+ s3,
+ s4,
+ s5,
+ s6,
+ s7,
+ s8,
+ s9,
+ s10,
+ s11,
+ s12,
+ s13,
+ s14,
+ s15,
+ s16,
+ s17,
+ s18,
+ s19,
+ s20,
+ s21,
+ s22,
+ s23,
+ s24,
+ s25,
+ s26,
+ s27,
+ s28,
+ s29,
+ s30,
+ s31,
+ num_s_registers = 32,
+ d0 = 0,
+ d1,
+ d2,
+ d3,
+ d4,
+ d5,
+ d6,
+ d7,
+ d8,
+ d9,
+ d10,
+ d11,
+ d12,
+ d13,
+ d14,
+ d15,
+ d16,
+ d17,
+ d18,
+ d19,
+ d20,
+ d21,
+ d22,
+ d23,
+ d24,
+ d25,
+ d26,
+ d27,
+ d28,
+ d29,
+ d30,
+ d31,
+ num_d_registers = 32,
+ q0 = 0,
+ q1,
+ q2,
+ q3,
+ q4,
+ q5,
+ q6,
+ q7,
+ q8,
+ q9,
+ q10,
+ q11,
+ q12,
+ q13,
+ q14,
+ q15,
+ num_q_registers = 16
+ };
+
+ // Returns nullptr on OOM.
+ static Simulator* Create();
+
+ static void Destroy(Simulator* simulator);
+
+ // Constructor/destructor are for internal use only; use the static methods
+ // above.
+ Simulator();
+ ~Simulator();
+
+ // The currently executing Simulator instance. Potentially there can be one
+ // for each native thread.
+ static Simulator* Current();
+
+ static uintptr_t StackLimit() { return Simulator::Current()->stackLimit(); }
+
+ // Disassemble some instructions starting at instr and print them
+ // on stdout. Useful for working within GDB after a MOZ_CRASH(),
+ // among other things.
+ //
+ // Typical use within a crashed instruction decoding method is simply:
+ //
+ // call Simulator::disassemble(instr, 1)
+ //
+ // or use one of the more convenient inline methods below.
+ static void disassemble(SimInstruction* instr, size_t n);
+
+ // Disassemble one instruction.
+ // "call disasm(instr)"
+ void disasm(SimInstruction* instr);
+
+ // Disassemble n instructions starting at instr.
+ // "call disasm(instr, 3)"
+ void disasm(SimInstruction* instr, size_t n);
+
+ // Skip backwards m instructions before starting, then disassemble n
+ // instructions.
+ // "call disasm(instr, 3, 7)"
+ void disasm(SimInstruction* instr, size_t m, size_t n);
+
+ uintptr_t* addressOfStackLimit();
+
+  // Accessors for register state. Reading the pc value adheres to the ARM
+  // architecture specification and is off by 8 from the currently executing
+  // instruction.
+ void set_register(int reg, int32_t value);
+ int32_t get_register(int reg) const;
+ double get_double_from_register_pair(int reg);
+ void set_register_pair_from_double(int reg, double* value);
+ void set_dw_register(int dreg, const int* dbl);
+
+ // Support for VFP.
+ void get_d_register(int dreg, uint64_t* value);
+ void set_d_register(int dreg, const uint64_t* value);
+ void get_d_register(int dreg, uint32_t* value);
+ void set_d_register(int dreg, const uint32_t* value);
+ void get_q_register(int qreg, uint64_t* value);
+ void set_q_register(int qreg, const uint64_t* value);
+ void get_q_register(int qreg, uint32_t* value);
+ void set_q_register(int qreg, const uint32_t* value);
+ void set_s_register(int reg, unsigned int value);
+ unsigned int get_s_register(int reg) const;
+
+ void set_d_register_from_double(int dreg, const double& dbl) {
+ setVFPRegister<double, 2>(dreg, dbl);
+ }
+ void get_double_from_d_register(int dreg, double* out) {
+ getFromVFPRegister<double, 2>(dreg, out);
+ }
+ void set_s_register_from_float(int sreg, const float flt) {
+ setVFPRegister<float, 1>(sreg, flt);
+ }
+ void get_float_from_s_register(int sreg, float* out) {
+ getFromVFPRegister<float, 1>(sreg, out);
+ }
+ void set_s_register_from_sinteger(int sreg, const int sint) {
+ setVFPRegister<int, 1>(sreg, sint);
+ }
+ int get_sinteger_from_s_register(int sreg) {
+ int ret;
+ getFromVFPRegister<int, 1>(sreg, &ret);
+ return ret;
+ }
+
+ // Special case of set_register and get_register to access the raw PC value.
+ void set_pc(int32_t value);
+ int32_t get_pc() const;
+
+ template <typename T>
+ T get_pc_as() const {
+ return reinterpret_cast<T>(get_pc());
+ }
+
+ void enable_single_stepping(SingleStepCallback cb, void* arg);
+ void disable_single_stepping();
+
+ uintptr_t stackLimit() const;
+ bool overRecursed(uintptr_t newsp = 0) const;
+ bool overRecursedWithExtra(uint32_t extra) const;
+
+ // Executes ARM instructions until the PC reaches end_sim_pc.
+ template <bool EnableStopSimAt>
+ void execute();
+
+ // Sets up the simulator state and grabs the result on return.
+ int32_t call(uint8_t* entry, int argument_count, ...);
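+  // Illustrative use (names are hypothetical):
+  //   int32_t rv = sim->call(code, 2, int32_t(arg0), int32_t(arg1));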
+
+ // Debugger input.
+ void setLastDebuggerInput(char* input);
+ char* lastDebuggerInput() { return lastDebuggerInput_; }
+
+ // Returns true if pc register contains one of the 'special_values' defined
+ // below (bad_lr, end_sim_pc).
+ bool has_bad_pc() const;
+
+ private:
+ enum special_values {
+    // Known bad pc value to ensure that the simulator does not execute
+    // without being properly set up.
+ bad_lr = -1,
+ // A pc value used to signal the simulator to stop execution. Generally
+ // the lr is set to this value on transition from native C code to
+ // simulated execution, so that the simulator can "return" to the native
+ // C code.
+ end_sim_pc = -2
+ };
+
+ // ForbidUnaligned means "always fault on unaligned access".
+ //
+ // AllowUnaligned means "allow the unaligned access if other conditions are
+ // met". The "other conditions" vary with the instruction: For all
+  // instructions the base condition is !HasAlignmentFault(), i.e., the chip is
+ // configured to allow unaligned accesses. For instructions like VLD1
+ // there is an additional constraint that the alignment attribute in the
+ // instruction must be set to "default alignment".
+
+ enum UnalignedPolicy { ForbidUnaligned, AllowUnaligned };
+
+ bool init();
+
+ // Checks if the current instruction should be executed based on its
+ // condition bits.
+ inline bool conditionallyExecute(SimInstruction* instr);
+
+ // Helper functions to set the conditional flags in the architecture state.
+ void setNZFlags(int32_t val);
+ void setCFlag(bool val);
+ void setVFlag(bool val);
+ bool carryFrom(int32_t left, int32_t right, int32_t carry = 0);
+ bool borrowFrom(int32_t left, int32_t right);
+ bool overflowFrom(int32_t alu_out, int32_t left, int32_t right,
+ bool addition);
+
+ inline int getCarry() { return c_flag_ ? 1 : 0; };
+
+ // Support for VFP.
+ void compute_FPSCR_Flags(double val1, double val2);
+ void copy_FPSCR_to_APSR();
+ inline void canonicalizeNaN(double* value);
+ inline void canonicalizeNaN(float* value);
+
+ // Helper functions to decode common "addressing" modes
+ int32_t getShiftRm(SimInstruction* instr, bool* carry_out);
+ int32_t getImm(SimInstruction* instr, bool* carry_out);
+ int32_t processPU(SimInstruction* instr, int num_regs, int operand_size,
+ intptr_t* start_address, intptr_t* end_address);
+ void handleRList(SimInstruction* instr, bool load);
+ void handleVList(SimInstruction* inst);
+ void softwareInterrupt(SimInstruction* instr);
+
+ // Stop helper functions.
+ inline bool isStopInstruction(SimInstruction* instr);
+ inline bool isWatchedStop(uint32_t bkpt_code);
+ inline bool isEnabledStop(uint32_t bkpt_code);
+ inline void enableStop(uint32_t bkpt_code);
+ inline void disableStop(uint32_t bkpt_code);
+ inline void increaseStopCounter(uint32_t bkpt_code);
+ void printStopInfo(uint32_t code);
+
+ // Handle a wasm interrupt triggered by an async signal handler.
+ JS::ProfilingFrameIterator::RegisterState registerState();
+
+ // Handle any wasm faults, returning true if the fault was handled.
+ // This method is rather hot so inline the normal (no-wasm) case.
+ bool MOZ_ALWAYS_INLINE handleWasmSegFault(int32_t addr, unsigned numBytes) {
+ if (MOZ_LIKELY(!wasm::CodeExists)) {
+ return false;
+ }
+
+ uint8_t* newPC;
+ if (!wasm::MemoryAccessTraps(registerState(), (uint8_t*)addr, numBytes,
+ &newPC)) {
+ return false;
+ }
+
+ set_pc(int32_t(newPC));
+ return true;
+ }
+
+ // Read and write memory.
+ inline uint8_t readBU(int32_t addr);
+ inline int8_t readB(int32_t addr);
+ inline void writeB(int32_t addr, uint8_t value);
+ inline void writeB(int32_t addr, int8_t value);
+
+ inline uint8_t readExBU(int32_t addr);
+ inline int32_t writeExB(int32_t addr, uint8_t value);
+
+ inline uint16_t readHU(int32_t addr, SimInstruction* instr);
+ inline int16_t readH(int32_t addr, SimInstruction* instr);
+ // Note: Overloaded on the sign of the value.
+ inline void writeH(int32_t addr, uint16_t value, SimInstruction* instr);
+ inline void writeH(int32_t addr, int16_t value, SimInstruction* instr);
+
+ inline uint16_t readExHU(int32_t addr, SimInstruction* instr);
+ inline int32_t writeExH(int32_t addr, uint16_t value, SimInstruction* instr);
+
+ inline int readW(int32_t addr, SimInstruction* instr,
+ UnalignedPolicy f = ForbidUnaligned);
+ inline void writeW(int32_t addr, int value, SimInstruction* instr,
+ UnalignedPolicy f = ForbidUnaligned);
+
+ inline uint64_t readQ(int32_t addr, SimInstruction* instr,
+ UnalignedPolicy f = ForbidUnaligned);
+ inline void writeQ(int32_t addr, uint64_t value, SimInstruction* instr,
+ UnalignedPolicy f = ForbidUnaligned);
+
+ inline int readExW(int32_t addr, SimInstruction* instr);
+ inline int writeExW(int32_t addr, int value, SimInstruction* instr);
+
+ int32_t* readDW(int32_t addr);
+ void writeDW(int32_t addr, int32_t value1, int32_t value2);
+
+ int32_t readExDW(int32_t addr, int32_t* hibits);
+ int32_t writeExDW(int32_t addr, int32_t value1, int32_t value2);
+
+ // Executing is handled based on the instruction type.
+ // Both type 0 and type 1 rolled into one.
+ void decodeType01(SimInstruction* instr);
+ void decodeType2(SimInstruction* instr);
+ void decodeType3(SimInstruction* instr);
+ void decodeType4(SimInstruction* instr);
+ void decodeType5(SimInstruction* instr);
+ void decodeType6(SimInstruction* instr);
+ void decodeType7(SimInstruction* instr);
+
+ // Support for VFP.
+ void decodeTypeVFP(SimInstruction* instr);
+ void decodeType6CoprocessorIns(SimInstruction* instr);
+ void decodeSpecialCondition(SimInstruction* instr);
+
+ void decodeVMOVBetweenCoreAndSinglePrecisionRegisters(SimInstruction* instr);
+ void decodeVCMP(SimInstruction* instr);
+ void decodeVCVTBetweenDoubleAndSingle(SimInstruction* instr);
+ void decodeVCVTBetweenFloatingPointAndInteger(SimInstruction* instr);
+ void decodeVCVTBetweenFloatingPointAndIntegerFrac(SimInstruction* instr);
+
+ // Support for some system functions.
+ void decodeType7CoprocessorIns(SimInstruction* instr);
+
+ // Executes one instruction.
+ void instructionDecode(SimInstruction* instr);
+
+ public:
+ static int64_t StopSimAt;
+
+  // For testing the MoveResolver code, a MoveResolver is set up, the VFP
+  // registers are loaded with pre-determined values, and the generated code
+  // sequence is simulated. That test requires the callee-saved registers to
+  // keep their values, so this flag disables the callee-saved register
+  // set-and-check performed by callInternal.
+ bool skipCalleeSavedRegsCheck;
+
+ // Runtime call support.
+ static void* RedirectNativeFunction(void* nativeFunction,
+ ABIFunctionType type);
+
+ private:
+ // Handle arguments and return value for runtime FP functions.
+ void getFpArgs(double* x, double* y, int32_t* z);
+ void getFpFromStack(int32_t* stack, double* x1);
+ void setCallResultDouble(double result);
+ void setCallResultFloat(float result);
+ void setCallResult(int64_t res);
+ void scratchVolatileRegisters(bool scratchFloat = true);
+
+ template <class ReturnType, int register_size>
+ void getFromVFPRegister(int reg_index, ReturnType* out);
+
+ template <class InputType, int register_size>
+ void setVFPRegister(int reg_index, const InputType& value);
+
+ void callInternal(uint8_t* entry);
+
+ // Architecture state.
+ // Saturating instructions require a Q flag to indicate saturation.
+ // There is currently no way to read the CPSR directly, and thus read the Q
+ // flag, so this is left unimplemented.
+ int32_t registers_[16];
+ bool n_flag_;
+ bool z_flag_;
+ bool c_flag_;
+ bool v_flag_;
+
+ // VFP architecture state.
+ uint32_t vfp_registers_[num_d_registers * 2];
+ bool n_flag_FPSCR_;
+ bool z_flag_FPSCR_;
+ bool c_flag_FPSCR_;
+ bool v_flag_FPSCR_;
+
+ // VFP rounding mode. See ARM DDI 0406B Page A2-29.
+ VFPRoundingMode FPSCR_rounding_mode_;
+ bool FPSCR_default_NaN_mode_;
+
+ // VFP FP exception flags architecture state.
+ bool inv_op_vfp_flag_;
+ bool div_zero_vfp_flag_;
+ bool overflow_vfp_flag_;
+ bool underflow_vfp_flag_;
+ bool inexact_vfp_flag_;
+
+ // Simulator support.
+ char* stack_;
+ uintptr_t stackLimit_;
+ bool pc_modified_;
+ int64_t icount_;
+
+ // Debugger input.
+ char* lastDebuggerInput_;
+
+ // Registered breakpoints.
+ SimInstruction* break_pc_;
+ Instr break_instr_;
+
+ // Single-stepping support
+ bool single_stepping_;
+ SingleStepCallback single_step_callback_;
+ void* single_step_callback_arg_;
+
+ // A stop is watched if its code is less than kNumOfWatchedStops.
+ // Only watched stops support enabling/disabling and the counter feature.
+ static const uint32_t kNumOfWatchedStops = 256;
+
+ // Breakpoint is disabled if bit 31 is set.
+ static const uint32_t kStopDisabledBit = 1 << 31;
+
+  // A stop is enabled, meaning the simulator will stop when it reaches the
+  // instruction, if bit 31 of watched_stops_[code].count is unset.
+  // The value watched_stops_[code].count & ~(1 << 31) indicates how many times
+  // the breakpoint has been hit or stepped over.
+ struct StopCountAndDesc {
+ uint32_t count;
+ char* desc;
+ };
+ StopCountAndDesc watched_stops_[kNumOfWatchedStops];
+
+ public:
+ int64_t icount() { return icount_; }
+
+ private:
+ // Exclusive access monitor
+ void exclusiveMonitorSet(uint64_t value);
+ uint64_t exclusiveMonitorGetAndClear(bool* held);
+ void exclusiveMonitorClear();
+
+ bool exclusiveMonitorHeld_;
+ uint64_t exclusiveMonitor_;
+};
+
+// Process wide simulator state.
+class SimulatorProcess {
+ friend class Redirection;
+ friend class AutoLockSimulatorCache;
+
+ private:
+ // ICache checking.
+ struct ICacheHasher {
+ typedef void* Key;
+ typedef void* Lookup;
+ static HashNumber hash(const Lookup& l);
+ static bool match(const Key& k, const Lookup& l);
+ };
+
+ public:
+ typedef HashMap<void*, CachePage*, ICacheHasher, SystemAllocPolicy> ICacheMap;
+
+ static mozilla::Atomic<size_t, mozilla::ReleaseAcquire>
+ ICacheCheckingDisableCount;
+ static void FlushICache(void* start, size_t size);
+
+ static void checkICacheLocked(SimInstruction* instr);
+
+ static bool initialize() {
+ singleton_ = js_new<SimulatorProcess>();
+ return singleton_;
+ }
+ static void destroy() {
+ js_delete(singleton_);
+ singleton_ = nullptr;
+ }
+
+ SimulatorProcess();
+ ~SimulatorProcess();
+
+ private:
+ static SimulatorProcess* singleton_;
+
+ // This lock creates a critical section around 'redirection_' and
+ // 'icache_', which are referenced both by the execution engine
+ // and by the off-thread compiler (see Redirection::Get in the cpp file).
+ Mutex cacheLock_ MOZ_UNANNOTATED;
+
+ Redirection* redirection_;
+ ICacheMap icache_;
+
+ public:
+ static ICacheMap& icache() {
+ // Technically we need the lock to access the innards of the
+ // icache, not to take its address, but the latter condition
+ // serves as a useful complement to the former.
+ singleton_->cacheLock_.assertOwnedByCurrentThread();
+ return singleton_->icache_;
+ }
+
+ static Redirection* redirection() {
+ singleton_->cacheLock_.assertOwnedByCurrentThread();
+ return singleton_->redirection_;
+ }
+
+ static void setRedirection(js::jit::Redirection* redirection) {
+ singleton_->cacheLock_.assertOwnedByCurrentThread();
+ singleton_->redirection_ = redirection;
+ }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif /* JS_SIMULATOR_ARM */
+
+#endif /* jit_arm_Simulator_arm_h */
diff --git a/js/src/jit/arm/Trampoline-arm.cpp b/js/src/jit/arm/Trampoline-arm.cpp
new file mode 100644
index 0000000000..551f243bd3
--- /dev/null
+++ b/js/src/jit/arm/Trampoline-arm.cpp
@@ -0,0 +1,831 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm/SharedICHelpers-arm.h"
+#include "jit/Bailouts.h"
+#include "jit/BaselineFrame.h"
+#include "jit/CalleeToken.h"
+#include "jit/JitFrames.h"
+#include "jit/JitRuntime.h"
+#include "jit/JitSpewer.h"
+#include "jit/PerfSpewer.h"
+#include "jit/VMFunctions.h"
+#include "vm/JitActivation.h" // js::jit::JitActivation
+#include "vm/JSContext.h"
+#include "vm/Realm.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+static const FloatRegisterSet NonVolatileFloatRegs = FloatRegisterSet(
+ (1ULL << FloatRegisters::d8) | (1ULL << FloatRegisters::d9) |
+ (1ULL << FloatRegisters::d10) | (1ULL << FloatRegisters::d11) |
+ (1ULL << FloatRegisters::d12) | (1ULL << FloatRegisters::d13) |
+ (1ULL << FloatRegisters::d14) | (1ULL << FloatRegisters::d15));
+
+static void GenerateReturn(MacroAssembler& masm, int returnCode) {
+ // Restore non-volatile floating point registers.
+ masm.transferMultipleByRuns(NonVolatileFloatRegs, IsLoad, StackPointer, IA);
+
+ // Get rid of padding word.
+ masm.addPtr(Imm32(sizeof(void*)), sp);
+
+ // Set up return value
+ masm.ma_mov(Imm32(returnCode), r0);
+
+ // Pop and return
+ masm.startDataTransferM(IsLoad, sp, IA, WriteBack);
+ masm.transferReg(r4);
+ masm.transferReg(r5);
+ masm.transferReg(r6);
+ masm.transferReg(r7);
+ masm.transferReg(r8);
+ masm.transferReg(r9);
+ masm.transferReg(r10);
+ masm.transferReg(r11);
+ // r12 isn't saved, so it shouldn't be restored.
+ masm.transferReg(pc);
+ masm.finishDataTransfer();
+ masm.flushBuffer();
+}
+
+struct EnterJITStack {
+ double d8;
+ double d9;
+ double d10;
+ double d11;
+ double d12;
+ double d13;
+ double d14;
+ double d15;
+
+ // Padding.
+ void* padding;
+
+ // Non-volatile registers.
+ void* r4;
+ void* r5;
+ void* r6;
+ void* r7;
+ void* r8;
+ void* r9;
+ void* r10;
+ void* r11;
+  // The ABI does not expect r12 (ip) to be preserved.
+ void* lr;
+
+ // Arguments.
+ // code == r0
+ // argc == r1
+ // argv == r2
+ // frame == r3
+ CalleeToken token;
+ JSObject* scopeChain;
+ size_t numStackValues;
+ Value* vp;
+};
+
+/*
+ * This method generates a trampoline for a C++ function with the following
+ * signature:
+ * void enter(void* code, int argc, Value* argv, InterpreterFrame* fp,
+ * CalleeToken calleeToken, JSObject* scopeChain, Value* vp)
+ * ...using the standard EABI calling convention.
+ */
+void JitRuntime::generateEnterJIT(JSContext* cx, MacroAssembler& masm) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateEnterJIT");
+
+ enterJITOffset_ = startTrampolineCode(masm);
+
+ const Address slot_token(sp, offsetof(EnterJITStack, token));
+ const Address slot_vp(sp, offsetof(EnterJITStack, vp));
+
+ static_assert(OsrFrameReg == r3);
+
+ Assembler* aasm = &masm;
+
+ // Save non-volatile registers. These must be saved by the trampoline,
+ // rather than the JIT'd code, because they are scanned by the conservative
+ // scanner.
+ masm.startDataTransferM(IsStore, sp, DB, WriteBack);
+ masm.transferReg(r4); // [sp,0]
+ masm.transferReg(r5); // [sp,4]
+ masm.transferReg(r6); // [sp,8]
+ masm.transferReg(r7); // [sp,12]
+ masm.transferReg(r8); // [sp,16]
+ masm.transferReg(r9); // [sp,20]
+ masm.transferReg(r10); // [sp,24]
+ masm.transferReg(r11); // [sp,28]
+  // The ABI does not expect r12 (ip) to be preserved.
+ masm.transferReg(lr); // [sp,32]
+ // The 5th argument is located at [sp, 36]
+ masm.finishDataTransfer();
+
+ // Add padding word.
+ masm.subPtr(Imm32(sizeof(void*)), sp);
+
+ // Push the float registers.
+ masm.transferMultipleByRuns(NonVolatileFloatRegs, IsStore, sp, DB);
+
+ // Load calleeToken into r9.
+ masm.loadPtr(slot_token, r9);
+
+ // Save stack pointer.
+ masm.movePtr(sp, r11);
+
+ // Load the number of actual arguments into r10.
+ masm.loadPtr(slot_vp, r10);
+ masm.unboxInt32(Address(r10, 0), r10);
+
+ {
+ Label noNewTarget;
+ masm.branchTest32(Assembler::Zero, r9,
+ Imm32(CalleeToken_FunctionConstructing), &noNewTarget);
+
+ masm.add32(Imm32(1), r1);
+
+ masm.bind(&noNewTarget);
+ }
+
+ // Guarantee stack alignment of Jit frames.
+ //
+ // This code moves the stack pointer to the location where it should be when
+ // we enter the Jit frame. It moves the stack pointer such that we have
+ // enough space reserved for pushing the arguments, and the JitFrameLayout.
+ // The stack pointer is also aligned on the alignment expected by the Jit
+ // frames.
+ //
+  // At the end, the register r4 is a pointer to the stack location where the
+  // first argument is expected by the Jit frame.
+ //
+ aasm->as_sub(r4, sp, O2RegImmShift(r1, LSL, 3)); // r4 = sp - argc*8
+ aasm->as_bic(r4, r4, Imm8(JitStackAlignment - 1));
+  // r4 now points to the aligned bottom of the list of arguments.
+ static_assert(
+ sizeof(JitFrameLayout) % JitStackAlignment == 0,
+ "No need to consider the JitFrameLayout for aligning the stack");
+ // sp' = ~(JitStackAlignment - 1) & (sp - argc * sizeof(Value))
+ masm.movePtr(r4, sp);
+
+ // Get a copy of the number of args to use as a decrement counter, also set
+ // the zero condition code.
+ aasm->as_mov(r5, O2Reg(r1), SetCC);
+
+ // Loop over arguments, copying them from an unknown buffer onto the Ion
+ // stack so they can be accessed from JIT'ed code.
+ {
+ Label header, footer;
+ // If there aren't any arguments, don't do anything.
+ aasm->as_b(&footer, Assembler::Zero);
+ // Get the top of the loop.
+ masm.bind(&header);
+ aasm->as_sub(r5, r5, Imm8(1), SetCC);
+    // We could unroll this using a loadm (particularly since the offset is
+    // effectively 0), but that seems more error-prone and complex.
+ // BIG FAT WARNING: this loads both r6 and r7.
+ aasm->as_extdtr(IsLoad, 64, true, PostIndex, r6,
+ EDtrAddr(r2, EDtrOffImm(8)));
+ aasm->as_extdtr(IsStore, 64, true, PostIndex, r6,
+ EDtrAddr(r4, EDtrOffImm(8)));
+ aasm->as_b(&header, Assembler::NonZero);
+ masm.bind(&footer);
+ }
+
+ // Push the callee token.
+ masm.push(r9);
+
+ // Push the frame descriptor.
+ masm.pushFrameDescriptorForJitCall(FrameType::CppToJSJit, r10, r10);
+
+ Label returnLabel;
+ {
+ // Handle Interpreter -> Baseline OSR.
+ AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
+ MOZ_ASSERT(!regs.has(r11));
+ regs.take(OsrFrameReg);
+ regs.take(r0); // jitcode
+ MOZ_ASSERT(!regs.has(ReturnReg), "ReturnReg matches r0");
+
+ const Address slot_numStackValues(r11,
+ offsetof(EnterJITStack, numStackValues));
+
+ Label notOsr;
+ masm.branchTestPtr(Assembler::Zero, OsrFrameReg, OsrFrameReg, &notOsr);
+
+ Register scratch = regs.takeAny();
+
+ Register numStackValues = regs.takeAny();
+ masm.load32(slot_numStackValues, numStackValues);
+
+    // Write return address. On ARM, CodeLabel is only used for tableswitch,
+    // so we can't use it here to get the return address. Instead, we use pc
+    // plus a fixed offset to the jump to returnLabel. Reading pc yields the
+    // address of the current instruction plus 8, so we add the size of 2
+    // instructions to skip the instructions emitted by push and
+    // jump(&skipJump).
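+    // (The pushed value thus points at the jump(&returnLabel) below.)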
+ {
+ AutoForbidPoolsAndNops afp(&masm, 5);
+ Label skipJump;
+ masm.mov(pc, scratch);
+ masm.addPtr(Imm32(2 * sizeof(uint32_t)), scratch);
+ masm.push(scratch);
+ masm.jump(&skipJump);
+ masm.jump(&returnLabel);
+ masm.bind(&skipJump);
+ }
+
+ // Frame prologue.
+ masm.push(FramePointer);
+ masm.mov(sp, FramePointer);
+
+ // Reserve frame.
+ masm.subPtr(Imm32(BaselineFrame::Size()), sp);
+
+ Register framePtrScratch = regs.takeAny();
+ masm.touchFrameValues(numStackValues, scratch, framePtrScratch);
+ masm.mov(sp, framePtrScratch);
+
+ // Reserve space for locals and stack values.
+ masm.ma_lsl(Imm32(3), numStackValues, scratch);
+ masm.ma_sub(sp, scratch, sp);
+
+ // Enter exit frame.
+ masm.pushFrameDescriptor(FrameType::BaselineJS);
+ masm.push(Imm32(0)); // Fake return address.
+ masm.push(FramePointer);
+ // No GC things to mark on the stack, push a bare token.
+ masm.loadJSContext(scratch);
+ masm.enterFakeExitFrame(scratch, scratch, ExitFrameType::Bare);
+
+ masm.push(r0); // jitcode
+
+ using Fn = bool (*)(BaselineFrame * frame, InterpreterFrame * interpFrame,
+ uint32_t numStackValues);
+ masm.setupUnalignedABICall(scratch);
+ masm.passABIArg(framePtrScratch); // BaselineFrame
+ masm.passABIArg(OsrFrameReg); // InterpreterFrame
+ masm.passABIArg(numStackValues);
+ masm.callWithABI<Fn, jit::InitBaselineFrameForOsr>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckHasExitFrame);
+
+ Register jitcode = regs.takeAny();
+ masm.pop(jitcode);
+
+ MOZ_ASSERT(jitcode != ReturnReg);
+
+ Label error;
+ masm.addPtr(Imm32(ExitFrameLayout::SizeWithFooter()), sp);
+ masm.branchIfFalseBool(ReturnReg, &error);
+
+ // If OSR-ing, then emit instrumentation for setting lastProfilerFrame
+ // if profiler instrumentation is enabled.
+ {
+ Label skipProfilingInstrumentation;
+ AbsoluteAddress addressOfEnabled(
+ cx->runtime()->geckoProfiler().addressOfEnabled());
+ masm.branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
+ &skipProfilingInstrumentation);
+ masm.profilerEnterFrame(FramePointer, scratch);
+ masm.bind(&skipProfilingInstrumentation);
+ }
+
+ masm.jump(jitcode);
+
+ // OOM: frame epilogue, load error value, discard return address and return.
+ masm.bind(&error);
+ masm.mov(FramePointer, sp);
+ masm.pop(FramePointer);
+ masm.addPtr(Imm32(sizeof(uintptr_t)), sp); // Return address.
+ masm.moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
+ masm.jump(&returnLabel);
+
+ masm.bind(&notOsr);
+ // Load the scope chain in R1.
+ MOZ_ASSERT(R1.scratchReg() != r0);
+ masm.loadPtr(Address(r11, offsetof(EnterJITStack, scopeChain)),
+ R1.scratchReg());
+ }
+
+ // The callee will push the return address and frame pointer on the stack,
+ // thus we check that the stack would be aligned once the call is complete.
+ masm.assertStackAlignment(JitStackAlignment, 2 * sizeof(uintptr_t));
+
+ // Call the function.
+ masm.callJitNoProfiler(r0);
+
+ // Interpreter -> Baseline OSR will return here.
+ masm.bind(&returnLabel);
+
+ // Discard arguments and padding. Set sp to the address of the EnterJITStack
+ // on the stack.
+ masm.mov(r11, sp);
+
+ // Store the returned value into the slot_vp
+ masm.loadPtr(slot_vp, r5);
+ masm.storeValue(JSReturnOperand, Address(r5, 0));
+
+ // Restore non-volatile registers and return.
+ GenerateReturn(masm, true);
+}
+
+// static
+mozilla::Maybe<::JS::ProfilingFrameIterator::RegisterState>
+JitRuntime::getCppEntryRegisters(JitFrameLayout* frameStackAddress) {
+ // Not supported, or not implemented yet.
+ // TODO: Implement along with the corresponding stack-walker changes, in
+ // coordination with the Gecko Profiler, see bug 1635987 and follow-ups.
+ return mozilla::Nothing{};
+}
+
+void JitRuntime::generateInvalidator(MacroAssembler& masm, Label* bailoutTail) {
+ // See large comment in x86's JitRuntime::generateInvalidator.
+
+ AutoCreatedBy acb(masm, "JitRuntime::generateInvalidator");
+
+ invalidatorOffset_ = startTrampolineCode(masm);
+
+ // At this point, one of two things has happened:
+ // 1) Execution has just returned from C code, which left the stack aligned
+ // 2) Execution has just returned from Ion code, which left the stack
+ // unaligned. The old return address should not matter, but we still want the
+ // stack to be aligned, and there is no good reason to automatically align it
+ // with a call to setupUnalignedABICall.
+ masm.as_bic(sp, sp, Imm8(7));
+ masm.startDataTransferM(IsStore, sp, DB, WriteBack);
+ // We don't have to push everything, but this is likely easier.
+ // Setting regs_.
+ for (uint32_t i = 0; i < Registers::Total; i++) {
+ masm.transferReg(Register::FromCode(i));
+ }
+ masm.finishDataTransfer();
+
+  // Since our data structures for stack inspection are compile-time fixed,
+ // if there are only 16 double registers, then we need to reserve
+ // space on the stack for the missing 16.
+ if (FloatRegisters::ActualTotalPhys() != FloatRegisters::TotalPhys) {
+ ScratchRegisterScope scratch(masm);
+ int missingRegs =
+ FloatRegisters::TotalPhys - FloatRegisters::ActualTotalPhys();
+ masm.ma_sub(Imm32(missingRegs * sizeof(double)), sp, scratch);
+ }
+
+ masm.startFloatTransferM(IsStore, sp, DB, WriteBack);
+ for (uint32_t i = 0; i < FloatRegisters::ActualTotalPhys(); i++) {
+ masm.transferFloatReg(FloatRegister(i, FloatRegister::Double));
+ }
+ masm.finishFloatTransfer();
+
+ masm.ma_mov(sp, r0);
+ // Reserve 8 bytes for the outparam to ensure alignment for
+ // setupAlignedABICall.
+ masm.reserveStack(sizeof(void*) * 2);
+ masm.mov(sp, r1);
+ using Fn =
+ bool (*)(InvalidationBailoutStack * sp, BaselineBailoutInfo * *info);
+ masm.setupAlignedABICall();
+ masm.passABIArg(r0);
+ masm.passABIArg(r1);
+ masm.callWithABI<Fn, InvalidationBailout>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther);
+
+ masm.pop(r2); // Get bailoutInfo outparam.
+
+ // Pop the machine state and the dead frame.
+ masm.moveToStackPtr(FramePointer);
+
+ // Jump to shared bailout tail. The BailoutInfo pointer has to be in r2.
+ masm.jump(bailoutTail);
+}
+
+void JitRuntime::generateArgumentsRectifier(MacroAssembler& masm,
+ ArgumentsRectifierKind kind) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateArgumentsRectifier");
+
+ switch (kind) {
+ case ArgumentsRectifierKind::Normal:
+ argumentsRectifierOffset_ = startTrampolineCode(masm);
+ break;
+ case ArgumentsRectifierKind::TrialInlining:
+ trialInliningArgumentsRectifierOffset_ = startTrampolineCode(masm);
+ break;
+ }
+ masm.pushReturnAddress();
+
+ // Frame prologue.
+ //
+ // NOTE: if this changes, fix the Baseline bailout code too!
+ // See BaselineStackBuilder::calculatePrevFramePtr and
+ // BaselineStackBuilder::buildRectifierFrame (in BaselineBailouts.cpp).
+ masm.push(FramePointer);
+ masm.mov(StackPointer, FramePointer);
+
+ static_assert(JitStackAlignment == sizeof(Value));
+
+ // Copy number of actual arguments into r0 and r8.
+ masm.loadNumActualArgs(FramePointer, r0);
+ masm.mov(r0, r8);
+
+ // Load the number of |undefined|s to push into r6.
+ masm.loadPtr(
+ Address(FramePointer, RectifierFrameLayout::offsetOfCalleeToken()), r1);
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_and(Imm32(CalleeTokenMask), r1, r6, scratch);
+ }
+ masm.loadFunctionArgCount(r6, r6);
+
+ masm.ma_sub(r6, r8, r2);
+
+ // Get the topmost argument.
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_alu(sp, lsl(r8, 3), r3, OpAdd); // r3 <- sp + nargs * 8
+ masm.ma_add(r3, Imm32(sizeof(RectifierFrameLayout)), r3, scratch);
+ }
+
+ {
+ Label notConstructing;
+
+ masm.branchTest32(Assembler::Zero, r1,
+ Imm32(CalleeToken_FunctionConstructing),
+ &notConstructing);
+
+    // Add sizeof(Value) to skip over |this|.
+ masm.as_extdtr(IsLoad, 64, true, Offset, r4, EDtrAddr(r3, EDtrOffImm(8)));
+ masm.as_extdtr(IsStore, 64, true, PreIndex, r4,
+ EDtrAddr(sp, EDtrOffImm(-8)));
+
+ masm.bind(&notConstructing);
+ }
+
+ // Push undefined.
+ masm.moveValue(UndefinedValue(), ValueOperand(r5, r4));
+ {
+ Label undefLoopTop;
+ masm.bind(&undefLoopTop);
+ masm.as_extdtr(IsStore, 64, true, PreIndex, r4,
+ EDtrAddr(sp, EDtrOffImm(-8)));
+ masm.as_sub(r2, r2, Imm8(1), SetCC);
+
+ masm.ma_b(&undefLoopTop, Assembler::NonZero);
+ }
+
+ // Push arguments, |nargs| + 1 times (to include |this|).
+ {
+ Label copyLoopTop;
+ masm.bind(&copyLoopTop);
+ masm.as_extdtr(IsLoad, 64, true, PostIndex, r4,
+ EDtrAddr(r3, EDtrOffImm(-8)));
+ masm.as_extdtr(IsStore, 64, true, PreIndex, r4,
+ EDtrAddr(sp, EDtrOffImm(-8)));
+
+ masm.as_sub(r8, r8, Imm8(1), SetCC);
+ masm.ma_b(&copyLoopTop, Assembler::NotSigned);
+ }
+
+ // Construct JitFrameLayout.
+ masm.ma_push(r1); // callee token
+ masm.pushFrameDescriptorForJitCall(FrameType::Rectifier, r0, r0);
+
+ // Call the target function.
+ masm.andPtr(Imm32(CalleeTokenMask), r1);
+ switch (kind) {
+ case ArgumentsRectifierKind::Normal:
+ masm.loadJitCodeRaw(r1, r3);
+ argumentsRectifierReturnOffset_ = masm.callJitNoProfiler(r3);
+ break;
+ case ArgumentsRectifierKind::TrialInlining:
+ Label noBaselineScript, done;
+ masm.loadBaselineJitCodeRaw(r1, r3, &noBaselineScript);
+ masm.callJitNoProfiler(r3);
+ masm.jump(&done);
+
+ // See BaselineCacheIRCompiler::emitCallInlinedFunction.
+ masm.bind(&noBaselineScript);
+ masm.loadJitCodeRaw(r1, r3);
+ masm.callJitNoProfiler(r3);
+ masm.bind(&done);
+ break;
+ }
+
+ masm.mov(FramePointer, StackPointer);
+ masm.pop(FramePointer);
+ masm.ret();
+}
+
+static void PushBailoutFrame(MacroAssembler& masm, Register spArg) {
+#ifdef ENABLE_WASM_SIMD
+# error "Needs more careful logic if SIMD is enabled"
+#endif
+
+ // STEP 1a: Save our register sets to the stack so Bailout() can read
+ // everything.
+ // sp % 8 == 0
+
+ masm.startDataTransferM(IsStore, sp, DB, WriteBack);
+ // We don't have to push everything, but this is likely easier.
+ // Setting regs_.
+ for (uint32_t i = 0; i < Registers::Total; i++) {
+ masm.transferReg(Register::FromCode(i));
+ }
+ masm.finishDataTransfer();
+
+ ScratchRegisterScope scratch(masm);
+
+  // Since our data structures for stack inspection are compile-time fixed,
+ // if there are only 16 double registers, then we need to reserve
+ // space on the stack for the missing 16.
+ if (FloatRegisters::ActualTotalPhys() != FloatRegisters::TotalPhys) {
+ int missingRegs =
+ FloatRegisters::TotalPhys - FloatRegisters::ActualTotalPhys();
+ masm.ma_sub(Imm32(missingRegs * sizeof(double)), sp, scratch);
+ }
+ masm.startFloatTransferM(IsStore, sp, DB, WriteBack);
+ for (uint32_t i = 0; i < FloatRegisters::ActualTotalPhys(); i++) {
+ masm.transferFloatReg(FloatRegister(i, FloatRegister::Double));
+ }
+ masm.finishFloatTransfer();
+
+ // The current stack pointer is the first argument to jit::Bailout.
+ masm.ma_mov(sp, spArg);
+}
+
+static void GenerateBailoutThunk(MacroAssembler& masm, Label* bailoutTail) {
+ PushBailoutFrame(masm, r0);
+
+ // Make space for Bailout's bailoutInfo outparam.
+ masm.reserveStack(sizeof(void*));
+ masm.mov(sp, r1);
+ using Fn = bool (*)(BailoutStack * sp, BaselineBailoutInfo * *info);
+ masm.setupAlignedABICall();
+
+ masm.passABIArg(r0);
+ masm.passABIArg(r1);
+
+ masm.callWithABI<Fn, Bailout>(MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckOther);
+ masm.pop(r2); // Get the bailoutInfo outparam.
+
+ // Remove both the bailout frame and the topmost Ion frame's stack.
+ masm.moveToStackPtr(FramePointer);
+
+ // Jump to shared bailout tail. The BailoutInfo pointer has to be in r2.
+ masm.jump(bailoutTail);
+}
+
+void JitRuntime::generateBailoutHandler(MacroAssembler& masm,
+ Label* bailoutTail) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateBailoutHandler");
+
+ bailoutHandlerOffset_ = startTrampolineCode(masm);
+
+ GenerateBailoutThunk(masm, bailoutTail);
+}
+
+bool JitRuntime::generateVMWrapper(JSContext* cx, MacroAssembler& masm,
+ const VMFunctionData& f, DynFn nativeFun,
+ uint32_t* wrapperOffset) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateVMWrapper");
+
+ *wrapperOffset = startTrampolineCode(masm);
+
+ AllocatableGeneralRegisterSet regs(Register::Codes::WrapperMask);
+
+ static_assert(
+ (Register::Codes::VolatileMask & ~Register::Codes::WrapperMask) == 0,
+ "Wrapper register set must be a superset of Volatile register set.");
+
+ // The context is the first argument; r0 is the first argument register.
+ Register cxreg = r0;
+ regs.take(cxreg);
+
+ // Stack is:
+ // ... frame ...
+ // +8 [args] + argPadding
+ // +0 ExitFrame
+ //
+ // If it isn't a tail call, then the return address needs to be saved.
+ // Push the frame pointer to finish the exit frame, then link it up.
+ if (f.expectTailCall == NonTailCall) {
+ masm.pushReturnAddress();
+ }
+ masm.Push(FramePointer);
+ masm.moveStackPtrTo(FramePointer);
+ masm.loadJSContext(cxreg);
+ masm.enterExitFrame(cxreg, regs.getAny(), &f);
+
+ // Save the base of the argument set stored on the stack.
+ Register argsBase = InvalidReg;
+ if (f.explicitArgs) {
+ argsBase = r5;
+ regs.take(argsBase);
+ ScratchRegisterScope scratch(masm);
+ masm.ma_add(sp, Imm32(ExitFrameLayout::SizeWithFooter()), argsBase,
+ scratch);
+ }
+
+ // Reserve space for the outparameter.
+ Register outReg = InvalidReg;
+ switch (f.outParam) {
+ case Type_Value:
+ outReg = r4;
+ regs.take(outReg);
+ masm.reserveStack(sizeof(Value));
+ masm.ma_mov(sp, outReg);
+ break;
+
+ case Type_Handle:
+ outReg = r4;
+ regs.take(outReg);
+ masm.PushEmptyRooted(f.outParamRootType);
+ masm.ma_mov(sp, outReg);
+ break;
+
+ case Type_Int32:
+ case Type_Pointer:
+ case Type_Bool:
+ outReg = r4;
+ regs.take(outReg);
+ masm.reserveStack(sizeof(int32_t));
+ masm.ma_mov(sp, outReg);
+ break;
+
+ case Type_Double:
+ outReg = r4;
+ regs.take(outReg);
+ masm.reserveStack(sizeof(double));
+ masm.ma_mov(sp, outReg);
+ break;
+
+ default:
+ MOZ_ASSERT(f.outParam == Type_Void);
+ break;
+ }
+
+ masm.setupUnalignedABICall(regs.getAny());
+ masm.passABIArg(cxreg);
+
+ size_t argDisp = 0;
+
+ // Copy any arguments.
+ for (uint32_t explicitArg = 0; explicitArg < f.explicitArgs; explicitArg++) {
+ switch (f.argProperties(explicitArg)) {
+ case VMFunctionData::WordByValue:
+ masm.passABIArg(MoveOperand(argsBase, argDisp), MoveOp::GENERAL);
+ argDisp += sizeof(void*);
+ break;
+ case VMFunctionData::DoubleByValue:
+ // Values should be passed by reference, not by value, so we assert
+ // that the argument is a double-precision float.
+ MOZ_ASSERT(f.argPassedInFloatReg(explicitArg));
+ masm.passABIArg(MoveOperand(argsBase, argDisp), MoveOp::DOUBLE);
+ argDisp += sizeof(double);
+ break;
+ case VMFunctionData::WordByRef:
+ masm.passABIArg(
+ MoveOperand(argsBase, argDisp, MoveOperand::Kind::EffectiveAddress),
+ MoveOp::GENERAL);
+ argDisp += sizeof(void*);
+ break;
+ case VMFunctionData::DoubleByRef:
+ masm.passABIArg(
+ MoveOperand(argsBase, argDisp, MoveOperand::Kind::EffectiveAddress),
+ MoveOp::GENERAL);
+ argDisp += 2 * sizeof(void*);
+ break;
+ }
+ }
+
+ // Copy the implicit outparam, if any.
+ if (outReg != InvalidReg) {
+ masm.passABIArg(outReg);
+ }
+
+ masm.callWithABI(nativeFun, MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckHasExitFrame);
+
+ // Test for failure.
+ switch (f.failType()) {
+ case Type_Cell:
+ masm.branchTestPtr(Assembler::Zero, r0, r0, masm.failureLabel());
+ break;
+ case Type_Bool:
+ masm.branchIfFalseBool(r0, masm.failureLabel());
+ break;
+ case Type_Void:
+ break;
+ default:
+ MOZ_CRASH("unknown failure kind");
+ }
+
+ // Load the outparam and free any allocated stack.
+ switch (f.outParam) {
+ case Type_Handle:
+ masm.popRooted(f.outParamRootType, ReturnReg, JSReturnOperand);
+ break;
+
+ case Type_Value:
+ masm.loadValue(Address(sp, 0), JSReturnOperand);
+ masm.freeStack(sizeof(Value));
+ break;
+
+ case Type_Int32:
+ case Type_Pointer:
+ masm.load32(Address(sp, 0), ReturnReg);
+ masm.freeStack(sizeof(int32_t));
+ break;
+
+ case Type_Bool:
+ masm.load8ZeroExtend(Address(sp, 0), ReturnReg);
+ masm.freeStack(sizeof(int32_t));
+ break;
+
+ case Type_Double:
+ masm.loadDouble(Address(sp, 0), ReturnDoubleReg);
+ masm.freeStack(sizeof(double));
+ break;
+
+ default:
+ MOZ_ASSERT(f.outParam == Type_Void);
+ break;
+ }
+
+ // Until C++ code is instrumented against Spectre, prevent speculative
+ // execution from returning any private data.
+ if (f.returnsData() && JitOptions.spectreJitToCxxCalls) {
+ masm.speculationBarrier();
+ }
+
+ // Pop ExitFooterFrame and the frame pointer.
+ masm.leaveExitFrame(0);
+ masm.pop(FramePointer);
+
+ // Return. Subtract sizeof(void*) for the frame pointer.
+ masm.retn(Imm32(sizeof(ExitFrameLayout) - sizeof(void*) +
+ f.explicitStackSlots() * sizeof(void*) +
+ f.extraValuesToPop * sizeof(Value)));
+
+ return true;
+}
+
+uint32_t JitRuntime::generatePreBarrier(JSContext* cx, MacroAssembler& masm,
+ MIRType type) {
+ AutoCreatedBy acb(masm, "JitRuntime::generatePreBarrier");
+
+ uint32_t offset = startTrampolineCode(masm);
+
+ masm.pushReturnAddress();
+
+ static_assert(PreBarrierReg == r1);
+ Register temp1 = r2;
+ Register temp2 = r3;
+ Register temp3 = r4;
+ masm.push(temp1);
+ masm.push(temp2);
+ masm.push(temp3);
+
+ Label noBarrier;
+ masm.emitPreBarrierFastPath(cx->runtime(), type, temp1, temp2, temp3,
+ &noBarrier);
+
+ // Call into C++ to mark this GC thing.
+ masm.pop(temp3);
+ masm.pop(temp2);
+ masm.pop(temp1);
+
+ LiveRegisterSet save;
+ save.set() =
+ RegisterSet(GeneralRegisterSet(Registers::VolatileMask),
+ FloatRegisterSet(FloatRegisters::VolatileDoubleMask));
+ masm.PushRegsInMask(save);
+
+ masm.movePtr(ImmPtr(cx->runtime()), r0);
+
+ masm.setupUnalignedABICall(r2);
+ masm.passABIArg(r0);
+ masm.passABIArg(r1);
+ masm.callWithABI(JitPreWriteBarrier(type));
+ masm.PopRegsInMask(save);
+ masm.ret();
+
+ masm.bind(&noBarrier);
+ masm.pop(temp3);
+ masm.pop(temp2);
+ masm.pop(temp1);
+ masm.ret();
+
+ return offset;
+}
+
+void JitRuntime::generateBailoutTailStub(MacroAssembler& masm,
+ Label* bailoutTail) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateBailoutTailStub");
+
+ masm.bind(bailoutTail);
+ masm.generateBailoutTail(r1, r2);
+}
diff --git a/js/src/jit/arm/disasm/Constants-arm.cpp b/js/src/jit/arm/disasm/Constants-arm.cpp
new file mode 100644
index 0000000000..408e2df686
--- /dev/null
+++ b/js/src/jit/arm/disasm/Constants-arm.cpp
@@ -0,0 +1,117 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ */
+// Copyright 2009 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "jit/arm/disasm/Constants-arm.h"
+
+#ifdef JS_DISASM_ARM
+
+namespace js {
+namespace jit {
+namespace disasm {
+
+double Instruction::DoubleImmedVmov() const {
+ // Reconstruct a double from the immediate encoded in the vmov instruction.
+ //
+ // instruction: [xxxxxxxx,xxxxabcd,xxxxxxxx,xxxxefgh]
+ // double: [aBbbbbbb,bbcdefgh,00000000,00000000,
+ // 00000000,00000000,00000000,00000000]
+ //
+ // where B = ~b. Only the high 16 bits are affected.
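+  // For example (illustrative): abcd = 0b0111 and efgh = 0b0000 (imm8 0x70)
+  // give high16 = 0x3FF0, which reconstructs the double 1.0.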
+ uint64_t high16;
+ high16 = (Bits(17, 16) << 4) | Bits(3, 0); // xxxxxxxx,xxcdefgh.
+ high16 |= (0xff * Bit(18)) << 6; // xxbbbbbb,bbxxxxxx.
+ high16 |= (Bit(18) ^ 1) << 14; // xBxxxxxx,xxxxxxxx.
+ high16 |= Bit(19) << 15; // axxxxxxx,xxxxxxxx.
+
+ uint64_t imm = high16 << 48;
+ double d;
+ memcpy(&d, &imm, 8);
+ return d;
+}
+
+// These register names are defined in a way to match the native disassembler
+// formatting. See for example the command "objdump -d <binary file>".
+const char* Registers::names_[kNumRegisters] = {
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+ "r8", "r9", "r10", "fp", "ip", "sp", "lr", "pc",
+};
+
+// List of alias names which can be used when referring to ARM registers.
+const Registers::RegisterAlias Registers::aliases_[] = {
+ {10, "sl"}, {11, "r11"}, {12, "r12"}, {13, "r13"},
+ {14, "r14"}, {15, "r15"}, {kNoRegister, NULL}};
+
+const char* Registers::Name(int reg) {
+ const char* result;
+ if ((0 <= reg) && (reg < kNumRegisters)) {
+ result = names_[reg];
+ } else {
+ result = "noreg";
+ }
+ return result;
+}
+
+// Support for VFP registers s0 to s31 (d0 to d15) and d16-d31.
+// Note that "s(2N):s(2N+1)" is the same register as "dN", up to d15.
+// These register names are defined in a way to match the native disassembler
+// formatting. See for example the command "objdump -d <binary file>".
+const char* VFPRegisters::names_[kNumVFPRegisters] = {
+ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10",
+ "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21",
+ "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", "d0",
+ "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11",
+ "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22",
+ "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"};
+
+const char* VFPRegisters::Name(int reg, bool is_double) {
+ MOZ_ASSERT((0 <= reg) && (reg < kNumVFPRegisters));
+ return names_[reg + (is_double ? kNumVFPSingleRegisters : 0)];
+}
+
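+// For example (illustrative): Number("d5", &is_double) returns 5 and sets
+// *is_double to true; Name(5, true) returns "d5".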
+int VFPRegisters::Number(const char* name, bool* is_double) {
+ for (int i = 0; i < kNumVFPRegisters; i++) {
+ if (strcmp(names_[i], name) == 0) {
+ if (i < kNumVFPSingleRegisters) {
+ *is_double = false;
+ return i;
+ } else {
+ *is_double = true;
+ return i - kNumVFPSingleRegisters;
+ }
+ }
+ }
+
+ // No register with the requested name found.
+ return kNoRegister;
+}
+
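+// For example (illustrative): Number("fp") and Number("r11") both return 11,
+// and Name(11) returns "fp".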
+int Registers::Number(const char* name) {
+ // Look through the canonical names.
+ for (int i = 0; i < kNumRegisters; i++) {
+ if (strcmp(names_[i], name) == 0) {
+ return i;
+ }
+ }
+
+ // Look through the alias names.
+ int i = 0;
+ while (aliases_[i].reg != kNoRegister) {
+ if (strcmp(aliases_[i].name, name) == 0) {
+ return aliases_[i].reg;
+ }
+ i++;
+ }
+
+ // No register with the requested name found.
+ return kNoRegister;
+}
+
+} // namespace disasm
+} // namespace jit
+} // namespace js
+
+#endif // JS_DISASM_ARM
diff --git a/js/src/jit/arm/disasm/Constants-arm.h b/js/src/jit/arm/disasm/Constants-arm.h
new file mode 100644
index 0000000000..0128062b3f
--- /dev/null
+++ b/js/src/jit/arm/disasm/Constants-arm.h
@@ -0,0 +1,684 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ */
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef jit_arm_disasm_Constants_arm_h
+#define jit_arm_disasm_Constants_arm_h
+
+#ifdef JS_DISASM_ARM
+
+# include "mozilla/Assertions.h"
+# include "mozilla/Types.h"
+
+# include <string.h>
+
+namespace js {
+namespace jit {
+namespace disasm {
+
+// Constant pool marker.
+// Use UDF, the permanently undefined instruction.
+const int kConstantPoolMarkerMask = 0xfff000f0;
+const int kConstantPoolMarker = 0xe7f000f0;
+const int kConstantPoolLengthMaxMask = 0xffff;
+
+inline int EncodeConstantPoolLength(int length) {
+ MOZ_ASSERT((length & kConstantPoolLengthMaxMask) == length);
+ return ((length & 0xfff0) << 4) | (length & 0xf);
+}
+
+inline int DecodeConstantPoolLength(int instr) {
+ MOZ_ASSERT((instr & kConstantPoolMarkerMask) == kConstantPoolMarker);
+ return ((instr >> 4) & 0xfff0) | (instr & 0xf);
+}
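+// For example (illustrative): EncodeConstantPoolLength(0x1234) == 0x12304,
+// and DecodeConstantPoolLength(kConstantPoolMarker | 0x12304) == 0x1234.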
+
+// Used in code age prologue - ldr(pc, MemOperand(pc, -4))
+const int kCodeAgeJumpInstruction = 0xe51ff004;
+
+// Number of registers in normal ARM mode.
+const int kNumRegisters = 16;
+
+// VFP support.
+const int kNumVFPSingleRegisters = 32;
+const int kNumVFPDoubleRegisters = 32;
+const int kNumVFPRegisters = kNumVFPSingleRegisters + kNumVFPDoubleRegisters;
+
+// PC is register 15.
+const int kPCRegister = 15;
+const int kNoRegister = -1;
+
+// -----------------------------------------------------------------------------
+// Conditions.
+
+// Defines constants and accessor classes to assemble, disassemble and
+// simulate ARM instructions.
+//
+// Section references in the code refer to the "ARM Architecture Reference
+// Manual" from July 2005 (available at http://www.arm.com/miscPDFs/14128.pdf)
+//
+// Constants for specific fields are defined in their respective named enums.
+// General constants are in an anonymous enum in class Instr.
+
+// Values for the condition field as defined in section A3.2
+enum Condition {
+ kNoCondition = -1,
+
+ eq = 0 << 28, // Z set Equal.
+ ne = 1 << 28, // Z clear Not equal.
+ cs = 2 << 28, // C set Unsigned higher or same.
+ cc = 3 << 28, // C clear Unsigned lower.
+ mi = 4 << 28, // N set Negative.
+ pl = 5 << 28, // N clear Positive or zero.
+ vs = 6 << 28, // V set Overflow.
+ vc = 7 << 28, // V clear No overflow.
+ hi = 8 << 28, // C set, Z clear Unsigned higher.
+ ls = 9 << 28, // C clear or Z set Unsigned lower or same.
+ ge = 10 << 28, // N == V Greater or equal.
+ lt = 11 << 28, // N != V Less than.
+ gt = 12 << 28, // Z clear, N == V Greater than.
+  le = 13 << 28, // Z set or N != V Less than or equal.
+ al = 14 << 28, // Always.
+
+ kSpecialCondition = 15 << 28, // Special condition (refer to section A3.2.1).
+ kNumberOfConditions = 16,
+
+ // Aliases.
+ hs = cs, // C set Unsigned higher or same.
+ lo = cc // C clear Unsigned lower.
+};
+
+inline Condition NegateCondition(Condition cond) {
+ MOZ_ASSERT(cond != al);
+ return static_cast<Condition>(cond ^ ne);
+}
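+// For example (illustrative): NegateCondition(lt) yields ge, because XORing
+// with ne flips bit 28 and turns (11 << 28) into (10 << 28).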
+
+// Commute a condition such that {a cond b == b cond' a}.
+inline Condition CommuteCondition(Condition cond) {
+ switch (cond) {
+ case lo:
+ return hi;
+ case hi:
+ return lo;
+ case hs:
+ return ls;
+ case ls:
+ return hs;
+ case lt:
+ return gt;
+ case gt:
+ return lt;
+ case ge:
+ return le;
+ case le:
+ return ge;
+ default:
+ return cond;
+ }
+}
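+// For example (illustrative): since (a < b) is the same as (b > a),
+// CommuteCondition(lt) returns gt.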
+
+// -----------------------------------------------------------------------------
+// Instructions encoding.
+
+// Instr is merely used by the Assembler to distinguish 32-bit integers
+// representing instructions from usual 32-bit values.
+// Instruction objects are pointers to 32bit values, and provide methods to
+// access the various ISA fields.
+typedef int32_t Instr;
+
+// Opcodes for Data-processing instructions (instructions with a type 0 and 1)
+// as defined in section A3.4
+enum Opcode {
+ AND = 0 << 21, // Logical AND.
+ EOR = 1 << 21, // Logical Exclusive OR.
+ SUB = 2 << 21, // Subtract.
+ RSB = 3 << 21, // Reverse Subtract.
+ ADD = 4 << 21, // Add.
+ ADC = 5 << 21, // Add with Carry.
+ SBC = 6 << 21, // Subtract with Carry.
+ RSC = 7 << 21, // Reverse Subtract with Carry.
+ TST = 8 << 21, // Test.
+ TEQ = 9 << 21, // Test Equivalence.
+ CMP = 10 << 21, // Compare.
+ CMN = 11 << 21, // Compare Negated.
+ ORR = 12 << 21, // Logical (inclusive) OR.
+ MOV = 13 << 21, // Move.
+ BIC = 14 << 21, // Bit Clear.
+ MVN = 15 << 21 // Move Not.
+};
+
+// The bits for bit 7-4 for some type 0 miscellaneous instructions.
+enum MiscInstructionsBits74 {
+ // With bits 22-21 01.
+ BX = 1 << 4,
+ BXJ = 2 << 4,
+ BLX = 3 << 4,
+ BKPT = 7 << 4,
+
+ // With bits 22-21 11.
+ CLZ = 1 << 4
+};
+
+// Load and store exclusive instructions.
+
+// Bit positions.
+enum {
+ ExclusiveOpHi = 24, // Hi bit of opcode field
+ ExclusiveOpLo = 23, // Lo bit of opcode field
+ ExclusiveSizeHi = 22, // Hi bit of operand size field
+ ExclusiveSizeLo = 21, // Lo bit of operand size field
+ ExclusiveLoad = 20 // Bit indicating load
+};
+
+// Opcode bits for exclusive instructions.
+enum { ExclusiveOpcode = 3 };
+
+// Operand size, Bits(ExclusiveSizeHi,ExclusiveSizeLo).
+enum {
+ ExclusiveWord = 0,
+ ExclusiveDouble = 1,
+ ExclusiveByte = 2,
+ ExclusiveHalf = 3
+};
+
+// Instruction encoding bits and masks.
+enum {
+ H = 1 << 5, // Halfword (or byte).
+ S6 = 1 << 6, // Signed (or unsigned).
+ L = 1 << 20, // Load (or store).
+ S = 1 << 20, // Set condition code (or leave unchanged).
+ W = 1 << 21, // Writeback base register (or leave unchanged).
+ A = 1 << 21, // Accumulate in multiply instruction (or not).
+ B = 1 << 22, // Unsigned byte (or word).
+ N = 1 << 22, // Long (or short).
+ U = 1 << 23, // Positive (or negative) offset/index.
+ P = 1 << 24, // Offset/pre-indexed addressing (or post-indexed addressing).
+ I = 1 << 25, // Immediate shifter operand (or not).
+ B0 = 1 << 0,
+ B4 = 1 << 4,
+ B5 = 1 << 5,
+ B6 = 1 << 6,
+ B7 = 1 << 7,
+ B8 = 1 << 8,
+ B9 = 1 << 9,
+ B12 = 1 << 12,
+ B16 = 1 << 16,
+ B17 = 1 << 17,
+ B18 = 1 << 18,
+ B19 = 1 << 19,
+ B20 = 1 << 20,
+ B21 = 1 << 21,
+ B22 = 1 << 22,
+ B23 = 1 << 23,
+ B24 = 1 << 24,
+ B25 = 1 << 25,
+ B26 = 1 << 26,
+ B27 = 1 << 27,
+ B28 = 1 << 28,
+
+ // Instruction bit masks.
+ kCondMask = 15 << 28,
+ kALUMask = 0x6f << 21,
+ kRdMask = 15 << 12, // In str instruction.
+ kCoprocessorMask = 15 << 8,
+ kOpCodeMask = 15 << 21, // In data-processing instructions.
+ kImm24Mask = (1 << 24) - 1,
+ kImm16Mask = (1 << 16) - 1,
+ kImm8Mask = (1 << 8) - 1,
+ kOff12Mask = (1 << 12) - 1,
+ kOff8Mask = (1 << 8) - 1
+};
+
+// -----------------------------------------------------------------------------
+// Addressing modes and instruction variants.
+
+// Condition code updating mode.
+enum SBit {
+ SetCC = 1 << 20, // Set condition code.
+ LeaveCC = 0 << 20 // Leave condition code unchanged.
+};
+
+// Status register selection.
+enum SRegister { CPSR = 0 << 22, SPSR = 1 << 22 };
+
+// Shifter types for Data-processing operands as defined in section A5.1.2.
+enum ShiftOp {
+ LSL = 0 << 5, // Logical shift left.
+ LSR = 1 << 5, // Logical shift right.
+ ASR = 2 << 5, // Arithmetic shift right.
+ ROR = 3 << 5, // Rotate right.
+
+ // RRX is encoded as ROR with shift_imm == 0.
+ // Use a special code to make the distinction. The RRX ShiftOp is only used
+ // as an argument, and will never actually be encoded. The Assembler will
+ // detect it and emit the correct ROR shift operand with shift_imm == 0.
+ RRX = -1,
+ kNumberOfShifts = 4
+};
+
+// Status register fields.
+enum SRegisterField {
+ CPSR_c = CPSR | 1 << 16,
+ CPSR_x = CPSR | 1 << 17,
+ CPSR_s = CPSR | 1 << 18,
+ CPSR_f = CPSR | 1 << 19,
+ SPSR_c = SPSR | 1 << 16,
+ SPSR_x = SPSR | 1 << 17,
+ SPSR_s = SPSR | 1 << 18,
+ SPSR_f = SPSR | 1 << 19
+};
+
+// Status register field mask (or'ed SRegisterField enum values).
+typedef uint32_t SRegisterFieldMask;
+
+// Memory operand addressing mode.
+enum AddrMode {
+ // Bit encoding P U W.
+ Offset = (8 | 4 | 0) << 21, // Offset (without writeback to base).
+ PreIndex = (8 | 4 | 1) << 21, // Pre-indexed addressing with writeback.
+ PostIndex = (0 | 4 | 0) << 21, // Post-indexed addressing with writeback.
+ NegOffset =
+ (8 | 0 | 0) << 21, // Negative offset (without writeback to base).
+ NegPreIndex = (8 | 0 | 1) << 21, // Negative pre-indexed with writeback.
+ NegPostIndex = (0 | 0 | 0) << 21 // Negative post-indexed with writeback.
+};
+
+// Load/store multiple addressing mode.
+enum BlockAddrMode {
+ // Bit encoding P U W .
+ da = (0 | 0 | 0) << 21, // Decrement after.
+ ia = (0 | 4 | 0) << 21, // Increment after.
+ db = (8 | 0 | 0) << 21, // Decrement before.
+ ib = (8 | 4 | 0) << 21, // Increment before.
+ da_w = (0 | 0 | 1) << 21, // Decrement after with writeback to base.
+ ia_w = (0 | 4 | 1) << 21, // Increment after with writeback to base.
+ db_w = (8 | 0 | 1) << 21, // Decrement before with writeback to base.
+ ib_w = (8 | 4 | 1) << 21, // Increment before with writeback to base.
+
+ // Alias modes for comparison when writeback does not matter.
+ da_x = (0 | 0 | 0) << 21, // Decrement after.
+ ia_x = (0 | 4 | 0) << 21, // Increment after.
+ db_x = (8 | 0 | 0) << 21, // Decrement before.
+ ib_x = (8 | 4 | 0) << 21, // Increment before.
+
+ kBlockAddrModeMask = (8 | 4 | 1) << 21
+};
+
+// Coprocessor load/store operand size.
+enum LFlag {
+ Long = 1 << 22, // Long load/store coprocessor.
+ Short = 0 << 22 // Short load/store coprocessor.
+};
+
+// NEON data type
+enum NeonDataType {
+ NeonS8 = 0x1, // U = 0, imm3 = 0b001
+ NeonS16 = 0x2, // U = 0, imm3 = 0b010
+ NeonS32 = 0x4, // U = 0, imm3 = 0b100
+ NeonU8 = 1 << 24 | 0x1, // U = 1, imm3 = 0b001
+ NeonU16 = 1 << 24 | 0x2, // U = 1, imm3 = 0b010
+ NeonU32 = 1 << 24 | 0x4, // U = 1, imm3 = 0b100
+ NeonDataTypeSizeMask = 0x7,
+ NeonDataTypeUMask = 1 << 24
+};
+
+enum NeonListType { nlt_1 = 0x7, nlt_2 = 0xA, nlt_3 = 0x6, nlt_4 = 0x2 };
+
+enum NeonSize { Neon8 = 0x0, Neon16 = 0x1, Neon32 = 0x2, Neon64 = 0x3 };
+
+// -----------------------------------------------------------------------------
+// Supervisor Call (svc) specific support.
+
+// Special Software Interrupt codes when used in the presence of the ARM
+// simulator.
+// svc (formerly swi) provides a 24-bit immediate value. Use bits 22:0 for
+// standard SoftwareInterruptCodes. Bit 23 is reserved for the stop feature.
+enum SoftwareInterruptCodes {
+ // transition to C code
+ kCallRtRedirected = 0x10,
+ // break point
+ kBreakpoint = 0x20,
+ // stop
+ kStopCode = 1 << 23
+};
+const uint32_t kStopCodeMask = kStopCode - 1;
+const uint32_t kMaxStopCode = kStopCode - 1;
+const int32_t kDefaultStopCode = -1;
+
+// Type of VFP register. Determines register encoding.
+enum VFPRegPrecision { kSinglePrecision = 0, kDoublePrecision = 1 };
+
+// VFP FPSCR constants.
+enum VFPConversionMode { kFPSCRRounding = 0, kDefaultRoundToZero = 1 };
+
+// This mask does not include the "inexact" or "input denormal" cumulative
+// exceptions flags, because we usually don't want to check for it.
+const uint32_t kVFPExceptionMask = 0xf;
+const uint32_t kVFPInvalidOpExceptionBit = 1 << 0;
+const uint32_t kVFPOverflowExceptionBit = 1 << 2;
+const uint32_t kVFPUnderflowExceptionBit = 1 << 3;
+const uint32_t kVFPInexactExceptionBit = 1 << 4;
+const uint32_t kVFPFlushToZeroMask = 1 << 24;
+const uint32_t kVFPDefaultNaNModeControlBit = 1 << 25;
+
+const uint32_t kVFPNConditionFlagBit = 1 << 31;
+const uint32_t kVFPZConditionFlagBit = 1 << 30;
+const uint32_t kVFPCConditionFlagBit = 1 << 29;
+const uint32_t kVFPVConditionFlagBit = 1 << 28;
+
+// VFP rounding modes. See ARM DDI 0406B Page A2-29.
+enum VFPRoundingMode {
+ RN = 0 << 22, // Round to Nearest.
+ RP = 1 << 22, // Round towards Plus Infinity.
+ RM = 2 << 22, // Round towards Minus Infinity.
+ RZ = 3 << 22, // Round towards zero.
+
+ // Aliases.
+ kRoundToNearest = RN,
+ kRoundToPlusInf = RP,
+ kRoundToMinusInf = RM,
+ kRoundToZero = RZ
+};
+
+const uint32_t kVFPRoundingModeMask = 3 << 22;
+
+enum CheckForInexactConversion {
+ kCheckForInexactConversion,
+ kDontCheckForInexactConversion
+};
+
+// -----------------------------------------------------------------------------
+// Hints.
+
+// Branch hints are not used on the ARM. They are defined so that they can
+// appear in shared function signatures, but will be ignored in ARM
+// implementations.
+enum Hint { no_hint };
+
+// Hints are not used on the ARM. Negating is trivial.
+inline Hint NegateHint(Hint ignored) { return no_hint; }
+
+// -----------------------------------------------------------------------------
+// Instruction abstraction.
+
+// The class Instruction enables access to individual fields defined in the ARM
+// architecture instruction set encoding as described in figure A3-1.
+// Note that the Assembler uses typedef int32_t Instr.
+//
+// Example: Test whether the instruction at ptr sets the condition code
+// bits.
+//
+// bool InstructionSetsConditionCodes(byte* ptr) {
+// Instruction* instr = Instruction::At(ptr);
+// int type = instr->TypeValue();
+// return ((type == 0) || (type == 1)) && instr->HasS();
+// }
+//
+class Instruction {
+ public:
+ enum { kInstrSize = 4, kInstrSizeLog2 = 2, kPCReadOffset = 8 };
+
+ // Helper macro to define static accessors.
+ // We use the cast to char* trick to bypass the strict anti-aliasing rules.
+# define DECLARE_STATIC_TYPED_ACCESSOR(return_type, Name) \
+ static inline return_type Name(Instr instr) { \
+ char* temp = reinterpret_cast<char*>(&instr); \
+ return reinterpret_cast<Instruction*>(temp)->Name(); \
+ }
+
+# define DECLARE_STATIC_ACCESSOR(Name) DECLARE_STATIC_TYPED_ACCESSOR(int, Name)
+
+ // Get the raw instruction bits.
+ inline Instr InstructionBits() const {
+ return *reinterpret_cast<const Instr*>(this);
+ }
+
+ // Set the raw instruction bits to value.
+ inline void SetInstructionBits(Instr value) {
+ *reinterpret_cast<Instr*>(this) = value;
+ }
+
+ // Read one particular bit out of the instruction bits.
+ inline int Bit(int nr) const { return (InstructionBits() >> nr) & 1; }
+
+ // Read a bit field's value out of the instruction bits.
+ inline int Bits(int hi, int lo) const {
+ return (InstructionBits() >> lo) & ((2 << (hi - lo)) - 1);
+ }
+
+ // Read a bit field out of the instruction bits.
+ inline int BitField(int hi, int lo) const {
+ return InstructionBits() & (((2 << (hi - lo)) - 1) << lo);
+ }
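+  // For example (illustrative): for the instruction 0xe0810002 (add r0, r1,
+  // r2), Bits(19, 16) == 0x1 (Rn) and BitField(19, 16) == 0x10000.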
+
+ // Static support.
+
+ // Read one particular bit out of the instruction bits.
+ static inline int Bit(Instr instr, int nr) { return (instr >> nr) & 1; }
+
+ // Read the value of a bit field out of the instruction bits.
+ static inline int Bits(Instr instr, int hi, int lo) {
+ return (instr >> lo) & ((2 << (hi - lo)) - 1);
+ }
+
+ // Read a bit field out of the instruction bits.
+ static inline int BitField(Instr instr, int hi, int lo) {
+ return instr & (((2 << (hi - lo)) - 1) << lo);
+ }
+
+ // Accessors for the different named fields used in the ARM encoding.
+  // The naming of these accessors corresponds to figure A3-1.
+ //
+  // Two kinds of accessors are declared:
+ // - <Name>Field() will return the raw field, i.e. the field's bits at their
+ // original place in the instruction encoding.
+ // e.g. if instr is the 'addgt r0, r1, r2' instruction, encoded as
+ // 0xC0810002 ConditionField(instr) will return 0xC0000000.
+ // - <Name>Value() will return the field value, shifted back to bit 0.
+ // e.g. if instr is the 'addgt r0, r1, r2' instruction, encoded as
+  // 0xC0810002 ConditionValue(instr) will return 0xC.
+
+ // Generally applicable fields
+ inline Condition ConditionValue() const {
+ return static_cast<Condition>(Bits(31, 28));
+ }
+ inline Condition ConditionField() const {
+ return static_cast<Condition>(BitField(31, 28));
+ }
+ DECLARE_STATIC_TYPED_ACCESSOR(Condition, ConditionValue);
+ DECLARE_STATIC_TYPED_ACCESSOR(Condition, ConditionField);
+
+ inline int TypeValue() const { return Bits(27, 25); }
+ inline int SpecialValue() const { return Bits(27, 23); }
+
+ inline int RnValue() const { return Bits(19, 16); }
+ DECLARE_STATIC_ACCESSOR(RnValue);
+ inline int RdValue() const { return Bits(15, 12); }
+ DECLARE_STATIC_ACCESSOR(RdValue);
+
+ inline int CoprocessorValue() const { return Bits(11, 8); }
+ // Support for VFP.
+ // Vn(19-16) | Vd(15-12) | Vm(3-0)
+ inline int VnValue() const { return Bits(19, 16); }
+ inline int VmValue() const { return Bits(3, 0); }
+ inline int VdValue() const { return Bits(15, 12); }
+ inline int NValue() const { return Bit(7); }
+ inline int MValue() const { return Bit(5); }
+ inline int DValue() const { return Bit(22); }
+ inline int RtValue() const { return Bits(15, 12); }
+ inline int PValue() const { return Bit(24); }
+ inline int UValue() const { return Bit(23); }
+ inline int Opc1Value() const { return (Bit(23) << 2) | Bits(21, 20); }
+ inline int Opc2Value() const { return Bits(19, 16); }
+ inline int Opc3Value() const { return Bits(7, 6); }
+ inline int SzValue() const { return Bit(8); }
+ inline int VLValue() const { return Bit(20); }
+ inline int VCValue() const { return Bit(8); }
+ inline int VAValue() const { return Bits(23, 21); }
+ inline int VBValue() const { return Bits(6, 5); }
+ inline int VFPNRegValue(VFPRegPrecision pre) {
+ return VFPGlueRegValue(pre, 16, 7);
+ }
+ inline int VFPMRegValue(VFPRegPrecision pre) {
+ return VFPGlueRegValue(pre, 0, 5);
+ }
+ inline int VFPDRegValue(VFPRegPrecision pre) {
+ return VFPGlueRegValue(pre, 12, 22);
+ }
+
+ // Fields used in Data processing instructions
+ inline int OpcodeValue() const { return static_cast<Opcode>(Bits(24, 21)); }
+ inline Opcode OpcodeField() const {
+ return static_cast<Opcode>(BitField(24, 21));
+ }
+ inline int SValue() const { return Bit(20); }
+ // with register
+ inline int RmValue() const { return Bits(3, 0); }
+ DECLARE_STATIC_ACCESSOR(RmValue);
+ inline int ShiftValue() const { return static_cast<ShiftOp>(Bits(6, 5)); }
+ inline ShiftOp ShiftField() const {
+ return static_cast<ShiftOp>(BitField(6, 5));
+ }
+ inline int RegShiftValue() const { return Bit(4); }
+ inline int RsValue() const { return Bits(11, 8); }
+ inline int ShiftAmountValue() const { return Bits(11, 7); }
+ // with immediate
+ inline int RotateValue() const { return Bits(11, 8); }
+ DECLARE_STATIC_ACCESSOR(RotateValue);
+ inline int Immed8Value() const { return Bits(7, 0); }
+ DECLARE_STATIC_ACCESSOR(Immed8Value);
+ inline int Immed4Value() const { return Bits(19, 16); }
+ inline int ImmedMovwMovtValue() const {
+ return Immed4Value() << 12 | Offset12Value();
+ }
+ DECLARE_STATIC_ACCESSOR(ImmedMovwMovtValue);
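+  // For example (illustrative): for 0xe3010234 (movw r0, #0x1234),
+  // Immed4Value() == 0x1 and ImmedMovwMovtValue() == 0x1234.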
+
+ // Fields used in Load/Store instructions
+ inline int PUValue() const { return Bits(24, 23); }
+ inline int PUField() const { return BitField(24, 23); }
+ inline int BValue() const { return Bit(22); }
+ inline int WValue() const { return Bit(21); }
+ inline int LValue() const { return Bit(20); }
+ // with register uses same fields as Data processing instructions above
+ // with immediate
+ inline int Offset12Value() const { return Bits(11, 0); }
+ // multiple
+ inline int RlistValue() const { return Bits(15, 0); }
+ // extra loads and stores
+ inline int SignValue() const { return Bit(6); }
+ inline int HValue() const { return Bit(5); }
+ inline int ImmedHValue() const { return Bits(11, 8); }
+ inline int ImmedLValue() const { return Bits(3, 0); }
+
+ // Fields used in Branch instructions
+ inline int LinkValue() const { return Bit(24); }
+ inline int SImmed24Value() const { return ((InstructionBits() << 8) >> 8); }
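+  // For example (illustrative): the left-then-right shift by 8 sign-extends
+  // the 24-bit field, so SImmed24Value() of 0xeafffffe (a branch to itself)
+  // is -2.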
+
+ // Fields used in Software interrupt instructions
+ inline SoftwareInterruptCodes SvcValue() const {
+ return static_cast<SoftwareInterruptCodes>(Bits(23, 0));
+ }
+
+ // Test for special encodings of type 0 instructions (extra loads and stores,
+ // as well as multiplications).
+ inline bool IsSpecialType0() const { return (Bit(7) == 1) && (Bit(4) == 1); }
+
+ // Test for miscellaneous instructions encodings of type 0 instructions.
+ inline bool IsMiscType0() const {
+ return (Bit(24) == 1) && (Bit(23) == 0) && (Bit(20) == 0) &&
+ ((Bit(7) == 0));
+ }
+
+ // Test for a nop instruction, which falls under type 1.
+ inline bool IsNopType1() const { return Bits(24, 0) == 0x0120F000; }
+
+  // Test for a csdb instruction, which falls under type 1.
+ inline bool IsCsdbType1() const { return Bits(24, 0) == 0x0120F014; }
+
+ // Test for a stop instruction.
+ inline bool IsStop() const {
+ return (TypeValue() == 7) && (Bit(24) == 1) && (SvcValue() >= kStopCode);
+ }
+
+ // Special accessors that test for existence of a value.
+ inline bool HasS() const { return SValue() == 1; }
+ inline bool HasB() const { return BValue() == 1; }
+ inline bool HasW() const { return WValue() == 1; }
+ inline bool HasL() const { return LValue() == 1; }
+ inline bool HasU() const { return UValue() == 1; }
+ inline bool HasSign() const { return SignValue() == 1; }
+ inline bool HasH() const { return HValue() == 1; }
+ inline bool HasLink() const { return LinkValue() == 1; }
+
+ // Decoding the double immediate in the vmov instruction.
+ double DoubleImmedVmov() const;
+
+  // Instructions are read out of a code stream. The only way to get a
+ // reference to an instruction is to convert a pointer. There is no way
+ // to allocate or create instances of class Instruction.
+ // Use the At(pc) function to create references to Instruction.
+ static Instruction* At(uint8_t* pc) {
+ return reinterpret_cast<Instruction*>(pc);
+ }
+
+ private:
+ // Join split register codes, depending on single or double precision.
+ // four_bit is the position of the least-significant bit of the four
+ // bit specifier. one_bit is the position of the additional single bit
+ // specifier.
+ inline int VFPGlueRegValue(VFPRegPrecision pre, int four_bit, int one_bit) {
+ if (pre == kSinglePrecision) {
+ return (Bits(four_bit + 3, four_bit) << 1) | Bit(one_bit);
+ }
+ return (Bit(one_bit) << 4) | Bits(four_bit + 3, four_bit);
+ }
+
+ // We need to prevent the creation of instances of class Instruction.
+ Instruction() = delete;
+ Instruction(const Instruction&) = delete;
+ void operator=(const Instruction&) = delete;
+};
+
+// Helper functions for converting between register numbers and names.
+class Registers {
+ public:
+ // Return the name of the register.
+ static const char* Name(int reg);
+
+ // Lookup the register number for the name provided.
+ static int Number(const char* name);
+
+ struct RegisterAlias {
+ int reg;
+ const char* name;
+ };
+
+ private:
+ static const char* names_[kNumRegisters];
+ static const RegisterAlias aliases_[];
+};
+
+// Helper functions for converting between VFP register numbers and names.
+class VFPRegisters {
+ public:
+ // Return the name of the register.
+ static const char* Name(int reg, bool is_double);
+
+ // Lookup the register number for the name provided.
+ // Set flag pointed by is_double to true if register
+ // is double-precision.
+ static int Number(const char* name, bool* is_double);
+
+ private:
+ static const char* names_[kNumVFPRegisters];
+};
+
+} // namespace disasm
+} // namespace jit
+} // namespace js
+
+#endif // JS_DISASM_ARM
+
+#endif // jit_arm_disasm_Constants_arm_h
diff --git a/js/src/jit/arm/disasm/Disasm-arm.cpp b/js/src/jit/arm/disasm/Disasm-arm.cpp
new file mode 100644
index 0000000000..97f39e1331
--- /dev/null
+++ b/js/src/jit/arm/disasm/Disasm-arm.cpp
@@ -0,0 +1,2031 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ */
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// A Disassembler object is used to disassemble a block of code instruction by
+// instruction. The default implementation of the NameConverter object can be
+// overridden to modify register names or to do symbol lookup on addresses.
+//
+// The example below will disassemble a block of code and print it to stdout.
+//
+// disasm::NameConverter converter;
+// disasm::Disassembler d(converter);
+// for (uint8_t* pc = begin; pc < end;) {
+// disasm::EmbeddedVector<char, disasm::ReasonableBufferSize> buffer;
+// uint8_t* prev_pc = pc;
+// pc += d.InstructionDecode(buffer, pc);
+// printf("%p %08x %s\n",
+// prev_pc, *reinterpret_cast<int32_t*>(prev_pc), buffer);
+// }
+//
+// The Disassembler class also has a convenience method to disassemble a block
+// of code into a FILE*, meaning that the above functionality could also be
+// achieved by just calling Disassembler::Disassemble(stdout, begin, end);
+
+#include "jit/arm/disasm/Disasm-arm.h"
+
+#ifdef JS_DISASM_ARM
+
+# include <stdarg.h>
+# include <stdio.h>
+# include <string.h>
+
+# include "jit/arm/disasm/Constants-arm.h"
+
+namespace js {
+namespace jit {
+namespace disasm {
+
+// Helper function for printing to a Vector.
+static int MOZ_FORMAT_PRINTF(2, 3)
+ SNPrintF(V8Vector<char> str, const char* format, ...) {
+ va_list args;
+ va_start(args, format);
+ int result = vsnprintf(str.start(), str.length(), format, args);
+ va_end(args);
+ return result;
+}
+
+//------------------------------------------------------------------------------
+
+// Decoder decodes and disassembles instructions into an output buffer.
+// It uses the converter to convert register names and call destinations into
+// a more informative description.
+class Decoder {
+ public:
+ Decoder(const disasm::NameConverter& converter, V8Vector<char> out_buffer)
+ : converter_(converter), out_buffer_(out_buffer), out_buffer_pos_(0) {
+ out_buffer_[out_buffer_pos_] = '\0';
+ }
+
+ ~Decoder() {}
+
+  // Writes one disassembled instruction into the output buffer
+  // (0-terminated).
+ // Returns the length of the disassembled machine instruction in bytes.
+ int InstructionDecode(uint8_t* instruction);
+
+ static bool IsConstantPoolAt(uint8_t* instr_ptr);
+ static int ConstantPoolSizeAt(uint8_t* instr_ptr);
+
+ private:
+ // Bottleneck functions to print into the out_buffer.
+ void PrintChar(const char ch);
+ void Print(const char* str);
+
+ // Printing of common values.
+ void PrintRegister(int reg);
+ void PrintSRegister(int reg);
+ void PrintDRegister(int reg);
+ int FormatVFPRegister(Instruction* instr, const char* format);
+ void PrintMovwMovt(Instruction* instr);
+ int FormatVFPinstruction(Instruction* instr, const char* format);
+ void PrintCondition(Instruction* instr);
+ void PrintShiftRm(Instruction* instr);
+ void PrintShiftImm(Instruction* instr);
+ void PrintShiftSat(Instruction* instr);
+ void PrintPU(Instruction* instr);
+ void PrintSoftwareInterrupt(SoftwareInterruptCodes svc);
+
+ // Handle formatting of instructions and their options.
+ int FormatRegister(Instruction* instr, const char* option);
+ void FormatNeonList(int Vd, int type);
+ void FormatNeonMemory(int Rn, int align, int Rm);
+ int FormatOption(Instruction* instr, const char* option);
+ void Format(Instruction* instr, const char* format);
+ void Unknown(Instruction* instr);
+
+ // Each of these functions decodes one particular instruction type, a 3-bit
+ // field in the instruction encoding.
+ // Types 0 and 1 are combined as they are largely the same except for the way
+ // they interpret the shifter operand.
+ void DecodeType01(Instruction* instr);
+ void DecodeType2(Instruction* instr);
+ void DecodeType3(Instruction* instr);
+ void DecodeType4(Instruction* instr);
+ void DecodeType5(Instruction* instr);
+ void DecodeType6(Instruction* instr);
+ // Type 7 includes special Debugger instructions.
+ int DecodeType7(Instruction* instr);
+ // For VFP support.
+ void DecodeTypeVFP(Instruction* instr);
+ void DecodeType6CoprocessorIns(Instruction* instr);
+
+ void DecodeSpecialCondition(Instruction* instr);
+
+ void DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(Instruction* instr);
+ void DecodeVCMP(Instruction* instr);
+ void DecodeVCVTBetweenDoubleAndSingle(Instruction* instr);
+ void DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr);
+
+ const disasm::NameConverter& converter_;
+ V8Vector<char> out_buffer_;
+ int out_buffer_pos_;
+
+ // Disallow copy and assign.
+ Decoder(const Decoder&) = delete;
+ void operator=(const Decoder&) = delete;
+};
+
+// Support for assertions in the Decoder formatting functions.
+# define STRING_STARTS_WITH(string, compare_string) \
+ (strncmp(string, compare_string, strlen(compare_string)) == 0)
+
+// Append the ch to the output buffer.
+void Decoder::PrintChar(const char ch) { out_buffer_[out_buffer_pos_++] = ch; }
+
+// Append the str to the output buffer.
+void Decoder::Print(const char* str) {
+ char cur = *str++;
+ while (cur != '\0' && (out_buffer_pos_ < int(out_buffer_.length() - 1))) {
+ PrintChar(cur);
+ cur = *str++;
+ }
+ out_buffer_[out_buffer_pos_] = 0;
+}
+
+// These condition names are defined in a way to match the native disassembler
+// formatting. See for example the command "objdump -d <binary file>".
+static const char* const cond_names[kNumberOfConditions] = {
+ "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
+ "hi", "ls", "ge", "lt", "gt", "le", "", "invalid",
+};
+
+// Print the condition guarding the instruction.
+void Decoder::PrintCondition(Instruction* instr) {
+ Print(cond_names[instr->ConditionValue()]);
+}
+
+// Print the register name according to the active name converter.
+void Decoder::PrintRegister(int reg) {
+ Print(converter_.NameOfCPURegister(reg));
+}
+
+// Print the VFP S register name according to the active name converter.
+void Decoder::PrintSRegister(int reg) { Print(VFPRegisters::Name(reg, false)); }
+
+// Print the VFP D register name according to the active name converter.
+void Decoder::PrintDRegister(int reg) { Print(VFPRegisters::Name(reg, true)); }
+
+// These shift names are defined in a way to match the native disassembler
+// formatting. See for example the command "objdump -d <binary file>".
+static const char* const shift_names[kNumberOfShifts] = {"lsl", "lsr", "asr",
+ "ror"};
+
+// Print the register shift operands for the instruction. Generally used for
+// data processing instructions.
+void Decoder::PrintShiftRm(Instruction* instr) {
+ ShiftOp shift = instr->ShiftField();
+ int shift_index = instr->ShiftValue();
+ int shift_amount = instr->ShiftAmountValue();
+ int rm = instr->RmValue();
+
+ PrintRegister(rm);
+
+ if ((instr->RegShiftValue() == 0) && (shift == LSL) && (shift_amount == 0)) {
+ // Special case for using rm only.
+ return;
+ }
+ if (instr->RegShiftValue() == 0) {
+ // by immediate
+ if ((shift == ROR) && (shift_amount == 0)) {
+ Print(", RRX");
+ return;
+ } else if (((shift == LSR) || (shift == ASR)) && (shift_amount == 0)) {
+ shift_amount = 32;
+ }
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, ", %s #%d",
+ shift_names[shift_index], shift_amount);
+ } else {
+ // by register
+ int rs = instr->RsValue();
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, ", %s ",
+ shift_names[shift_index]);
+ PrintRegister(rs);
+ }
+}
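+
+// A sketch of the shifter-operand text this produces for a few representative
+// cases (register numbers are chosen for illustration and assume the default
+// name converter):
+//   Rm = r1, LSL #0, shift by immediate  ->  "r1"
+//   Rm = r1, LSR #0, shift by immediate  ->  "r1, lsr #32"
+//   Rm = r1, ROR #0, shift by immediate  ->  "r1, RRX"
+//   Rm = r1, ASR by register Rs = r3     ->  "r1, asr r3"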
+
+static inline uint32_t RotateRight32(uint32_t value, uint32_t shift) {
+ if (shift == 0) return value;
+ return (value >> shift) | (value << (32 - shift));
+}
+
+// Print the immediate operand for the instruction. Generally used for data
+// processing instructions.
+void Decoder::PrintShiftImm(Instruction* instr) {
+ int rotate = instr->RotateValue() * 2;
+ int immed8 = instr->Immed8Value();
+ int imm = RotateRight32(immed8, rotate);
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "#%d", imm);
+}
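+
+// As a worked example (values chosen for illustration, not taken from any
+// particular binary): immed8 = 0xFF with a rotate field of 12 gives a
+// rotation of 24 bits, so the printed operand is
+// RotateRight32(0xFF, 24) = 0xFF00, i.e. "#65280".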
+
+// Print the optional shift and immediate used by saturating instructions.
+void Decoder::PrintShiftSat(Instruction* instr) {
+ int shift = instr->Bits(11, 7);
+ if (shift > 0) {
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, ", %s #%d",
+ shift_names[instr->Bit(6) * 2], instr->Bits(11, 7));
+ }
+}
+
+// Print PU formatting to reduce complexity of FormatOption.
+void Decoder::PrintPU(Instruction* instr) {
+ switch (instr->PUField()) {
+ case da_x: {
+ Print("da");
+ break;
+ }
+ case ia_x: {
+ Print("ia");
+ break;
+ }
+ case db_x: {
+ Print("db");
+ break;
+ }
+ case ib_x: {
+ Print("ib");
+ break;
+ }
+ default: {
+ MOZ_CRASH();
+ break;
+ }
+ }
+}
+
+// Print SoftwareInterrupt codes. Factoring this out reduces the complexity of
+// the FormatOption method.
+void Decoder::PrintSoftwareInterrupt(SoftwareInterruptCodes svc) {
+ switch (svc) {
+ case kCallRtRedirected:
+ Print("call rt redirected");
+ return;
+ case kBreakpoint:
+ Print("breakpoint");
+ return;
+ default:
+ if (svc >= kStopCode) {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d - 0x%x",
+ svc & kStopCodeMask, svc & kStopCodeMask);
+ } else {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", svc);
+ }
+ return;
+ }
+}
+
+// Handle all register based formatting in this function to reduce the
+// complexity of FormatOption.
+int Decoder::FormatRegister(Instruction* instr, const char* format) {
+ MOZ_ASSERT(format[0] == 'r');
+ if (format[1] == 'n') { // 'rn: Rn register
+ int reg = instr->RnValue();
+ PrintRegister(reg);
+ return 2;
+ } else if (format[1] == 'd') { // 'rd: Rd register
+ int reg = instr->RdValue();
+ PrintRegister(reg);
+ return 2;
+ } else if (format[1] == 's') { // 'rs: Rs register
+ int reg = instr->RsValue();
+ PrintRegister(reg);
+ return 2;
+ } else if (format[1] == 'm') { // 'rm: Rm register
+ int reg = instr->RmValue();
+ PrintRegister(reg);
+ return 2;
+ } else if (format[1] == 't') { // 'rt: Rt register
+ int reg = instr->RtValue();
+ PrintRegister(reg);
+ return 2;
+ } else if (format[1] == 'l') {
+ // 'rlist: register list for load and store multiple instructions
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "rlist"));
+ int rlist = instr->RlistValue();
+ int reg = 0;
+ Print("{");
+ // Print register list in ascending order, by scanning the bit mask.
+ while (rlist != 0) {
+ if ((rlist & 1) != 0) {
+ PrintRegister(reg);
+ if ((rlist >> 1) != 0) {
+ Print(", ");
+ }
+ }
+ reg++;
+ rlist >>= 1;
+ }
+ Print("}");
+ return 5;
+ }
+ MOZ_CRASH();
+ return -1;
+}
+
+// Handle all VFP register based formatting in this function to reduce the
+// complexity of FormatOption.
+int Decoder::FormatVFPRegister(Instruction* instr, const char* format) {
+ MOZ_ASSERT((format[0] == 'S') || (format[0] == 'D'));
+
+ VFPRegPrecision precision =
+ format[0] == 'D' ? kDoublePrecision : kSinglePrecision;
+
+ int retval = 2;
+ int reg = -1;
+ if (format[1] == 'n') {
+ reg = instr->VFPNRegValue(precision);
+ } else if (format[1] == 'm') {
+ reg = instr->VFPMRegValue(precision);
+ } else if (format[1] == 'd') {
+ if ((instr->TypeValue() == 7) && (instr->Bit(24) == 0x0) &&
+ (instr->Bits(11, 9) == 0x5) && (instr->Bit(4) == 0x1)) {
+ // vmov.32 has Vd in a different place.
+ reg = instr->Bits(19, 16) | (instr->Bit(7) << 4);
+ } else {
+ reg = instr->VFPDRegValue(precision);
+ }
+
+ if (format[2] == '+') {
+ int immed8 = instr->Immed8Value();
+ if (format[0] == 'S') reg += immed8 - 1;
+ if (format[0] == 'D') reg += (immed8 / 2 - 1);
+ }
+ if (format[2] == '+') retval = 3;
+ } else {
+ MOZ_CRASH();
+ }
+
+ if (precision == kSinglePrecision) {
+ PrintSRegister(reg);
+ } else {
+ PrintDRegister(reg);
+ }
+
+ return retval;
+}
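+
+// As an illustration of the 'Sd+ form used by the vldm/vstm formats later in
+// this file (register numbers are hypothetical): with a base register of s8
+// and immed8 = 4, the pattern {'Sd-'Sd+} expands to "{s8-s11}", i.e. a range
+// of immed8 consecutive registers starting at the base.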
+
+int Decoder::FormatVFPinstruction(Instruction* instr, const char* format) {
+ Print(format);
+ return 0;
+}
+
+void Decoder::FormatNeonList(int Vd, int type) {
+ if (type == nlt_1) {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "{d%d}", Vd);
+ } else if (type == nlt_2) {
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "{d%d, d%d}", Vd, Vd + 1);
+ } else if (type == nlt_3) {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
+ "{d%d, d%d, d%d}", Vd, Vd + 1, Vd + 2);
+ } else if (type == nlt_4) {
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "{d%d, d%d, d%d, d%d}", Vd,
+ Vd + 1, Vd + 2, Vd + 3);
+ }
+}
+
+void Decoder::FormatNeonMemory(int Rn, int align, int Rm) {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "[r%d", Rn);
+ if (align != 0) {
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, ":%d", (1 << align) << 6);
+ }
+ if (Rm == 15) {
+ Print("]");
+ } else if (Rm == 13) {
+ Print("]!");
+ } else {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "], r%d", Rm);
+ }
+}
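+
+// For instance (illustrative values only): Rn = 2, align = 1, Rm = 13 prints
+// "[r2:128]!", i.e. a 128-bit-aligned access with base writeback; Rm = 15
+// gives plain "[r2:128]", and any other Rm is appended as a post-index
+// register, e.g. "[r2:128], r4".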
+
+// Print the movw or movt instruction.
+void Decoder::PrintMovwMovt(Instruction* instr) {
+ int imm = instr->ImmedMovwMovtValue();
+ int rd = instr->RdValue();
+ PrintRegister(rd);
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, ", #%d", imm);
+}
+
+// FormatOption takes a formatting string and interprets it based on
+// the current instruction. The format string points to the first
+// character of the option string (the option escape has already been
+// consumed by the caller). FormatOption returns the number of
+// characters that were consumed from the formatting string.
+int Decoder::FormatOption(Instruction* instr, const char* format) {
+ switch (format[0]) {
+ case 'a': { // 'a: accumulate multiplies
+ if (instr->Bit(21) == 0) {
+ Print("ul");
+ } else {
+ Print("la");
+ }
+ return 1;
+ }
+ case 'b': { // 'b: byte loads or stores
+ if (instr->HasB()) {
+ Print("b");
+ }
+ return 1;
+ }
+ case 'c': { // 'cond: conditional execution
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "cond"));
+ PrintCondition(instr);
+ return 4;
+ }
+ case 'd': { // 'd: vmov double immediate.
+ double d = instr->DoubleImmedVmov();
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "#%g", d);
+ return 1;
+ }
+ case 'f': { // 'f: bitfield instructions - v7 and above.
+ uint32_t lsbit = instr->Bits(11, 7);
+ uint32_t width = instr->Bits(20, 16) + 1;
+ if (instr->Bit(21) == 0) {
+ // BFC/BFI:
+        // Bits 20-16 represent the most-significant bit. Convert to width.
+ width -= lsbit;
+ MOZ_ASSERT(width > 0);
+ }
+ MOZ_ASSERT((width + lsbit) <= 32);
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "#%d, #%d", lsbit, width);
+ return 1;
+ }
+ case 'h': { // 'h: halfword operation for extra loads and stores
+ if (instr->HasH()) {
+ Print("h");
+ } else {
+ Print("b");
+ }
+ return 1;
+ }
+ case 'i': { // 'i: immediate value from adjacent bits.
+      // Expects tokens in the form imm%02d@%02d, e.g. imm05@07, imm10@16
+ int width = (format[3] - '0') * 10 + (format[4] - '0');
+ int lsb = (format[6] - '0') * 10 + (format[7] - '0');
+
+ MOZ_ASSERT((width >= 1) && (width <= 32));
+ MOZ_ASSERT((lsb >= 0) && (lsb <= 31));
+ MOZ_ASSERT((width + lsb) <= 32);
+
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d",
+ instr->Bits(width + lsb - 1, lsb));
+ return 8;
+ }
+ case 'l': { // 'l: branch and link
+ if (instr->HasLink()) {
+ Print("l");
+ }
+ return 1;
+ }
+ case 'm': {
+ if (format[1] == 'w') {
+ // 'mw: movt/movw instructions.
+ PrintMovwMovt(instr);
+ return 2;
+ }
+ if (format[1] == 'e') { // 'memop: load/store instructions.
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "memop"));
+ if (instr->HasL()) {
+ Print("ldr");
+ } else {
+ if ((instr->Bits(27, 25) == 0) && (instr->Bit(20) == 0) &&
+ (instr->Bits(7, 6) == 3) && (instr->Bit(4) == 1)) {
+ if (instr->Bit(5) == 1) {
+ Print("strd");
+ } else {
+ Print("ldrd");
+ }
+ return 5;
+ }
+ Print("str");
+ }
+ return 5;
+ }
+ // 'msg: for simulator break instructions
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "msg"));
+ uint8_t* str =
+ reinterpret_cast<uint8_t*>(instr->InstructionBits() & 0x0fffffff);
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%s",
+ converter_.NameInCode(str));
+ return 3;
+ }
+ case 'o': {
+ if ((format[3] == '1') && (format[4] == '2')) {
+ // 'off12: 12-bit offset for load and store instructions
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "off12"));
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d",
+ instr->Offset12Value());
+ return 5;
+ } else if (format[3] == '0') {
+ // 'off0to3and8to19 16-bit immediate encoded in bits 19-8 and 3-0.
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "off0to3and8to19"));
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "%d",
+ (instr->Bits(19, 8) << 4) + instr->Bits(3, 0));
+ return 15;
+ }
+ // 'off8: 8-bit offset for extra load and store instructions
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "off8"));
+ int offs8 = (instr->ImmedHValue() << 4) | instr->ImmedLValue();
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", offs8);
+ return 4;
+ }
+ case 'p': { // 'pu: P and U bits for load and store instructions
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "pu"));
+ PrintPU(instr);
+ return 2;
+ }
+ case 'r': {
+ return FormatRegister(instr, format);
+ }
+ case 's': {
+ if (format[1] == 'h') { // 'shift_op or 'shift_rm or 'shift_sat.
+ if (format[6] == 'o') { // 'shift_op
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "shift_op"));
+ if (instr->TypeValue() == 0) {
+ PrintShiftRm(instr);
+ } else {
+ MOZ_ASSERT(instr->TypeValue() == 1);
+ PrintShiftImm(instr);
+ }
+ return 8;
+ } else if (format[6] == 's') { // 'shift_sat.
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "shift_sat"));
+ PrintShiftSat(instr);
+ return 9;
+ } else { // 'shift_rm
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "shift_rm"));
+ PrintShiftRm(instr);
+ return 8;
+ }
+ } else if (format[1] == 'v') { // 'svc
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "svc"));
+ PrintSoftwareInterrupt(instr->SvcValue());
+ return 3;
+ } else if (format[1] == 'i') { // 'sign: signed extra loads and stores
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "sign"));
+ if (instr->HasSign()) {
+ Print("s");
+ }
+ return 4;
+ }
+ // 's: S field of data processing instructions
+ if (instr->HasS()) {
+ Print("s");
+ }
+ return 1;
+ }
+ case 't': { // 'target: target of branch instructions
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "target"));
+ int off = (instr->SImmed24Value() << 2) + 8;
+ out_buffer_pos_ += SNPrintF(
+ out_buffer_ + out_buffer_pos_, "%+d -> %s", off,
+ converter_.NameOfAddress(reinterpret_cast<uint8_t*>(instr) + off));
+ return 6;
+ }
+ case 'u': { // 'u: signed or unsigned multiplies
+ // The manual gets the meaning of bit 22 backwards in the multiply
+ // instruction overview on page A3.16.2. The instructions that
+ // exist in u and s variants are the following:
+ // smull A4.1.87
+ // umull A4.1.129
+ // umlal A4.1.128
+ // smlal A4.1.76
+      // For these, 0 means u and 1 means s, as can be seen on their individual
+ // pages. The other 18 mul instructions have the bit set or unset in
+ // arbitrary ways that are unrelated to the signedness of the instruction.
+ // None of these 18 instructions exist in both a 'u' and an 's' variant.
+
+ if (instr->Bit(22) == 0) {
+ Print("u");
+ } else {
+ Print("s");
+ }
+ return 1;
+ }
+ case 'v': {
+ return FormatVFPinstruction(instr, format);
+ }
+ case 'S':
+ case 'D': {
+ return FormatVFPRegister(instr, format);
+ }
+ case 'w': { // 'w: W field of load and store instructions
+ if (instr->HasW()) {
+ Print("!");
+ }
+ return 1;
+ }
+ default: {
+ MOZ_CRASH();
+ break;
+ }
+ }
+ MOZ_CRASH();
+ return -1;
+}
+
+// Format takes a formatting string for a whole instruction and prints it into
+// the output buffer. All escaped options are handed to FormatOption to be
+// parsed further.
+void Decoder::Format(Instruction* instr, const char* format) {
+ char cur = *format++;
+ while ((cur != 0) && (out_buffer_pos_ < (out_buffer_.length() - 1))) {
+ if (cur == '\'') { // Single quote is used as the formatting escape.
+ format += FormatOption(instr, format);
+ } else {
+ out_buffer_[out_buffer_pos_++] = cur;
+ }
+ cur = *format++;
+ }
+ out_buffer_[out_buffer_pos_] = '\0';
+}
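+
+// A sketch of how a format string expands in practice (condition and register
+// values are hypothetical): for an ADDEQS r0, r1, r2 instruction, the format
+// string "add'cond's 'rd, 'rn, 'shift_op" produces "addeqs r0, r1, r2":
+// 'cond prints "eq", 's prints "s" because the S bit is set, 'rd and 'rn
+// print the registers, and 'shift_op prints the shifter operand ("r2" here).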
+
+// The disassembler may end up decoding data inlined in the code. We do not want
+// it to crash if the data does not resemble any known instruction.
+# define VERIFY(condition) \
+ if (!(condition)) { \
+ Unknown(instr); \
+ return; \
+ }
+
+// For currently unimplemented decodings the disassembler calls Unknown(instr),
+// which will just print "unknown" for the instruction bits.
+void Decoder::Unknown(Instruction* instr) { Format(instr, "unknown"); }
+
+void Decoder::DecodeType01(Instruction* instr) {
+ int type = instr->TypeValue();
+ if ((type == 0) && instr->IsSpecialType0()) {
+ // multiply instruction or extra loads and stores
+ if (instr->Bits(7, 4) == 9) {
+ if (instr->Bit(24) == 0) {
+ // multiply instructions
+ if (instr->Bit(23) == 0) {
+ if (instr->Bit(21) == 0) {
+ // The MUL instruction description (A 4.1.33) refers to Rd as being
+ // the destination for the operation, but it confusingly uses the
+ // Rn field to encode it.
+ Format(instr, "mul'cond's 'rn, 'rm, 'rs");
+ } else {
+ if (instr->Bit(22) == 0) {
+ // The MLA instruction description (A 4.1.28) refers to the order
+ // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
+ // Rn field to encode the Rd register and the Rd field to encode
+ // the Rn register.
+ Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
+ } else {
+ // The MLS instruction description (A 4.1.29) refers to the order
+ // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
+ // Rn field to encode the Rd register and the Rd field to encode
+ // the Rn register.
+ Format(instr, "mls'cond's 'rn, 'rm, 'rs, 'rd");
+ }
+ }
+ } else {
+ // The signed/long multiply instructions use the terms RdHi and RdLo
+ // when referring to the target registers. They are mapped to the Rn
+ // and Rd fields as follows:
+ // RdLo == Rd field
+ // RdHi == Rn field
+ // The order of registers is: <RdLo>, <RdHi>, <Rm>, <Rs>
+ Format(instr, "'um'al'cond's 'rd, 'rn, 'rm, 'rs");
+ }
+ } else {
+ if (instr->Bits(ExclusiveOpHi, ExclusiveOpLo) == ExclusiveOpcode) {
+ if (instr->Bit(ExclusiveLoad) == 1) {
+ switch (instr->Bits(ExclusiveSizeHi, ExclusiveSizeLo)) {
+ case ExclusiveWord:
+ Format(instr, "ldrex'cond 'rt, ['rn]");
+ break;
+ case ExclusiveDouble:
+ Format(instr, "ldrexd'cond 'rt, ['rn]");
+ break;
+ case ExclusiveByte:
+ Format(instr, "ldrexb'cond 'rt, ['rn]");
+ break;
+ case ExclusiveHalf:
+ Format(instr, "ldrexh'cond 'rt, ['rn]");
+ break;
+ }
+ } else {
+ // The documentation names the low four bits of the
+ // store-exclusive instructions "Rt" but canonically
+ // for disassembly they are really "Rm".
+ switch (instr->Bits(ExclusiveSizeHi, ExclusiveSizeLo)) {
+ case ExclusiveWord:
+ Format(instr, "strex'cond 'rd, 'rm, ['rn]");
+ break;
+ case ExclusiveDouble:
+ Format(instr, "strexd'cond 'rd, 'rm, ['rn]");
+ break;
+ case ExclusiveByte:
+ Format(instr, "strexb'cond 'rd, 'rm, ['rn]");
+ break;
+ case ExclusiveHalf:
+ Format(instr, "strexh'cond 'rd, 'rm, ['rn]");
+ break;
+ }
+ }
+ } else {
+ Unknown(instr);
+ }
+ }
+ } else if ((instr->Bit(20) == 0) && ((instr->Bits(7, 4) & 0xd) == 0xd)) {
+ // ldrd, strd
+ switch (instr->PUField()) {
+ case da_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond's 'rd, ['rn], -'rm");
+ } else {
+ Format(instr, "'memop'cond's 'rd, ['rn], #-'off8");
+ }
+ break;
+ }
+ case ia_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond's 'rd, ['rn], +'rm");
+ } else {
+ Format(instr, "'memop'cond's 'rd, ['rn], #+'off8");
+ }
+ break;
+ }
+ case db_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond's 'rd, ['rn, -'rm]'w");
+ } else {
+ Format(instr, "'memop'cond's 'rd, ['rn, #-'off8]'w");
+ }
+ break;
+ }
+ case ib_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond's 'rd, ['rn, +'rm]'w");
+ } else {
+ Format(instr, "'memop'cond's 'rd, ['rn, #+'off8]'w");
+ }
+ break;
+ }
+ default: {
+ // The PU field is a 2-bit field.
+ MOZ_CRASH();
+ break;
+ }
+ }
+ } else {
+ // extra load/store instructions
+ switch (instr->PUField()) {
+ case da_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn], -'rm");
+ } else {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn], #-'off8");
+ }
+ break;
+ }
+ case ia_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn], +'rm");
+ } else {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn], #+'off8");
+ }
+ break;
+ }
+ case db_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn, -'rm]'w");
+ } else {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn, #-'off8]'w");
+ }
+ break;
+ }
+ case ib_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn, +'rm]'w");
+ } else {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn, #+'off8]'w");
+ }
+ break;
+ }
+ default: {
+ // The PU field is a 2-bit field.
+ MOZ_CRASH();
+ break;
+ }
+ }
+ return;
+ }
+ } else if ((type == 0) && instr->IsMiscType0()) {
+ if (instr->Bits(22, 21) == 1) {
+ switch (instr->BitField(7, 4)) {
+ case BX:
+ Format(instr, "bx'cond 'rm");
+ break;
+ case BLX:
+ Format(instr, "blx'cond 'rm");
+ break;
+ case BKPT:
+ Format(instr, "bkpt 'off0to3and8to19");
+ break;
+ default:
+ Unknown(instr); // not used by V8
+ break;
+ }
+ } else if (instr->Bits(22, 21) == 3) {
+ switch (instr->BitField(7, 4)) {
+ case CLZ:
+ Format(instr, "clz'cond 'rd, 'rm");
+ break;
+ default:
+ Unknown(instr); // not used by V8
+ break;
+ }
+ } else {
+ Unknown(instr); // not used by V8
+ }
+ } else if ((type == 1) && instr->IsNopType1()) {
+ Format(instr, "nop'cond");
+ } else if ((type == 1) && instr->IsCsdbType1()) {
+ Format(instr, "csdb'cond");
+ } else {
+ switch (instr->OpcodeField()) {
+ case AND: {
+ Format(instr, "and'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case EOR: {
+ Format(instr, "eor'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case SUB: {
+ Format(instr, "sub'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case RSB: {
+ Format(instr, "rsb'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case ADD: {
+ Format(instr, "add'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case ADC: {
+ Format(instr, "adc'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case SBC: {
+ Format(instr, "sbc'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case RSC: {
+ Format(instr, "rsc'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case TST: {
+ if (instr->HasS()) {
+ Format(instr, "tst'cond 'rn, 'shift_op");
+ } else {
+ Format(instr, "movw'cond 'mw");
+ }
+ break;
+ }
+ case TEQ: {
+ if (instr->HasS()) {
+ Format(instr, "teq'cond 'rn, 'shift_op");
+ } else {
+ // Other instructions matching this pattern are handled in the
+ // miscellaneous instructions part above.
+ MOZ_CRASH();
+ }
+ break;
+ }
+ case CMP: {
+ if (instr->HasS()) {
+ Format(instr, "cmp'cond 'rn, 'shift_op");
+ } else {
+ Format(instr, "movt'cond 'mw");
+ }
+ break;
+ }
+ case CMN: {
+ if (instr->HasS()) {
+ Format(instr, "cmn'cond 'rn, 'shift_op");
+ } else {
+ // Other instructions matching this pattern are handled in the
+ // miscellaneous instructions part above.
+ MOZ_CRASH();
+ }
+ break;
+ }
+ case ORR: {
+ Format(instr, "orr'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case MOV: {
+ Format(instr, "mov'cond's 'rd, 'shift_op");
+ break;
+ }
+ case BIC: {
+ Format(instr, "bic'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case MVN: {
+ Format(instr, "mvn'cond's 'rd, 'shift_op");
+ break;
+ }
+ default: {
+ // The Opcode field is a 4-bit field.
+ MOZ_CRASH();
+ break;
+ }
+ }
+ }
+}
+
+void Decoder::DecodeType2(Instruction* instr) {
+ switch (instr->PUField()) {
+ case da_x: {
+ if (instr->HasW()) {
+ Unknown(instr); // not used in V8
+ return;
+ }
+ Format(instr, "'memop'cond'b 'rd, ['rn], #-'off12");
+ break;
+ }
+ case ia_x: {
+ if (instr->HasW()) {
+ Unknown(instr); // not used in V8
+ return;
+ }
+ Format(instr, "'memop'cond'b 'rd, ['rn], #+'off12");
+ break;
+ }
+ case db_x: {
+ Format(instr, "'memop'cond'b 'rd, ['rn, #-'off12]'w");
+ break;
+ }
+ case ib_x: {
+ Format(instr, "'memop'cond'b 'rd, ['rn, #+'off12]'w");
+ break;
+ }
+ default: {
+ // The PU field is a 2-bit field.
+ MOZ_CRASH();
+ break;
+ }
+ }
+}
+
+void Decoder::DecodeType3(Instruction* instr) {
+ switch (instr->PUField()) {
+ case da_x: {
+ VERIFY(!instr->HasW());
+ Format(instr, "'memop'cond'b 'rd, ['rn], -'shift_rm");
+ break;
+ }
+ case ia_x: {
+ if (instr->Bit(4) == 0) {
+ Format(instr, "'memop'cond'b 'rd, ['rn], +'shift_rm");
+ } else {
+ if (instr->Bit(5) == 0) {
+ switch (instr->Bits(22, 21)) {
+ case 0:
+ if (instr->Bit(20) == 0) {
+ if (instr->Bit(6) == 0) {
+ Format(instr, "pkhbt'cond 'rd, 'rn, 'rm, lsl #'imm05@07");
+ } else {
+ if (instr->Bits(11, 7) == 0) {
+ Format(instr, "pkhtb'cond 'rd, 'rn, 'rm, asr #32");
+ } else {
+ Format(instr, "pkhtb'cond 'rd, 'rn, 'rm, asr #'imm05@07");
+ }
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 1:
+ MOZ_CRASH();
+ break;
+ case 2:
+ MOZ_CRASH();
+ break;
+ case 3:
+ Format(instr, "usat 'rd, #'imm05@16, 'rm'shift_sat");
+ break;
+ }
+ } else {
+ switch (instr->Bits(22, 21)) {
+ case 0:
+ MOZ_CRASH();
+ break;
+ case 1:
+ if (instr->Bits(9, 6) == 1) {
+ if (instr->Bit(20) == 0) {
+ if (instr->Bits(19, 16) == 0xF) {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "sxtb'cond 'rd, 'rm");
+ break;
+ case 1:
+ Format(instr, "sxtb'cond 'rd, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "sxtb'cond 'rd, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "sxtb'cond 'rd, 'rm, ror #24");
+ break;
+ }
+ } else {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "sxtab'cond 'rd, 'rn, 'rm");
+ break;
+ case 1:
+ Format(instr, "sxtab'cond 'rd, 'rn, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "sxtab'cond 'rd, 'rn, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "sxtab'cond 'rd, 'rn, 'rm, ror #24");
+ break;
+ }
+ }
+ } else {
+ if (instr->Bits(19, 16) == 0xF) {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "sxth'cond 'rd, 'rm");
+ break;
+ case 1:
+ Format(instr, "sxth'cond 'rd, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "sxth'cond 'rd, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "sxth'cond 'rd, 'rm, ror #24");
+ break;
+ }
+ } else {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "sxtah'cond 'rd, 'rn, 'rm");
+ break;
+ case 1:
+ Format(instr, "sxtah'cond 'rd, 'rn, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "sxtah'cond 'rd, 'rn, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "sxtah'cond 'rd, 'rn, 'rm, ror #24");
+ break;
+ }
+ }
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 2:
+ if ((instr->Bit(20) == 0) && (instr->Bits(9, 6) == 1)) {
+ if (instr->Bits(19, 16) == 0xF) {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "uxtb16'cond 'rd, 'rm");
+ break;
+ case 1:
+ Format(instr, "uxtb16'cond 'rd, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "uxtb16'cond 'rd, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "uxtb16'cond 'rd, 'rm, ror #24");
+ break;
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 3:
+ if ((instr->Bits(9, 6) == 1)) {
+ if ((instr->Bit(20) == 0)) {
+ if (instr->Bits(19, 16) == 0xF) {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "uxtb'cond 'rd, 'rm");
+ break;
+ case 1:
+ Format(instr, "uxtb'cond 'rd, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "uxtb'cond 'rd, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "uxtb'cond 'rd, 'rm, ror #24");
+ break;
+ }
+ } else {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "uxtab'cond 'rd, 'rn, 'rm");
+ break;
+ case 1:
+ Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #24");
+ break;
+ }
+ }
+ } else {
+ if (instr->Bits(19, 16) == 0xF) {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "uxth'cond 'rd, 'rm");
+ break;
+ case 1:
+ Format(instr, "uxth'cond 'rd, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "uxth'cond 'rd, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "uxth'cond 'rd, 'rm, ror #24");
+ break;
+ }
+ } else {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "uxtah'cond 'rd, 'rn, 'rm");
+ break;
+ case 1:
+ Format(instr, "uxtah'cond 'rd, 'rn, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "uxtah'cond 'rd, 'rn, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "uxtah'cond 'rd, 'rn, 'rm, ror #24");
+ break;
+ }
+ }
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ }
+ }
+ }
+ break;
+ }
+ case db_x: {
+ if (instr->Bits(22, 20) == 0x5) {
+ if (instr->Bits(7, 4) == 0x1) {
+ if (instr->Bits(15, 12) == 0xF) {
+ Format(instr, "smmul'cond 'rn, 'rm, 'rs");
+ } else {
+ // SMMLA (in V8 notation matching ARM ISA format)
+ Format(instr, "smmla'cond 'rn, 'rm, 'rs, 'rd");
+ }
+ break;
+ }
+ }
+ bool FLAG_enable_sudiv = true; // Flag doesn't exist in our engine.
+ if (FLAG_enable_sudiv) {
+ if (instr->Bits(5, 4) == 0x1) {
+ if ((instr->Bit(22) == 0x0) && (instr->Bit(20) == 0x1)) {
+ if (instr->Bit(21) == 0x1) {
+ // UDIV (in V8 notation matching ARM ISA format) rn = rm/rs
+ Format(instr, "udiv'cond'b 'rn, 'rm, 'rs");
+ } else {
+ // SDIV (in V8 notation matching ARM ISA format) rn = rm/rs
+ Format(instr, "sdiv'cond'b 'rn, 'rm, 'rs");
+ }
+ break;
+ }
+ }
+ }
+ Format(instr, "'memop'cond'b 'rd, ['rn, -'shift_rm]'w");
+ break;
+ }
+ case ib_x: {
+ if (instr->HasW() && (instr->Bits(6, 4) == 0x5)) {
+ uint32_t widthminus1 = static_cast<uint32_t>(instr->Bits(20, 16));
+ uint32_t lsbit = static_cast<uint32_t>(instr->Bits(11, 7));
+ uint32_t msbit = widthminus1 + lsbit;
+ if (msbit <= 31) {
+ if (instr->Bit(22)) {
+ Format(instr, "ubfx'cond 'rd, 'rm, 'f");
+ } else {
+ Format(instr, "sbfx'cond 'rd, 'rm, 'f");
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ } else if (!instr->HasW() && (instr->Bits(6, 4) == 0x1)) {
+ uint32_t lsbit = static_cast<uint32_t>(instr->Bits(11, 7));
+ uint32_t msbit = static_cast<uint32_t>(instr->Bits(20, 16));
+ if (msbit >= lsbit) {
+ if (instr->RmValue() == 15) {
+ Format(instr, "bfc'cond 'rd, 'f");
+ } else {
+ Format(instr, "bfi'cond 'rd, 'rm, 'f");
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ } else {
+ Format(instr, "'memop'cond'b 'rd, ['rn, +'shift_rm]'w");
+ }
+ break;
+ }
+ default: {
+ // The PU field is a 2-bit field.
+ MOZ_CRASH();
+ break;
+ }
+ }
+}
+
+void Decoder::DecodeType4(Instruction* instr) {
+ if (instr->Bit(22) != 0) {
+ // Privileged mode currently not supported.
+ Unknown(instr);
+ } else {
+ if (instr->HasL()) {
+ Format(instr, "ldm'cond'pu 'rn'w, 'rlist");
+ } else {
+ Format(instr, "stm'cond'pu 'rn'w, 'rlist");
+ }
+ }
+}
+
+void Decoder::DecodeType5(Instruction* instr) {
+ Format(instr, "b'l'cond 'target");
+}
+
+void Decoder::DecodeType6(Instruction* instr) {
+ DecodeType6CoprocessorIns(instr);
+}
+
+int Decoder::DecodeType7(Instruction* instr) {
+ if (instr->Bit(24) == 1) {
+ if (instr->SvcValue() >= kStopCode) {
+ Format(instr, "stop'cond 'svc");
+ // Also print the stop message. Its address is encoded
+ // in the following 4 bytes.
+ out_buffer_pos_ += SNPrintF(
+ out_buffer_ + out_buffer_pos_, "\n %p %08x stop message: %s",
+ reinterpret_cast<void*>(instr + Instruction::kInstrSize),
+ *reinterpret_cast<uint32_t*>(instr + Instruction::kInstrSize),
+ *reinterpret_cast<char**>(instr + Instruction::kInstrSize));
+ // We have decoded 2 * Instruction::kInstrSize bytes.
+ return 2 * Instruction::kInstrSize;
+ } else {
+ Format(instr, "svc'cond 'svc");
+ }
+ } else {
+ DecodeTypeVFP(instr);
+ }
+ return Instruction::kInstrSize;
+}
+
+// void Decoder::DecodeTypeVFP(Instruction* instr)
+// vmov: Sn = Rt
+// vmov: Rt = Sn
+// vcvt: Dd = Sm
+// vcvt: Sd = Dm
+// vcvt.f64.s32 Dd, Dd, #<fbits>
+// Dd = vabs(Dm)
+// Sd = vabs(Sm)
+// Dd = vneg(Dm)
+// Sd = vneg(Sm)
+// Dd = vadd(Dn, Dm)
+// Sd = vadd(Sn, Sm)
+// Dd = vsub(Dn, Dm)
+// Sd = vsub(Sn, Sm)
+// Dd = vmul(Dn, Dm)
+// Sd = vmul(Sn, Sm)
+// Dd = vmla(Dn, Dm)
+// Sd = vmla(Sn, Sm)
+// Dd = vmls(Dn, Dm)
+// Sd = vmls(Sn, Sm)
+// Dd = vdiv(Dn, Dm)
+// Sd = vdiv(Sn, Sm)
+// vcmp(Dd, Dm)
+// vcmp(Sd, Sm)
+// Dd = vsqrt(Dm)
+// Sd = vsqrt(Sm)
+// vmrs
+// vmsr
+void Decoder::DecodeTypeVFP(Instruction* instr) {
+ VERIFY((instr->TypeValue() == 7) && (instr->Bit(24) == 0x0));
+ VERIFY(instr->Bits(11, 9) == 0x5);
+
+ if (instr->Bit(4) == 0) {
+ if (instr->Opc1Value() == 0x7) {
+ // Other data processing instructions
+ if ((instr->Opc2Value() == 0x0) && (instr->Opc3Value() == 0x1)) {
+ // vmov register to register.
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vmov'cond.f64 'Dd, 'Dm");
+ } else {
+ Format(instr, "vmov'cond.f32 'Sd, 'Sm");
+ }
+ } else if ((instr->Opc2Value() == 0x0) && (instr->Opc3Value() == 0x3)) {
+ // vabs
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vabs'cond.f64 'Dd, 'Dm");
+ } else {
+ Format(instr, "vabs'cond.f32 'Sd, 'Sm");
+ }
+ } else if ((instr->Opc2Value() == 0x1) && (instr->Opc3Value() == 0x1)) {
+ // vneg
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vneg'cond.f64 'Dd, 'Dm");
+ } else {
+ Format(instr, "vneg'cond.f32 'Sd, 'Sm");
+ }
+ } else if ((instr->Opc2Value() == 0x7) && (instr->Opc3Value() == 0x3)) {
+ DecodeVCVTBetweenDoubleAndSingle(instr);
+ } else if ((instr->Opc2Value() == 0x8) && (instr->Opc3Value() & 0x1)) {
+ DecodeVCVTBetweenFloatingPointAndInteger(instr);
+ } else if ((instr->Opc2Value() == 0xA) && (instr->Opc3Value() == 0x3) &&
+ (instr->Bit(8) == 1)) {
+ // vcvt.f64.s32 Dd, Dd, #<fbits>
+ int fraction_bits = 32 - ((instr->Bits(3, 0) << 1) | instr->Bit(5));
+ Format(instr, "vcvt'cond.f64.s32 'Dd, 'Dd");
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, ", #%d", fraction_bits);
+ } else if (((instr->Opc2Value() >> 1) == 0x6) &&
+ (instr->Opc3Value() & 0x1)) {
+ DecodeVCVTBetweenFloatingPointAndInteger(instr);
+ } else if (((instr->Opc2Value() == 0x4) || (instr->Opc2Value() == 0x5)) &&
+ (instr->Opc3Value() & 0x1)) {
+ DecodeVCMP(instr);
+ } else if (((instr->Opc2Value() == 0x1)) && (instr->Opc3Value() == 0x3)) {
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vsqrt'cond.f64 'Dd, 'Dm");
+ } else {
+ Format(instr, "vsqrt'cond.f32 'Sd, 'Sm");
+ }
+ } else if (instr->Opc3Value() == 0x0) {
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vmov'cond.f64 'Dd, 'd");
+ } else {
+ Unknown(instr); // Not used by V8.
+ }
+ } else if (((instr->Opc2Value() == 0x6)) && instr->Opc3Value() == 0x3) {
+ // vrintz - round towards zero (truncate)
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vrintz'cond.f64.f64 'Dd, 'Dm");
+ } else {
+ Format(instr, "vrintz'cond.f32.f32 'Sd, 'Sm");
+ }
+ } else {
+ Unknown(instr); // Not used by V8.
+ }
+ } else if (instr->Opc1Value() == 0x3) {
+ if (instr->SzValue() == 0x1) {
+ if (instr->Opc3Value() & 0x1) {
+ Format(instr, "vsub'cond.f64 'Dd, 'Dn, 'Dm");
+ } else {
+ Format(instr, "vadd'cond.f64 'Dd, 'Dn, 'Dm");
+ }
+ } else {
+ if (instr->Opc3Value() & 0x1) {
+ Format(instr, "vsub'cond.f32 'Sd, 'Sn, 'Sm");
+ } else {
+ Format(instr, "vadd'cond.f32 'Sd, 'Sn, 'Sm");
+ }
+ }
+ } else if ((instr->Opc1Value() == 0x2) && !(instr->Opc3Value() & 0x1)) {
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vmul'cond.f64 'Dd, 'Dn, 'Dm");
+ } else {
+ Format(instr, "vmul'cond.f32 'Sd, 'Sn, 'Sm");
+ }
+ } else if ((instr->Opc1Value() == 0x0) && !(instr->Opc3Value() & 0x1)) {
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vmla'cond.f64 'Dd, 'Dn, 'Dm");
+ } else {
+ Format(instr, "vmla'cond.f32 'Sd, 'Sn, 'Sm");
+ }
+ } else if ((instr->Opc1Value() == 0x0) && (instr->Opc3Value() & 0x1)) {
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vmls'cond.f64 'Dd, 'Dn, 'Dm");
+ } else {
+ Format(instr, "vmls'cond.f32 'Sd, 'Sn, 'Sm");
+ }
+ } else if ((instr->Opc1Value() == 0x4) && !(instr->Opc3Value() & 0x1)) {
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vdiv'cond.f64 'Dd, 'Dn, 'Dm");
+ } else {
+ Format(instr, "vdiv'cond.f32 'Sd, 'Sn, 'Sm");
+ }
+ } else {
+ Unknown(instr); // Not used by V8.
+ }
+ } else {
+ if ((instr->VCValue() == 0x0) && (instr->VAValue() == 0x0)) {
+ DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(instr);
+ } else if ((instr->VLValue() == 0x0) && (instr->VCValue() == 0x1) &&
+ (instr->Bit(23) == 0x0)) {
+ if (instr->Bit(21) == 0x0) {
+ Format(instr, "vmov'cond.32 'Dd[0], 'rt");
+ } else {
+ Format(instr, "vmov'cond.32 'Dd[1], 'rt");
+ }
+ } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1) &&
+ (instr->Bit(23) == 0x0)) {
+ if (instr->Bit(21) == 0x0) {
+ Format(instr, "vmov'cond.32 'rt, 'Dd[0]");
+ } else {
+ Format(instr, "vmov'cond.32 'rt, 'Dd[1]");
+ }
+ } else if ((instr->VCValue() == 0x0) && (instr->VAValue() == 0x7) &&
+ (instr->Bits(19, 16) == 0x1)) {
+ if (instr->VLValue() == 0) {
+ if (instr->Bits(15, 12) == 0xF) {
+ Format(instr, "vmsr'cond FPSCR, APSR");
+ } else {
+ Format(instr, "vmsr'cond FPSCR, 'rt");
+ }
+ } else {
+ if (instr->Bits(15, 12) == 0xF) {
+ Format(instr, "vmrs'cond APSR, FPSCR");
+ } else {
+ Format(instr, "vmrs'cond 'rt, FPSCR");
+ }
+ }
+ }
+ }
+}
+
+void Decoder::DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(
+ Instruction* instr) {
+ VERIFY((instr->Bit(4) == 1) && (instr->VCValue() == 0x0) &&
+ (instr->VAValue() == 0x0));
+
+ bool to_arm_register = (instr->VLValue() == 0x1);
+
+ if (to_arm_register) {
+ Format(instr, "vmov'cond 'rt, 'Sn");
+ } else {
+ Format(instr, "vmov'cond 'Sn, 'rt");
+ }
+}
+
+void Decoder::DecodeVCMP(Instruction* instr) {
+ VERIFY((instr->Bit(4) == 0) && (instr->Opc1Value() == 0x7));
+ VERIFY(((instr->Opc2Value() == 0x4) || (instr->Opc2Value() == 0x5)) &&
+ (instr->Opc3Value() & 0x1));
+
+ // Comparison.
+ bool dp_operation = (instr->SzValue() == 1);
+ bool raise_exception_for_qnan = (instr->Bit(7) == 0x1);
+
+ if (dp_operation && !raise_exception_for_qnan) {
+ if (instr->Opc2Value() == 0x4) {
+ Format(instr, "vcmp'cond.f64 'Dd, 'Dm");
+ } else if (instr->Opc2Value() == 0x5) {
+ Format(instr, "vcmp'cond.f64 'Dd, #0.0");
+ } else {
+ Unknown(instr); // invalid
+ }
+ } else if (!raise_exception_for_qnan) {
+ if (instr->Opc2Value() == 0x4) {
+ Format(instr, "vcmp'cond.f32 'Sd, 'Sm");
+ } else if (instr->Opc2Value() == 0x5) {
+ Format(instr, "vcmp'cond.f32 'Sd, #0.0");
+ } else {
+ Unknown(instr); // invalid
+ }
+ } else {
+ Unknown(instr); // Not used by V8.
+ }
+}
+
+void Decoder::DecodeVCVTBetweenDoubleAndSingle(Instruction* instr) {
+ VERIFY((instr->Bit(4) == 0) && (instr->Opc1Value() == 0x7));
+ VERIFY((instr->Opc2Value() == 0x7) && (instr->Opc3Value() == 0x3));
+
+ bool double_to_single = (instr->SzValue() == 1);
+
+ if (double_to_single) {
+ Format(instr, "vcvt'cond.f32.f64 'Sd, 'Dm");
+ } else {
+ Format(instr, "vcvt'cond.f64.f32 'Dd, 'Sm");
+ }
+}
+
+void Decoder::DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr) {
+ VERIFY((instr->Bit(4) == 0) && (instr->Opc1Value() == 0x7));
+ VERIFY(((instr->Opc2Value() == 0x8) && (instr->Opc3Value() & 0x1)) ||
+ (((instr->Opc2Value() >> 1) == 0x6) && (instr->Opc3Value() & 0x1)));
+
+ bool to_integer = (instr->Bit(18) == 1);
+ bool dp_operation = (instr->SzValue() == 1);
+ if (to_integer) {
+ bool unsigned_integer = (instr->Bit(16) == 0);
+
+ if (dp_operation) {
+ if (unsigned_integer) {
+ Format(instr, "vcvt'cond.u32.f64 'Sd, 'Dm");
+ } else {
+ Format(instr, "vcvt'cond.s32.f64 'Sd, 'Dm");
+ }
+ } else {
+ if (unsigned_integer) {
+ Format(instr, "vcvt'cond.u32.f32 'Sd, 'Sm");
+ } else {
+ Format(instr, "vcvt'cond.s32.f32 'Sd, 'Sm");
+ }
+ }
+ } else {
+ bool unsigned_integer = (instr->Bit(7) == 0);
+
+ if (dp_operation) {
+ if (unsigned_integer) {
+ Format(instr, "vcvt'cond.f64.u32 'Dd, 'Sm");
+ } else {
+ Format(instr, "vcvt'cond.f64.s32 'Dd, 'Sm");
+ }
+ } else {
+ if (unsigned_integer) {
+ Format(instr, "vcvt'cond.f32.u32 'Sd, 'Sm");
+ } else {
+ Format(instr, "vcvt'cond.f32.s32 'Sd, 'Sm");
+ }
+ }
+ }
+}
+
+// Decode Type 6 coprocessor instructions.
+// Dm = vmov(Rt, Rt2)
+// <Rt, Rt2> = vmov(Dm)
+// Ddst = MEM(Rbase + 4*offset).
+// MEM(Rbase + 4*offset) = Dsrc.
+void Decoder::DecodeType6CoprocessorIns(Instruction* instr) {
+ VERIFY(instr->TypeValue() == 6);
+
+ if (instr->CoprocessorValue() == 0xA) {
+ switch (instr->OpcodeValue()) {
+ case 0x8:
+ case 0xA:
+ if (instr->HasL()) {
+ Format(instr, "vldr'cond 'Sd, ['rn - 4*'imm08@00]");
+ } else {
+ Format(instr, "vstr'cond 'Sd, ['rn - 4*'imm08@00]");
+ }
+ break;
+ case 0xC:
+ case 0xE:
+ if (instr->HasL()) {
+ Format(instr, "vldr'cond 'Sd, ['rn + 4*'imm08@00]");
+ } else {
+ Format(instr, "vstr'cond 'Sd, ['rn + 4*'imm08@00]");
+ }
+ break;
+ case 0x4:
+ case 0x5:
+ case 0x6:
+ case 0x7:
+ case 0x9:
+ case 0xB: {
+ bool to_vfp_register = (instr->VLValue() == 0x1);
+ if (to_vfp_register) {
+ Format(instr, "vldm'cond'pu 'rn'w, {'Sd-'Sd+}");
+ } else {
+ Format(instr, "vstm'cond'pu 'rn'w, {'Sd-'Sd+}");
+ }
+ break;
+ }
+ default:
+ Unknown(instr); // Not used by V8.
+ }
+ } else if (instr->CoprocessorValue() == 0xB) {
+ switch (instr->OpcodeValue()) {
+ case 0x2:
+ // Load and store double to two GP registers
+ if (instr->Bits(7, 6) != 0 || instr->Bit(4) != 1) {
+ Unknown(instr); // Not used by V8.
+ } else if (instr->HasL()) {
+ Format(instr, "vmov'cond 'rt, 'rn, 'Dm");
+ } else {
+ Format(instr, "vmov'cond 'Dm, 'rt, 'rn");
+ }
+ break;
+ case 0x8:
+ case 0xA:
+ if (instr->HasL()) {
+ Format(instr, "vldr'cond 'Dd, ['rn - 4*'imm08@00]");
+ } else {
+ Format(instr, "vstr'cond 'Dd, ['rn - 4*'imm08@00]");
+ }
+ break;
+ case 0xC:
+ case 0xE:
+ if (instr->HasL()) {
+ Format(instr, "vldr'cond 'Dd, ['rn + 4*'imm08@00]");
+ } else {
+ Format(instr, "vstr'cond 'Dd, ['rn + 4*'imm08@00]");
+ }
+ break;
+ case 0x4:
+ case 0x5:
+ case 0x6:
+ case 0x7:
+ case 0x9:
+ case 0xB: {
+ bool to_vfp_register = (instr->VLValue() == 0x1);
+ if (to_vfp_register) {
+ Format(instr, "vldm'cond'pu 'rn'w, {'Dd-'Dd+}");
+ } else {
+ Format(instr, "vstm'cond'pu 'rn'w, {'Dd-'Dd+}");
+ }
+ break;
+ }
+ default:
+ Unknown(instr); // Not used by V8.
+ }
+ } else {
+ Unknown(instr); // Not used by V8.
+ }
+}
+
+void Decoder::DecodeSpecialCondition(Instruction* instr) {
+ switch (instr->SpecialValue()) {
+ case 5:
+ if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
+ (instr->Bit(4) == 1)) {
+ // vmovl signed
+ if ((instr->VdValue() & 1) != 0) Unknown(instr);
+ int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);
+ int Vm = (instr->Bit(5) << 4) | instr->VmValue();
+ int imm3 = instr->Bits(21, 19);
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
+ "vmovl.s%d q%d, d%d", imm3 * 8, Vd, Vm);
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 7:
+ if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
+ (instr->Bit(4) == 1)) {
+ // vmovl unsigned
+ if ((instr->VdValue() & 1) != 0) Unknown(instr);
+ int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);
+ int Vm = (instr->Bit(5) << 4) | instr->VmValue();
+ int imm3 = instr->Bits(21, 19);
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
+ "vmovl.u%d q%d, d%d", imm3 * 8, Vd, Vm);
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 8:
+ if (instr->Bits(21, 20) == 0) {
+ // vst1
+ int Vd = (instr->Bit(22) << 4) | instr->VdValue();
+ int Rn = instr->VnValue();
+ int type = instr->Bits(11, 8);
+ int size = instr->Bits(7, 6);
+ int align = instr->Bits(5, 4);
+ int Rm = instr->VmValue();
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "vst1.%d ",
+ (1 << size) << 3);
+ FormatNeonList(Vd, type);
+ Print(", ");
+ FormatNeonMemory(Rn, align, Rm);
+ } else if (instr->Bits(21, 20) == 2) {
+ // vld1
+ int Vd = (instr->Bit(22) << 4) | instr->VdValue();
+ int Rn = instr->VnValue();
+ int type = instr->Bits(11, 8);
+ int size = instr->Bits(7, 6);
+ int align = instr->Bits(5, 4);
+ int Rm = instr->VmValue();
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "vld1.%d ",
+ (1 << size) << 3);
+ FormatNeonList(Vd, type);
+ Print(", ");
+ FormatNeonMemory(Rn, align, Rm);
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 9:
+ if (instr->Bits(21, 20) == 0 && instr->Bits(9, 8) == 0) {
+ // vst1
+ int Vd = (instr->Bit(22) << 4) | instr->VdValue();
+ int Rn = instr->VnValue();
+ int size = instr->Bits(11, 10);
+ int index = instr->Bits(7, 5);
+ int align = instr->Bit(4);
+ int Rm = instr->VmValue();
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "vst1.%d {d%d[%d]}, ",
+ (1 << size) << 3, Vd, index);
+ FormatNeonMemory(Rn, align, Rm);
+ } else if (instr->Bits(21, 20) == 2 && instr->Bits(9, 8) == 0) {
+ // vld1
+ int Vd = (instr->Bit(22) << 4) | instr->VdValue();
+ int Rn = instr->VnValue();
+ int size = instr->Bits(11, 10);
+ int index = instr->Bits(7, 5);
+ int align = instr->Bit(4);
+ int Rm = instr->VmValue();
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "vld1.%d {d%d[%d]}, ",
+ (1 << size) << 3, Vd, index);
+ FormatNeonMemory(Rn, align, Rm);
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 0xA:
+ if (instr->Bits(22, 20) == 7) {
+ const char* option = "?";
+ switch (instr->Bits(3, 0)) {
+ case 2:
+ option = "oshst";
+ break;
+ case 3:
+ option = "osh";
+ break;
+ case 6:
+ option = "nshst";
+ break;
+ case 7:
+ option = "nsh";
+ break;
+ case 10:
+ option = "ishst";
+ break;
+ case 11:
+ option = "ish";
+ break;
+ case 14:
+ option = "st";
+ break;
+ case 15:
+ option = "sy";
+ break;
+ }
+ switch (instr->Bits(7, 4)) {
+ case 1:
+ Print("clrex");
+ break;
+ case 4:
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "dsb %s", option);
+ break;
+ case 5:
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "dmb %s", option);
+ break;
+ default:
+ Unknown(instr);
+ }
+ break;
+ }
+ [[fallthrough]];
+ case 0xB:
+ if ((instr->Bits(22, 20) == 5) && (instr->Bits(15, 12) == 0xf)) {
+ int Rn = instr->Bits(19, 16);
+ int offset = instr->Bits(11, 0);
+ if (offset == 0) {
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "pld [r%d]", Rn);
+ } else if (instr->Bit(23) == 0) {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
+ "pld [r%d, #-%d]", Rn, offset);
+ } else {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
+ "pld [r%d, #+%d]", Rn, offset);
+ }
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 0x1D:
+ if (instr->Opc1Value() == 0x7 && instr->Bits(19, 18) == 0x2 &&
+ instr->Bits(11, 9) == 0x5 && instr->Bits(7, 6) == 0x1 &&
+ instr->Bit(4) == 0x0) {
+ // VRINTA, VRINTN, VRINTP, VRINTM (floating-point)
+ bool dp_operation = (instr->SzValue() == 1);
+ int rounding_mode = instr->Bits(17, 16);
+ switch (rounding_mode) {
+ case 0x0:
+ if (dp_operation) {
+ Format(instr, "vrinta.f64.f64 'Dd, 'Dm");
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 0x1:
+ if (dp_operation) {
+ Format(instr, "vrintn.f64.f64 'Dd, 'Dm");
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 0x2:
+ if (dp_operation) {
+ Format(instr, "vrintp.f64.f64 'Dd, 'Dm");
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 0x3:
+ if (dp_operation) {
+ Format(instr, "vrintm.f64.f64 'Dd, 'Dm");
+ } else {
+ Unknown(instr);
+ }
+ break;
+ default:
+ MOZ_CRASH(); // Case analysis is exhaustive.
+ break;
+ }
+ } else {
+ Unknown(instr);
+ }
+ break;
+ default:
+ Unknown(instr);
+ break;
+ }
+}
+
+# undef VERIFY
+
+bool Decoder::IsConstantPoolAt(uint8_t* instr_ptr) {
+ int instruction_bits = *(reinterpret_cast<int*>(instr_ptr));
+ return (instruction_bits & kConstantPoolMarkerMask) == kConstantPoolMarker;
+}
+
+int Decoder::ConstantPoolSizeAt(uint8_t* instr_ptr) {
+ if (IsConstantPoolAt(instr_ptr)) {
+ int instruction_bits = *(reinterpret_cast<int*>(instr_ptr));
+ return DecodeConstantPoolLength(instruction_bits);
+ } else {
+ return -1;
+ }
+}
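+
+// A brief sketch of the intended use when walking a code range (the exact
+// skip distance for a pool is determined by DecodeConstantPoolLength, so it
+// is left symbolic here):
+//
+//   if (Decoder::IsConstantPoolAt(pc)) {
+//     int length = Decoder::ConstantPoolSizeAt(pc);  // -1 if not a pool
+//     // Skip the marker plus 'length' worth of pool data instead of
+//     // disassembling it.
+//   }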
+
+// Disassemble the instruction at *instr_ptr into the output buffer.
+int Decoder::InstructionDecode(uint8_t* instr_ptr) {
+ Instruction* instr = Instruction::At(instr_ptr);
+ // Print raw instruction bytes.
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%08x ",
+ instr->InstructionBits());
+ if (instr->ConditionField() == kSpecialCondition) {
+ DecodeSpecialCondition(instr);
+ return Instruction::kInstrSize;
+ }
+ int instruction_bits = *(reinterpret_cast<int*>(instr_ptr));
+ if ((instruction_bits & kConstantPoolMarkerMask) == kConstantPoolMarker) {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
+ "constant pool begin (length %d)",
+ DecodeConstantPoolLength(instruction_bits));
+ return Instruction::kInstrSize;
+ } else if (instruction_bits == kCodeAgeJumpInstruction) {
+    // The code age prologue has a constant immediately following the jump
+    // instruction.
+ Instruction* target = Instruction::At(instr_ptr + Instruction::kInstrSize);
+ DecodeType2(instr);
+ SNPrintF(out_buffer_ + out_buffer_pos_, " (0x%08x)",
+ target->InstructionBits());
+ return 2 * Instruction::kInstrSize;
+ }
+ switch (instr->TypeValue()) {
+ case 0:
+ case 1: {
+ DecodeType01(instr);
+ break;
+ }
+ case 2: {
+ DecodeType2(instr);
+ break;
+ }
+ case 3: {
+ DecodeType3(instr);
+ break;
+ }
+ case 4: {
+ DecodeType4(instr);
+ break;
+ }
+ case 5: {
+ DecodeType5(instr);
+ break;
+ }
+ case 6: {
+ DecodeType6(instr);
+ break;
+ }
+ case 7: {
+ return DecodeType7(instr);
+ }
+ default: {
+ // The type field is 3-bits in the ARM encoding.
+ MOZ_CRASH();
+ break;
+ }
+ }
+ return Instruction::kInstrSize;
+}
+
+} // namespace disasm
+
+# undef STRING_STARTS_WITH
+# undef VERIFY
+
+//------------------------------------------------------------------------------
+
+namespace disasm {
+
+const char* NameConverter::NameOfAddress(uint8_t* addr) const {
+ SNPrintF(tmp_buffer_, "%p", addr);
+ return tmp_buffer_.start();
+}
+
+const char* NameConverter::NameOfConstant(uint8_t* addr) const {
+ return NameOfAddress(addr);
+}
+
+const char* NameConverter::NameOfCPURegister(int reg) const {
+ return disasm::Registers::Name(reg);
+}
+
+const char* NameConverter::NameOfByteCPURegister(int reg) const {
+ MOZ_CRASH(); // ARM does not have the concept of a byte register
+ return "nobytereg";
+}
+
+const char* NameConverter::NameOfXMMRegister(int reg) const {
+ MOZ_CRASH(); // ARM does not have any XMM registers
+ return "noxmmreg";
+}
+
+const char* NameConverter::NameInCode(uint8_t* addr) const {
+  // The default name converter is called for unknown code, so we will not try
+  // to access any memory.
+ return "";
+}
+
+//------------------------------------------------------------------------------
+
+Disassembler::Disassembler(const NameConverter& converter)
+ : converter_(converter) {}
+
+Disassembler::~Disassembler() {}
+
+int Disassembler::InstructionDecode(V8Vector<char> buffer,
+ uint8_t* instruction) {
+ Decoder d(converter_, buffer);
+ return d.InstructionDecode(instruction);
+}
+
+int Disassembler::ConstantPoolSizeAt(uint8_t* instruction) {
+ return Decoder::ConstantPoolSizeAt(instruction);
+}
+
+void Disassembler::Disassemble(FILE* f, uint8_t* begin, uint8_t* end) {
+ NameConverter converter;
+ Disassembler d(converter);
+ for (uint8_t* pc = begin; pc < end;) {
+ EmbeddedVector<char, ReasonableBufferSize> buffer;
+ buffer[0] = '\0';
+ uint8_t* prev_pc = pc;
+ pc += d.InstructionDecode(buffer, pc);
+ fprintf(f, "%p %08x %s\n", prev_pc,
+ *reinterpret_cast<int32_t*>(prev_pc), buffer.start());
+ }
+}
+
+} // namespace disasm
+} // namespace jit
+} // namespace js
+
+#endif // JS_DISASM_ARM
diff --git a/js/src/jit/arm/disasm/Disasm-arm.h b/js/src/jit/arm/disasm/Disasm-arm.h
new file mode 100644
index 0000000000..8a0dd97c32
--- /dev/null
+++ b/js/src/jit/arm/disasm/Disasm-arm.h
@@ -0,0 +1,141 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ */
+// Copyright 2007-2008 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef jit_arm_disasm_Disasm_arm_h
+#define jit_arm_disasm_Disasm_arm_h
+
+#ifdef JS_DISASM_ARM
+
+# include "mozilla/Assertions.h"
+# include "mozilla/Types.h"
+
+# include <stdio.h>
+
+namespace js {
+namespace jit {
+namespace disasm {
+
+typedef unsigned char byte;
+
+// A reasonable (i.e., safe) buffer size for the disassembly of a single
+// instruction.
+const int ReasonableBufferSize = 256;
+
+// Vector as used by the original code to allow for minimal modification.
+// It functions exactly like a character array with helper methods.
+template <typename T>
+class V8Vector {
+ public:
+ V8Vector() : start_(nullptr), length_(0) {}
+ V8Vector(T* data, int length) : start_(data), length_(length) {
+ MOZ_ASSERT(length == 0 || (length > 0 && data != nullptr));
+ }
+
+ // Returns the length of the vector.
+ int length() const { return length_; }
+
+ // Returns the pointer to the start of the data in the vector.
+ T* start() const { return start_; }
+
+ // Access individual vector elements - checks bounds in debug mode.
+ T& operator[](int index) const {
+ MOZ_ASSERT(0 <= index && index < length_);
+ return start_[index];
+ }
+
+ V8Vector<T> operator+(int offset) const {
+ MOZ_ASSERT(offset < length_);
+ return V8Vector<T>(start_ + offset, length_ - offset);
+ }
+
+ private:
+ T* start_;
+ int length_;
+};
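+
+// For example (illustrative only), wrapping a stack buffer for the decoder:
+//
+//   char buf[ReasonableBufferSize];
+//   V8Vector<char> out(buf, ReasonableBufferSize);
+//   out[0] = '\0';                    // bounds-checked in debug builds
+//   V8Vector<char> tail = out + 10;   // a view starting at offset 10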
+
+template <typename T, int kSize>
+class EmbeddedVector : public V8Vector<T> {
+ public:
+ EmbeddedVector() : V8Vector<T>(buffer_, kSize) {}
+
+ explicit EmbeddedVector(T initial_value) : V8Vector<T>(buffer_, kSize) {
+ for (int i = 0; i < kSize; ++i) {
+ buffer_[i] = initial_value;
+ }
+ }
+
+  // When copying, make the underlying Vector reference our buffer.
+ EmbeddedVector(const EmbeddedVector& rhs) : V8Vector<T>(rhs) {
+ MemCopy(buffer_, rhs.buffer_, sizeof(T) * kSize);
+ this->set_start(buffer_);
+ }
+
+ EmbeddedVector& operator=(const EmbeddedVector& rhs) {
+ if (this == &rhs) return *this;
+ V8Vector<T>::operator=(rhs);
+ MemCopy(buffer_, rhs.buffer_, sizeof(T) * kSize);
+ this->set_start(buffer_);
+ return *this;
+ }
+
+ private:
+ T buffer_[kSize];
+};
+
+// Interface and default implementation for converting addresses and
+// register-numbers to text. The default implementation is machine
+// specific.
+class NameConverter {
+ public:
+ virtual ~NameConverter() {}
+ virtual const char* NameOfCPURegister(int reg) const;
+ virtual const char* NameOfByteCPURegister(int reg) const;
+ virtual const char* NameOfXMMRegister(int reg) const;
+ virtual const char* NameOfAddress(byte* addr) const;
+ virtual const char* NameOfConstant(byte* addr) const;
+ virtual const char* NameInCode(byte* addr) const;
+
+ protected:
+ EmbeddedVector<char, 128> tmp_buffer_;
+};
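+
+// A rough sketch of how an embedder might subclass this to do symbol lookup
+// on addresses (MySymbolTable::Lookup is hypothetical, not part of this
+// interface):
+//
+//   class SymbolNameConverter : public NameConverter {
+//    public:
+//     const char* NameOfAddress(byte* addr) const override {
+//       if (const char* sym = MySymbolTable::Lookup(addr)) return sym;
+//       return NameConverter::NameOfAddress(addr);
+//     }
+//   };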
+
+// A generic Disassembler interface
+class Disassembler {
+ public:
+ // Caller deallocates converter.
+ explicit Disassembler(const NameConverter& converter);
+
+ virtual ~Disassembler();
+
+ // Writes one disassembled instruction into 'buffer' (0-terminated).
+ // Returns the length of the disassembled machine instruction in bytes.
+ int InstructionDecode(V8Vector<char> buffer, uint8_t* instruction);
+
+ // Returns -1 if instruction does not mark the beginning of a constant pool,
+ // or the number of entries in the constant pool beginning here.
+ int ConstantPoolSizeAt(byte* instruction);
+
+ // Write disassembly into specified file 'f' using specified NameConverter
+ // (see constructor).
+ static void Disassemble(FILE* f, uint8_t* begin, uint8_t* end);
+
+ private:
+ const NameConverter& converter_;
+
+ // Disallow implicit constructors.
+ Disassembler() = delete;
+ Disassembler(const Disassembler&) = delete;
+ void operator=(const Disassembler&) = delete;
+};
+
+} // namespace disasm
+} // namespace jit
+} // namespace js
+
+#endif // JS_DISASM_ARM
+
+#endif // jit_arm_disasm_Disasm_arm_h
diff --git a/js/src/jit/arm/gen-double-encoder-table.py b/js/src/jit/arm/gen-double-encoder-table.py
new file mode 100644
index 0000000000..fd622da82e
--- /dev/null
+++ b/js/src/jit/arm/gen-double-encoder-table.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""Generate tables of immediately-encodable VFP doubles.
+
+DOES NOT get automatically run during the build process. If you need to
+modify this file (which is unlikely), you must re-run this script:
+
+python gen-double-encoder-table.py > $(topsrcdir)/path/to/DoubleEntryTable.tbl
+"""
+
+import operator
+from functools import reduce  # reduce() is not a builtin under Python 3.
+
+
+def rep(bit, count):
+ return reduce(operator.ior, [bit << c for c in range(count)])
+
+
+def encodeDouble(value):
+ """Generate an ARM ARM 'VFP modified immediate constant' with format:
+ aBbbbbbb bbcdefgh 000...
+
+ We will return the top 32 bits of the double; the rest are 0."""
+ assert (0 <= value) and (value <= 255)
+ a = value >> 7
+ b = (value >> 6) & 1
+ B = int(b == 0)
+ cdefgh = value & 0x3F
+ return (a << 31) | (B << 30) | (rep(b, 8) << 22) | cdefgh << 16
+
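+# Worked example (not part of the generated output): encodeDouble(0x70) gives
+# a=0, b=1, B=0, cdefgh=0x30, i.e. 0x3ff00000, which is the high word of the
+# IEEE-754 double 1.0 (0x3ff0000000000000).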
+
+print("/* THIS FILE IS AUTOMATICALLY GENERATED BY gen-double-encode-table.py. */")
+for i in range(256):
+ print(" { 0x%08x, { %d, %d, 0 } }," % (encodeDouble(i), i & 0xF, i >> 4))
diff --git a/js/src/jit/arm/llvm-compiler-rt/arm/aeabi_idivmod.S b/js/src/jit/arm/llvm-compiler-rt/arm/aeabi_idivmod.S
new file mode 100644
index 0000000000..0237f2221d
--- /dev/null
+++ b/js/src/jit/arm/llvm-compiler-rt/arm/aeabi_idivmod.S
@@ -0,0 +1,27 @@
+//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { int quot, int rem} __aeabi_idivmod(int numerator, int denominator) {
+// int rem, quot;
+// quot = __divmodsi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
+
+ .syntax unified
+ .align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+ push { lr }
+ sub sp, sp, #4
+ mov r2, sp
+ bl SYMBOL_NAME(__divmodsi4)
+ ldr r1, [sp]
+ add sp, sp, #4
+ pop { pc }
diff --git a/js/src/jit/arm/llvm-compiler-rt/arm/aeabi_uidivmod.S b/js/src/jit/arm/llvm-compiler-rt/arm/aeabi_uidivmod.S
new file mode 100644
index 0000000000..f7e1d2ebed
--- /dev/null
+++ b/js/src/jit/arm/llvm-compiler-rt/arm/aeabi_uidivmod.S
@@ -0,0 +1,28 @@
+//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { unsigned quot, unsigned rem}
+// __aeabi_uidivmod(unsigned numerator, unsigned denominator) {
+// unsigned rem, quot;
+// quot = __udivmodsi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
+
+ .syntax unified
+ .align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+ push { lr }
+ sub sp, sp, #4
+ mov r2, sp
+ bl SYMBOL_NAME(__udivmodsi4)
+ ldr r1, [sp]
+ add sp, sp, #4
+ pop { pc }
diff --git a/js/src/jit/arm/llvm-compiler-rt/assembly.h b/js/src/jit/arm/llvm-compiler-rt/assembly.h
new file mode 100644
index 0000000000..802d1e2870
--- /dev/null
+++ b/js/src/jit/arm/llvm-compiler-rt/assembly.h
@@ -0,0 +1,67 @@
+/* ===-- assembly.h - compiler-rt assembler support macros -----------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file defines macros for use in compiler-rt assembler source.
+ * This file is not part of the interface of this library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifndef COMPILERRT_ASSEMBLY_H
+#define COMPILERRT_ASSEMBLY_H
+
+#if defined(__POWERPC__) || defined(__powerpc__) || defined(__ppc__)
+# define SEPARATOR @
+#else
+# define SEPARATOR ;
+#endif
+
+#if defined(__APPLE__)
+# define HIDDEN_DIRECTIVE .private_extern
+# define LOCAL_LABEL(name) L_##name
+#else
+# define HIDDEN_DIRECTIVE .hidden
+# define LOCAL_LABEL(name) .L_##name
+#endif
+
+#define GLUE2(a, b) a##b
+#define GLUE(a, b) GLUE2(a, b)
+#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
+
+#ifdef VISIBILITY_HIDDEN
+# define DECLARE_SYMBOL_VISIBILITY(name) \
+ HIDDEN_DIRECTIVE SYMBOL_NAME(name) SEPARATOR
+#else
+# define DECLARE_SYMBOL_VISIBILITY(name)
+#endif
+
+#define DEFINE_COMPILERRT_FUNCTION(name) \
+ .globl SYMBOL_NAME(name) \
+ SEPARATOR DECLARE_SYMBOL_VISIBILITY(name) SYMBOL_NAME(name) :
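+
+/* For instance, on an ELF target with an empty __USER_LABEL_PREFIX__ and
+ * VISIBILITY_HIDDEN undefined, DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+ * expands to roughly ".globl __aeabi_idivmod ; __aeabi_idivmod :".
+ * (Illustrative expansion only.) */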
+
+#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name) \
+ .globl SYMBOL_NAME(name) \
+ SEPARATOR HIDDEN_DIRECTIVE SYMBOL_NAME(name) \
+ SEPARATOR SYMBOL_NAME(name) :
+
+#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name) \
+ .globl name SEPARATOR HIDDEN_DIRECTIVE name SEPARATOR name:
+
+#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \
+ .globl SYMBOL_NAME(name) SEPARATOR.set SYMBOL_NAME(name), \
+ SYMBOL_NAME(target) SEPARATOR
+
+#if defined(__ARM_EABI__)
+# define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) \
+ DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name)
+#else
+# define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name)
+#endif
+
+#endif /* COMPILERRT_ASSEMBLY_H */
diff --git a/js/src/jit/arm64/Architecture-arm64.cpp b/js/src/jit/arm64/Architecture-arm64.cpp
new file mode 100644
index 0000000000..eb3dd67b1a
--- /dev/null
+++ b/js/src/jit/arm64/Architecture-arm64.cpp
@@ -0,0 +1,129 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm64/Architecture-arm64.h"
+
+#include <cstring>
+
+#include "jit/arm64/vixl/Cpu-vixl.h"
+#include "jit/FlushICache.h" // js::jit::FlushICache
+#include "jit/RegisterSets.h"
+
+namespace js {
+namespace jit {
+
+Registers::Code Registers::FromName(const char* name) {
+ // Check for some register aliases first.
+ if (strcmp(name, "ip0") == 0) {
+ return ip0;
+ }
+ if (strcmp(name, "ip1") == 0) {
+ return ip1;
+ }
+ if (strcmp(name, "fp") == 0) {
+ return fp;
+ }
+
+ for (uint32_t i = 0; i < Total; i++) {
+ if (strcmp(GetName(i), name) == 0) {
+ return Code(i);
+ }
+ }
+
+ return Invalid;
+}
+
+FloatRegisters::Code FloatRegisters::FromName(const char* name) {
+ for (size_t i = 0; i < Total; i++) {
+ if (strcmp(GetName(i), name) == 0) {
+ return Code(i);
+ }
+ }
+
+ return Invalid;
+}
+
+// This must sync with GetPushSizeInBytes just below and also with
+// MacroAssembler::PushRegsInMask.
+FloatRegisterSet FloatRegister::ReduceSetForPush(const FloatRegisterSet& s) {
+ SetType all = s.bits();
+ SetType set128b =
+ (all & FloatRegisters::AllSimd128Mask) >> FloatRegisters::ShiftSimd128;
+ SetType doubleSet =
+ (all & FloatRegisters::AllDoubleMask) >> FloatRegisters::ShiftDouble;
+ SetType singleSet =
+ (all & FloatRegisters::AllSingleMask) >> FloatRegisters::ShiftSingle;
+
+ // See GetPushSizeInBytes.
+ SetType set64b = (singleSet | doubleSet) & ~set128b;
+
+ SetType reduced = (set128b << FloatRegisters::ShiftSimd128) |
+ (set64b << FloatRegisters::ShiftDouble);
+ return FloatRegisterSet(reduced);
+}
+
+// Compute the size of the dump area for |s.ReduceSetForPush()|, as defined by
+// MacroAssembler::PushRegsInMask for this target.
+uint32_t FloatRegister::GetPushSizeInBytes(const FloatRegisterSet& s) {
+ SetType all = s.bits();
+ SetType set128b =
+ (all & FloatRegisters::AllSimd128Mask) >> FloatRegisters::ShiftSimd128;
+ SetType doubleSet =
+ (all & FloatRegisters::AllDoubleMask) >> FloatRegisters::ShiftDouble;
+ SetType singleSet =
+ (all & FloatRegisters::AllSingleMask) >> FloatRegisters::ShiftSingle;
+
+ // PushRegsInMask pushes singles as if they were doubles. Also we need to
+ // remove singles or doubles which are also pushed as part of a vector
+ // register.
+ SetType set64b = (singleSet | doubleSet) & ~set128b;
+
+ // The "+ 1) & ~1" is to take into account the alignment hole below the
+ // double-reg dump area. See MacroAssembler::PushRegsInMaskSizeInBytes.
+ return ((set64b.size() + 1) & ~1) * sizeof(double) +
+ set128b.size() * SizeOfSimd128;
+}
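+
+// Worked example (illustrative): for s = { s0, d1, v2 }, set64b ends up with
+// encodings {0, 1} and set128b with {2}, so the dump area is
+// ((2 + 1) & ~1) * sizeof(double) + 1 * SizeOfSimd128 = 16 + 16 = 32 bytes.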
+
+uint32_t FloatRegister::getRegisterDumpOffsetInBytes() {
+ // See block comment in MacroAssembler.h for further required invariants.
+ static_assert(sizeof(jit::FloatRegisters::RegisterContent) == 16);
+ return encoding() * sizeof(jit::FloatRegisters::RegisterContent);
+}
+
+// For N in 0..31, if any of sN, dN or qN is a member of `s`, the returned set
+// will contain all of sN, dN and qN.
+FloatRegisterSet FloatRegister::BroadcastToAllSizes(const FloatRegisterSet& s) {
+ SetType all = s.bits();
+ SetType set128b =
+ (all & FloatRegisters::AllSimd128Mask) >> FloatRegisters::ShiftSimd128;
+ SetType doubleSet =
+ (all & FloatRegisters::AllDoubleMask) >> FloatRegisters::ShiftDouble;
+ SetType singleSet =
+ (all & FloatRegisters::AllSingleMask) >> FloatRegisters::ShiftSingle;
+
+ SetType merged = set128b | doubleSet | singleSet;
+ SetType broadcasted = (merged << FloatRegisters::ShiftSimd128) |
+ (merged << FloatRegisters::ShiftDouble) |
+ (merged << FloatRegisters::ShiftSingle);
+
+ return FloatRegisterSet(broadcasted);
+}
+
+uint32_t GetARM64Flags() { return 0; }
+
+// CPU flags handling on ARM64 is currently different from other platforms:
+// the flags are computed and stored per-assembler and are thus "always
+// computed".
+bool CPUFlagsHaveBeenComputed() { return true; }
+
+void FlushICache(void* code, size_t size) {
+ vixl::CPU::EnsureIAndDCacheCoherency(code, size);
+}
+
+void FlushExecutionContext() { vixl::CPU::FlushExecutionContext(); }
+
+} // namespace jit
+} // namespace js
diff --git a/js/src/jit/arm64/Architecture-arm64.h b/js/src/jit/arm64/Architecture-arm64.h
new file mode 100644
index 0000000000..96bbc63848
--- /dev/null
+++ b/js/src/jit/arm64/Architecture-arm64.h
@@ -0,0 +1,773 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_Architecture_arm64_h
+#define jit_arm64_Architecture_arm64_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/MathAlgorithms.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "jit/arm64/vixl/Instructions-vixl.h"
+#include "jit/shared/Architecture-shared.h"
+
+#include "js/Utility.h"
+
+#define JS_HAS_HIDDEN_SP
+static const uint32_t HiddenSPEncoding = vixl::kSPRegInternalCode;
+
+namespace js {
+namespace jit {
+
+// AArch64 has 32 64-bit integer registers, x0 though x31.
+//
+// x31 (or, more accurately, the integer register with encoding 31, since
+// there is no x31 per se) is special and functions as both the stack pointer
+// and a zero register.
+//
+// The bottom 32 bits of each of the X registers is accessible as w0 through
+// w31. The program counter is not accessible as a register.
+//
+// SIMD and scalar floating-point registers share a register bank.
+// 32 bit float registers are s0 through s31.
+// 64 bit double registers are d0 through d31.
+// 128 bit SIMD registers are v0 through v31.
+// e.g., s0 is the bottom 32 bits of d0, which is the bottom 64 bits of v0.
+
+// AArch64 Calling Convention:
+// x0 - x7: arguments and return value
+// x8: indirect result (struct) location
+// x9 - x15: temporary registers
+// x16 - x17: intra-call-use registers (PLT, linker)
+// x18: platform specific use (TLS)
+// x19 - x28: callee-saved registers
+// x29: frame pointer
+// x30: link register
+
+// AArch64 Calling Convention for Floats:
+// d0 - d7: arguments and return value
+// d8 - d15: callee-saved registers
+// Bits 64:127 are not saved for v8-v15.
+// d16 - d31: temporary registers
+
+// AArch64 does not have soft float.
+
+class Registers {
+ public:
+ enum RegisterID {
+ w0 = 0,
+ x0 = 0,
+ w1 = 1,
+ x1 = 1,
+ w2 = 2,
+ x2 = 2,
+ w3 = 3,
+ x3 = 3,
+ w4 = 4,
+ x4 = 4,
+ w5 = 5,
+ x5 = 5,
+ w6 = 6,
+ x6 = 6,
+ w7 = 7,
+ x7 = 7,
+ w8 = 8,
+ x8 = 8,
+ w9 = 9,
+ x9 = 9,
+ w10 = 10,
+ x10 = 10,
+ w11 = 11,
+ x11 = 11,
+ w12 = 12,
+ x12 = 12,
+ w13 = 13,
+ x13 = 13,
+ w14 = 14,
+ x14 = 14,
+ w15 = 15,
+ x15 = 15,
+ w16 = 16,
+ x16 = 16,
+ ip0 = 16, // MacroAssembler scratch register 1.
+ w17 = 17,
+ x17 = 17,
+ ip1 = 17, // MacroAssembler scratch register 2.
+ w18 = 18,
+ x18 = 18,
+ tls = 18, // Platform-specific use (TLS).
+ w19 = 19,
+ x19 = 19,
+ w20 = 20,
+ x20 = 20,
+ w21 = 21,
+ x21 = 21,
+ w22 = 22,
+ x22 = 22,
+ w23 = 23,
+ x23 = 23,
+ w24 = 24,
+ x24 = 24,
+ w25 = 25,
+ x25 = 25,
+ w26 = 26,
+ x26 = 26,
+ w27 = 27,
+ x27 = 27,
+ w28 = 28,
+ x28 = 28,
+ w29 = 29,
+ x29 = 29,
+ fp = 29,
+ w30 = 30,
+ x30 = 30,
+ lr = 30,
+ w31 = 31,
+ x31 = 31,
+ wzr = 31,
+ xzr = 31,
+ sp = 31, // Special: both stack pointer and a zero register.
+ };
+ typedef uint8_t Code;
+ typedef uint32_t Encoding;
+ typedef uint32_t SetType;
+
+ static const Code Invalid = 0xFF;
+
+ union RegisterContent {
+ uintptr_t r;
+ };
+
+ static uint32_t SetSize(SetType x) {
+ static_assert(sizeof(SetType) == 4, "SetType must be 32 bits");
+ return mozilla::CountPopulation32(x);
+ }
+ static uint32_t FirstBit(SetType x) {
+ return mozilla::CountTrailingZeroes32(x);
+ }
+ static uint32_t LastBit(SetType x) {
+ return 31 - mozilla::CountLeadingZeroes32(x);
+ }
+
+ static const char* GetName(uint32_t code) {
+ static const char* const Names[] = {
+ "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+ "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+ "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+ "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"};
+ static_assert(Total == std::size(Names), "Table is the correct size");
+ if (code >= Total) {
+ return "invalid";
+ }
+ return Names[code];
+ }
+
+ static Code FromName(const char* name);
+
+ static const uint32_t Total = 32;
+ static const uint32_t TotalPhys = 32;
+ static const uint32_t Allocatable =
+ 27; // No named special-function registers.
+
+ static const SetType AllMask = 0xFFFFFFFF;
+ static const SetType NoneMask = 0x0;
+
+ static const SetType ArgRegMask =
+ (1 << Registers::x0) | (1 << Registers::x1) | (1 << Registers::x2) |
+ (1 << Registers::x3) | (1 << Registers::x4) | (1 << Registers::x5) |
+ (1 << Registers::x6) | (1 << Registers::x7) | (1 << Registers::x8);
+
+ static const SetType VolatileMask =
+ (1 << Registers::x0) | (1 << Registers::x1) | (1 << Registers::x2) |
+ (1 << Registers::x3) | (1 << Registers::x4) | (1 << Registers::x5) |
+ (1 << Registers::x6) | (1 << Registers::x7) | (1 << Registers::x8) |
+ (1 << Registers::x9) | (1 << Registers::x10) | (1 << Registers::x11) |
+ (1 << Registers::x12) | (1 << Registers::x13) | (1 << Registers::x14) |
+ (1 << Registers::x15) | (1 << Registers::x16) | (1 << Registers::x17) |
+ (1 << Registers::x18);
+
+ static const SetType NonVolatileMask =
+ (1 << Registers::x19) | (1 << Registers::x20) | (1 << Registers::x21) |
+ (1 << Registers::x22) | (1 << Registers::x23) | (1 << Registers::x24) |
+ (1 << Registers::x25) | (1 << Registers::x26) | (1 << Registers::x27) |
+ (1 << Registers::x28) | (1 << Registers::x29) | (1 << Registers::x30);
+
+ static const SetType NonAllocatableMask =
+ (1 << Registers::x28) | // PseudoStackPointer.
+ (1 << Registers::ip0) | // First scratch register.
+ (1 << Registers::ip1) | // Second scratch register.
+ (1 << Registers::tls) | (1 << Registers::lr) | (1 << Registers::sp) |
+ (1 << Registers::fp);
+
+ static const SetType WrapperMask = VolatileMask;
+
+ // Registers returned from a JS -> JS call.
+ static const SetType JSCallMask = (1 << Registers::x2);
+
+ // Registers returned from a JS -> C call.
+ static const SetType CallMask = (1 << Registers::x0);
+
+ static const SetType AllocatableMask = AllMask & ~NonAllocatableMask;
+};
+
+// Smallest integer type that can hold a register bitmask.
+typedef uint32_t PackedRegisterMask;
+
+template <typename T>
+class TypedRegisterSet;
+
+// 128-bit bitset for FloatRegisters::SetType.
+
+class Bitset128 {
+ // The order (hi, lo) looks best in the debugger.
+ uint64_t hi, lo;
+
+ public:
+ MOZ_IMPLICIT constexpr Bitset128(uint64_t initial) : hi(0), lo(initial) {}
+ MOZ_IMPLICIT constexpr Bitset128(const Bitset128& that)
+ : hi(that.hi), lo(that.lo) {}
+
+ constexpr Bitset128(uint64_t hi, uint64_t lo) : hi(hi), lo(lo) {}
+
+ constexpr uint64_t high() const { return hi; }
+
+ constexpr uint64_t low() const { return lo; }
+
+ constexpr Bitset128 operator|(Bitset128 that) const {
+ return Bitset128(hi | that.hi, lo | that.lo);
+ }
+
+ constexpr Bitset128 operator&(Bitset128 that) const {
+ return Bitset128(hi & that.hi, lo & that.lo);
+ }
+
+ constexpr Bitset128 operator^(Bitset128 that) const {
+ return Bitset128(hi ^ that.hi, lo ^ that.lo);
+ }
+
+ constexpr Bitset128 operator~() const { return Bitset128(~hi, ~lo); }
+
+ // We must avoid shifting by the word width, which is complex. Inlining plus
+ // shift-by-constant will remove a lot of code in the normal case.
+
+ constexpr Bitset128 operator<<(size_t shift) const {
+ if (shift == 0) {
+ return *this;
+ }
+ if (shift < 64) {
+ return Bitset128((hi << shift) | (lo >> (64 - shift)), lo << shift);
+ }
+ if (shift == 64) {
+ return Bitset128(lo, 0);
+ }
+ return Bitset128(lo << (shift - 64), 0);
+ }
+
+ constexpr Bitset128 operator>>(size_t shift) const {
+ if (shift == 0) {
+ return *this;
+ }
+ if (shift < 64) {
+ return Bitset128(hi >> shift, (lo >> shift) | (hi << (64 - shift)));
+ }
+ if (shift == 64) {
+ return Bitset128(0, hi);
+ }
+ return Bitset128(0, hi >> (shift - 64));
+ }
+
+ constexpr bool operator==(Bitset128 that) const {
+ return lo == that.lo && hi == that.hi;
+ }
+
+ constexpr bool operator!=(Bitset128 that) const {
+ return lo != that.lo || hi != that.hi;
+ }
+
+ constexpr bool operator!() const { return (hi | lo) == 0; }
+
+ Bitset128& operator|=(const Bitset128& that) {
+ hi |= that.hi;
+ lo |= that.lo;
+ return *this;
+ }
+
+ Bitset128& operator&=(const Bitset128& that) {
+ hi &= that.hi;
+ lo &= that.lo;
+ return *this;
+ }
+
+ uint32_t size() const {
+ return mozilla::CountPopulation64(hi) + mozilla::CountPopulation64(lo);
+ }
+
+ uint32_t countTrailingZeroes() const {
+ if (lo) {
+ return mozilla::CountTrailingZeroes64(lo);
+ }
+ return mozilla::CountTrailingZeroes64(hi) + 64;
+ }
+
+ uint32_t countLeadingZeroes() const {
+ if (hi) {
+ return mozilla::CountLeadingZeroes64(hi);
+ }
+ return mozilla::CountLeadingZeroes64(lo) + 64;
+ }
+};
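+
+// As a concrete example of the word composition above (illustrative only):
+// (Bitset128(1) << 64) is Bitset128(1, 0), i.e. high() == 1 and low() == 0,
+// and (Bitset128(1, 0) >> 64) == Bitset128(1).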
+
+class FloatRegisters {
+ public:
+ enum FPRegisterID {
+ s0 = 0,
+ d0 = 0,
+ v0 = 0,
+ s1 = 1,
+ d1 = 1,
+ v1 = 1,
+ s2 = 2,
+ d2 = 2,
+ v2 = 2,
+ s3 = 3,
+ d3 = 3,
+ v3 = 3,
+ s4 = 4,
+ d4 = 4,
+ v4 = 4,
+ s5 = 5,
+ d5 = 5,
+ v5 = 5,
+ s6 = 6,
+ d6 = 6,
+ v6 = 6,
+ s7 = 7,
+ d7 = 7,
+ v7 = 7,
+ s8 = 8,
+ d8 = 8,
+ v8 = 8,
+ s9 = 9,
+ d9 = 9,
+ v9 = 9,
+ s10 = 10,
+ d10 = 10,
+ v10 = 10,
+ s11 = 11,
+ d11 = 11,
+ v11 = 11,
+ s12 = 12,
+ d12 = 12,
+ v12 = 12,
+ s13 = 13,
+ d13 = 13,
+ v13 = 13,
+ s14 = 14,
+ d14 = 14,
+ v14 = 14,
+ s15 = 15,
+ d15 = 15,
+ v15 = 15,
+ s16 = 16,
+ d16 = 16,
+ v16 = 16,
+ s17 = 17,
+ d17 = 17,
+ v17 = 17,
+ s18 = 18,
+ d18 = 18,
+ v18 = 18,
+ s19 = 19,
+ d19 = 19,
+ v19 = 19,
+ s20 = 20,
+ d20 = 20,
+ v20 = 20,
+ s21 = 21,
+ d21 = 21,
+ v21 = 21,
+ s22 = 22,
+ d22 = 22,
+ v22 = 22,
+ s23 = 23,
+ d23 = 23,
+ v23 = 23,
+ s24 = 24,
+ d24 = 24,
+ v24 = 24,
+ s25 = 25,
+ d25 = 25,
+ v25 = 25,
+ s26 = 26,
+ d26 = 26,
+ v26 = 26,
+ s27 = 27,
+ d27 = 27,
+ v27 = 27,
+ s28 = 28,
+ d28 = 28,
+ v28 = 28,
+ s29 = 29,
+ d29 = 29,
+ v29 = 29,
+ s30 = 30,
+ d30 = 30,
+ v30 = 30,
+ s31 = 31,
+ d31 = 31,
+ v31 = 31, // Scratch register.
+ };
+
+ // Eight bits: (invalid << 7) | (kind << 5) | encoding
+ typedef uint8_t Code;
+ typedef FPRegisterID Encoding;
+ typedef Bitset128 SetType;
+
+ enum Kind : uint8_t { Single, Double, Simd128, NumTypes };
+
+ static constexpr Code Invalid = 0x80;
+
+ static const char* GetName(uint32_t code) {
+ // clang-format off
+ static const char* const Names[] = {
+ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9",
+ "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19",
+ "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29",
+ "s30", "s31",
+
+ "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9",
+ "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19",
+ "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29",
+ "d30", "d31",
+
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+ "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+ "v30", "v31",
+ };
+ // clang-format on
+ static_assert(Total == std::size(Names), "Table is the correct size");
+ if (code >= Total) {
+ return "invalid";
+ }
+ return Names[code];
+ }
+
+ static Code FromName(const char* name);
+
+ static const uint32_t TotalPhys = 32;
+ static const uint32_t Total = TotalPhys * NumTypes;
+ static const uint32_t Allocatable = 31; // Without d31, the scratch register.
+
+ static_assert(sizeof(SetType) * 8 >= Total,
+ "SetType should be large enough to enumerate all registers.");
+
+ static constexpr unsigned ShiftSingle = uint32_t(Single) * TotalPhys;
+ static constexpr unsigned ShiftDouble = uint32_t(Double) * TotalPhys;
+ static constexpr unsigned ShiftSimd128 = uint32_t(Simd128) * TotalPhys;
+
+ static constexpr SetType NoneMask = SetType(0);
+ static constexpr SetType AllPhysMask = ~(~SetType(0) << TotalPhys);
+ static constexpr SetType AllSingleMask = AllPhysMask << ShiftSingle;
+ static constexpr SetType AllDoubleMask = AllPhysMask << ShiftDouble;
+ static constexpr SetType AllSimd128Mask = AllPhysMask << ShiftSimd128;
+ static constexpr SetType AllMask =
+ AllDoubleMask | AllSingleMask | AllSimd128Mask;
+ static constexpr SetType AliasMask = (SetType(1) << ShiftSingle) |
+ (SetType(1) << ShiftDouble) |
+ (SetType(1) << ShiftSimd128);
+
+ static_assert(ShiftSingle == 0,
+ "Or the NonVolatileMask must be computed differently");
+
+ // s31 is the ScratchFloatReg.
+ static constexpr SetType NonVolatileSingleMask =
+ SetType((1 << FloatRegisters::s8) | (1 << FloatRegisters::s9) |
+ (1 << FloatRegisters::s10) | (1 << FloatRegisters::s11) |
+ (1 << FloatRegisters::s12) | (1 << FloatRegisters::s13) |
+ (1 << FloatRegisters::s14) | (1 << FloatRegisters::s15) |
+ (1 << FloatRegisters::s16) | (1 << FloatRegisters::s17) |
+ (1 << FloatRegisters::s18) | (1 << FloatRegisters::s19) |
+ (1 << FloatRegisters::s20) | (1 << FloatRegisters::s21) |
+ (1 << FloatRegisters::s22) | (1 << FloatRegisters::s23) |
+ (1 << FloatRegisters::s24) | (1 << FloatRegisters::s25) |
+ (1 << FloatRegisters::s26) | (1 << FloatRegisters::s27) |
+ (1 << FloatRegisters::s28) | (1 << FloatRegisters::s29) |
+ (1 << FloatRegisters::s30));
+
+ static constexpr SetType NonVolatileMask =
+ (NonVolatileSingleMask << ShiftSingle) |
+ (NonVolatileSingleMask << ShiftDouble) |
+ (NonVolatileSingleMask << ShiftSimd128);
+
+ static constexpr SetType VolatileMask = AllMask & ~NonVolatileMask;
+
+ static constexpr SetType WrapperMask = VolatileMask;
+
+ static_assert(ShiftSingle == 0,
+ "Or the NonAllocatableMask must be computed differently");
+
+ // d31 is the ScratchFloatReg.
+ static constexpr SetType NonAllocatableSingleMask =
+ (SetType(1) << FloatRegisters::s31);
+
+ static constexpr SetType NonAllocatableMask =
+ NonAllocatableSingleMask | (NonAllocatableSingleMask << ShiftDouble) |
+ (NonAllocatableSingleMask << ShiftSimd128);
+
+ static constexpr SetType AllocatableMask = AllMask & ~NonAllocatableMask;
+
+ // Content spilled during bailouts.
+ union RegisterContent {
+ float s;
+ double d;
+ uint8_t v128[16];
+ };
+
+ static constexpr Encoding encoding(Code c) {
+ // assert() not available in constexpr function.
+ // assert(c < Total);
+ return Encoding(c & 31);
+ }
+
+ static constexpr Kind kind(Code c) {
+ // assert() not available in constexpr function.
+ // assert(c < Total && ((c >> 5) & 3) < NumTypes);
+ return Kind((c >> 5) & 3);
+ }
+
+ static constexpr Code fromParts(uint32_t encoding, uint32_t kind,
+ uint32_t invalid) {
+ return Code((invalid << 7) | (kind << 5) | encoding);
+ }
+};
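+
+// Sanity check on the Code layout above (illustrative): fromParts(3, Double, 0)
+// yields (0 << 7) | (1 << 5) | 3 == 35, and GetName(35) is "d3".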
+
+static const uint32_t SpillSlotSize =
+ std::max(sizeof(Registers::RegisterContent),
+ sizeof(FloatRegisters::RegisterContent));
+
+static const uint32_t ShadowStackSpace = 0;
+
+// When our only strategy for far jumps is to encode the offset directly, and
+// not insert any jump islands during assembly for even further jumps, then the
+// architecture restricts us to -2^27 .. 2^27-4, to fit into a signed 28-bit
+// value. We further reduce this range to allow the far-jump inserting code to
+// have some breathing room.
+static const uint32_t JumpImmediateRange = ((1 << 27) - (20 * 1024 * 1024));
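+// That is, 2^27 bytes (128 MiB) minus a 20 MiB cushion, i.e. 108 MiB.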
+
+static const uint32_t ABIStackAlignment = 16;
+static const uint32_t CodeAlignment = 16;
+static const bool StackKeptAligned = false;
+
+// Although sp is only usable if 16-byte alignment is kept,
+// the Pseudo-StackPointer enables use of 8-byte alignment.
+static const uint32_t StackAlignment = 8;
+static const uint32_t NativeFrameSize = 8;
+
+struct FloatRegister {
+ typedef FloatRegisters Codes;
+ typedef Codes::Code Code;
+ typedef Codes::Encoding Encoding;
+ typedef Codes::SetType SetType;
+
+ static uint32_t SetSize(SetType x) {
+ static_assert(sizeof(SetType) == 16, "SetType must be 128 bits");
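+ // Fold the Simd128 and Double lanes down onto the Single lane so that each
+ // physical register is counted at most once.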
+ x |= x >> FloatRegisters::TotalPhys;
+ x |= x >> FloatRegisters::TotalPhys;
+ x &= FloatRegisters::AllPhysMask;
+ MOZ_ASSERT(x.high() == 0);
+ MOZ_ASSERT((x.low() >> 32) == 0);
+ return mozilla::CountPopulation32(x.low());
+ }
+
+ static uint32_t FirstBit(SetType x) {
+ static_assert(sizeof(SetType) == 16, "SetType");
+ return x.countTrailingZeroes();
+ }
+ static uint32_t LastBit(SetType x) {
+ static_assert(sizeof(SetType) == 16, "SetType");
+ return 127 - x.countLeadingZeroes();
+ }
+
+ static constexpr size_t SizeOfSimd128 = 16;
+
+ private:
+ // These fields only hold valid values: an invalid register is always
+ // represented as a valid encoding and kind with the invalid_ bit set.
+ uint8_t encoding_; // 32 encodings
+ uint8_t kind_; // Double, Single, Simd128
+ bool invalid_;
+
+ typedef Codes::Kind Kind;
+
+ public:
+ constexpr FloatRegister(Encoding encoding, Kind kind)
+ : encoding_(encoding), kind_(kind), invalid_(false) {
+ // assert(uint32_t(encoding) < Codes::TotalPhys);
+ }
+
+ constexpr FloatRegister()
+ : encoding_(0), kind_(FloatRegisters::Double), invalid_(true) {}
+
+ static FloatRegister FromCode(uint32_t i) {
+ MOZ_ASSERT(i < Codes::Total);
+ return FloatRegister(FloatRegisters::encoding(i), FloatRegisters::kind(i));
+ }
+
+ bool isSingle() const {
+ MOZ_ASSERT(!invalid_);
+ return kind_ == FloatRegisters::Single;
+ }
+ bool isDouble() const {
+ MOZ_ASSERT(!invalid_);
+ return kind_ == FloatRegisters::Double;
+ }
+ bool isSimd128() const {
+ MOZ_ASSERT(!invalid_);
+ return kind_ == FloatRegisters::Simd128;
+ }
+ bool isInvalid() const { return invalid_; }
+
+ FloatRegister asSingle() const {
+ MOZ_ASSERT(!invalid_);
+ return FloatRegister(Encoding(encoding_), FloatRegisters::Single);
+ }
+ FloatRegister asDouble() const {
+ MOZ_ASSERT(!invalid_);
+ return FloatRegister(Encoding(encoding_), FloatRegisters::Double);
+ }
+ FloatRegister asSimd128() const {
+ MOZ_ASSERT(!invalid_);
+ return FloatRegister(Encoding(encoding_), FloatRegisters::Simd128);
+ }
+
+ constexpr uint32_t size() const {
+ MOZ_ASSERT(!invalid_);
+ if (kind_ == FloatRegisters::Double) {
+ return sizeof(double);
+ }
+ if (kind_ == FloatRegisters::Single) {
+ return sizeof(float);
+ }
+ MOZ_ASSERT(kind_ == FloatRegisters::Simd128);
+ return SizeOfSimd128;
+ }
+
+ constexpr Code code() const {
+ // assert(!invalid_);
+ return Codes::fromParts(encoding_, kind_, invalid_);
+ }
+
+ constexpr Encoding encoding() const {
+ MOZ_ASSERT(!invalid_);
+ return Encoding(encoding_);
+ }
+
+ const char* name() const { return FloatRegisters::GetName(code()); }
+ bool volatile_() const {
+ MOZ_ASSERT(!invalid_);
+ return !!((SetType(1) << code()) & FloatRegisters::VolatileMask);
+ }
+ constexpr bool operator!=(FloatRegister other) const {
+ return code() != other.code();
+ }
+ constexpr bool operator==(FloatRegister other) const {
+ return code() == other.code();
+ }
+
+ bool aliases(FloatRegister other) const {
+ return other.encoding_ == encoding_;
+ }
+ // This function mostly exists for the ARM backend. It is to ensure that two
+ // floating point registers' types are equivalent. e.g. S0 is not equivalent
+ // to D16, since S0 holds a float32, and D16 holds a Double.
+ // Since all floating point registers on x86 and x64 are equivalent, it is
+ // reasonable for this function to do the same.
+ bool equiv(FloatRegister other) const {
+ MOZ_ASSERT(!invalid_);
+ return kind_ == other.kind_;
+ }
+
+ uint32_t numAliased() const { return Codes::NumTypes; }
+ uint32_t numAlignedAliased() { return numAliased(); }
+
+ FloatRegister aliased(uint32_t aliasIdx) {
+ MOZ_ASSERT(!invalid_);
+ MOZ_ASSERT(aliasIdx < numAliased());
+ return FloatRegister(Encoding(encoding_),
+ Kind((aliasIdx + kind_) % Codes::NumTypes));
+ }
+ FloatRegister alignedAliased(uint32_t aliasIdx) { return aliased(aliasIdx); }
+ SetType alignedOrDominatedAliasedSet() const {
+ return Codes::AliasMask << encoding_;
+ }
+
+ static constexpr RegTypeName DefaultType = RegTypeName::Float64;
+
+ template <RegTypeName Name = DefaultType>
+ static SetType LiveAsIndexableSet(SetType s) {
+ return SetType(0);
+ }
+
+ template <RegTypeName Name = DefaultType>
+ static SetType AllocatableAsIndexableSet(SetType s) {
+ static_assert(Name != RegTypeName::Any, "Allocatable set are not iterable");
+ return LiveAsIndexableSet<Name>(s);
+ }
+
+ static TypedRegisterSet<FloatRegister> ReduceSetForPush(
+ const TypedRegisterSet<FloatRegister>& s);
+ static uint32_t GetPushSizeInBytes(const TypedRegisterSet<FloatRegister>& s);
+ uint32_t getRegisterDumpOffsetInBytes();
+
+ // For N in 0..31, if any of sN, dN or qN is a member of `s`, the
+ // returned set will contain all of sN, dN and qN.
+ static TypedRegisterSet<FloatRegister> BroadcastToAllSizes(
+ const TypedRegisterSet<FloatRegister>& s);
+};
+
+template <>
+inline FloatRegister::SetType
+FloatRegister::LiveAsIndexableSet<RegTypeName::Float32>(SetType set) {
+ return set & FloatRegisters::AllSingleMask;
+}
+
+template <>
+inline FloatRegister::SetType
+FloatRegister::LiveAsIndexableSet<RegTypeName::Float64>(SetType set) {
+ return set & FloatRegisters::AllDoubleMask;
+}
+
+template <>
+inline FloatRegister::SetType
+FloatRegister::LiveAsIndexableSet<RegTypeName::Vector128>(SetType set) {
+ return set & FloatRegisters::AllSimd128Mask;
+}
+
+template <>
+inline FloatRegister::SetType
+FloatRegister::LiveAsIndexableSet<RegTypeName::Any>(SetType set) {
+ return set;
+}
+
+// ARM/D32 has double registers that cannot be treated as float32.
+// Luckily, ARMv8 doesn't have the same misfortune.
+inline bool hasUnaliasedDouble() { return false; }
+
+// ARM prior to ARMv8 also has doubles that alias multiple floats.
+// Again, ARMv8 is in the clear.
+inline bool hasMultiAlias() { return false; }
+
+uint32_t GetARM64Flags();
+
+bool CanFlushICacheFromBackgroundThreads();
+
+} // namespace jit
+} // namespace js
+
+#endif // jit_arm64_Architecture_arm64_h
diff --git a/js/src/jit/arm64/Assembler-arm64.cpp b/js/src/jit/arm64/Assembler-arm64.cpp
new file mode 100644
index 0000000000..1e441ae635
--- /dev/null
+++ b/js/src/jit/arm64/Assembler-arm64.cpp
@@ -0,0 +1,609 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm64/Assembler-arm64.h"
+
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
+
+#include "gc/Marking.h"
+#include "jit/arm64/Architecture-arm64.h"
+#include "jit/arm64/MacroAssembler-arm64.h"
+#include "jit/arm64/vixl/Disasm-vixl.h"
+#include "jit/AutoWritableJitCode.h"
+#include "jit/ExecutableAllocator.h"
+#include "vm/Realm.h"
+
+#include "gc/StoreBuffer-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+using mozilla::CountLeadingZeroes32;
+using mozilla::DebugOnly;
+
+// Note this is used for inter-wasm calls and may pass arguments and results
+// in floating point registers even if the system ABI does not.
+
+ABIArg ABIArgGenerator::next(MIRType type) {
+ switch (type) {
+ case MIRType::Int32:
+ case MIRType::Int64:
+ case MIRType::Pointer:
+ case MIRType::RefOrNull:
+ case MIRType::StackResults:
+ if (intRegIndex_ == NumIntArgRegs) {
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uintptr_t);
+ break;
+ }
+ current_ = ABIArg(Register::FromCode(intRegIndex_));
+ intRegIndex_++;
+ break;
+
+ case MIRType::Float32:
+ case MIRType::Double:
+ if (floatRegIndex_ == NumFloatArgRegs) {
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(double);
+ break;
+ }
+ current_ = ABIArg(FloatRegister(FloatRegisters::Encoding(floatRegIndex_),
+ type == MIRType::Double
+ ? FloatRegisters::Double
+ : FloatRegisters::Single));
+ floatRegIndex_++;
+ break;
+
+#ifdef ENABLE_WASM_SIMD
+ case MIRType::Simd128:
+ if (floatRegIndex_ == NumFloatArgRegs) {
+ stackOffset_ = AlignBytes(stackOffset_, SimdMemoryAlignment);
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += FloatRegister::SizeOfSimd128;
+ break;
+ }
+ current_ = ABIArg(FloatRegister(FloatRegisters::Encoding(floatRegIndex_),
+ FloatRegisters::Simd128));
+ floatRegIndex_++;
+ break;
+#endif
+
+ default:
+ // Note that in Assembler-x64.cpp there's a special case for Win64 which
+ // does not allow passing SIMD by value. Since Win64 also exists on ARM64,
+ // we may need to duplicate that logic here.
+ MOZ_CRASH("Unexpected argument type");
+ }
+ return current_;
+}
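+
+// For example (illustrative only): for a signature (i32, f64, i32), successive
+// next() calls yield ABIArg(x0), ABIArg(d0) and ABIArg(x1); the integer and
+// floating-point register counters advance independently.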
+
+namespace js {
+namespace jit {
+
+void Assembler::finish() {
+ armbuffer_.flushPool();
+
+ // The extended jump table is part of the code buffer.
+ ExtendedJumpTable_ = emitExtendedJumpTable();
+ Assembler::FinalizeCode();
+}
+
+bool Assembler::appendRawCode(const uint8_t* code, size_t numBytes) {
+ flush();
+ return armbuffer_.appendRawCode(code, numBytes);
+}
+
+bool Assembler::reserve(size_t size) {
+ // This buffer uses fixed-size chunks so there's no point in reserving
+ // now vs. on-demand.
+ return !oom();
+}
+
+bool Assembler::swapBuffer(wasm::Bytes& bytes) {
+ // For now, specialize to the one use case. As long as wasm::Bytes is a
+ // Vector, not a linked-list of chunks, there's not much we can do other
+ // than copy.
+ MOZ_ASSERT(bytes.empty());
+ if (!bytes.resize(bytesNeeded())) {
+ return false;
+ }
+ armbuffer_.executableCopy(bytes.begin());
+ return true;
+}
+
+BufferOffset Assembler::emitExtendedJumpTable() {
+ if (!pendingJumps_.length() || oom()) {
+ return BufferOffset();
+ }
+
+ armbuffer_.flushPool();
+ armbuffer_.align(SizeOfJumpTableEntry);
+
+ BufferOffset tableOffset = armbuffer_.nextOffset();
+
+ for (size_t i = 0; i < pendingJumps_.length(); i++) {
+ // Each JumpTableEntry is of the form:
+ // LDR ip0 [PC, 8]
+ // BR ip0
+ // [Patchable 8-byte constant low bits]
+ // [Patchable 8-byte constant high bits]
+ DebugOnly<size_t> preOffset = size_t(armbuffer_.nextOffset().getOffset());
+
+ // The unguarded use of ScratchReg64 here is OK:
+ //
+ // - The present function is called from code that does not claim any
+ // scratch registers, we're done compiling user code and are emitting jump
+ // tables. Hence the scratch registers are available when we enter.
+ //
+ // - The pendingJumps_ represent jumps to other code sections that are not
+ // known to this MacroAssembler instance, and we're generating code to
+ // jump there. It is safe to assume that any code using such a generated
+ // branch to an unknown location did not store any valuable value in any
+ // scratch register. Hence the scratch registers can definitely be
+ // clobbered here.
+ //
+ // - Scratch register usage is restricted to sequential control flow within
+ // MacroAssembler functions. Hence the scratch registers will not be
+ // clobbered by ldr and br as they are Assembler primitives, not
+ // MacroAssembler functions.
+
+ ldr(ScratchReg64, ptrdiff_t(8 / vixl::kInstructionSize));
+ br(ScratchReg64);
+
+ DebugOnly<size_t> prePointer = size_t(armbuffer_.nextOffset().getOffset());
+ MOZ_ASSERT_IF(!oom(),
+ prePointer - preOffset == OffsetOfJumpTableEntryPointer);
+
+ brk(0x0);
+ brk(0x0);
+
+ DebugOnly<size_t> postOffset = size_t(armbuffer_.nextOffset().getOffset());
+
+ MOZ_ASSERT_IF(!oom(), postOffset - preOffset == SizeOfJumpTableEntry);
+ }
+
+ if (oom()) {
+ return BufferOffset();
+ }
+
+ return tableOffset;
+}
+
+void Assembler::executableCopy(uint8_t* buffer) {
+ // Copy the code and all constant pools into the output buffer.
+ armbuffer_.executableCopy(buffer);
+
+ // Patch any relative jumps that target code outside the buffer.
+ // The extended jump table may be used for distant jumps.
+ for (size_t i = 0; i < pendingJumps_.length(); i++) {
+ RelativePatch& rp = pendingJumps_[i];
+ MOZ_ASSERT(rp.target);
+
+ Instruction* target = (Instruction*)rp.target;
+ Instruction* branch = (Instruction*)(buffer + rp.offset.getOffset());
+ JumpTableEntry* extendedJumpTable = reinterpret_cast<JumpTableEntry*>(
+ buffer + ExtendedJumpTable_.getOffset());
+ if (branch->BranchType() != vixl::UnknownBranchType) {
+ if (branch->IsTargetReachable(target)) {
+ branch->SetImmPCOffsetTarget(target);
+ } else {
+ JumpTableEntry* entry = &extendedJumpTable[i];
+ branch->SetImmPCOffsetTarget(entry->getLdr());
+ entry->data = target;
+ }
+ } else {
+ // Currently a two-instruction call, it should be possible to optimize
+ // this into a single instruction call + nop in some instances, but this
+ // will work.
+ }
+ }
+}
+
+BufferOffset Assembler::immPool(ARMRegister dest, uint8_t* value,
+ vixl::LoadLiteralOp op, const LiteralDoc& doc,
+ ARMBuffer::PoolEntry* pe) {
+ uint32_t inst = op | Rt(dest);
+ const size_t numInst = 1;
+ const unsigned sizeOfPoolEntryInBytes = 4;
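+ // |value| is a pointer parameter, so sizeof(value) is 8 here and two 4-byte
+ // pool entries are allocated -- matching the 64-bit literal loaded by
+ // immPool64 below.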
+ const unsigned numPoolEntries = sizeof(value) / sizeOfPoolEntryInBytes;
+ return allocLiteralLoadEntry(numInst, numPoolEntries, (uint8_t*)&inst, value,
+ doc, pe);
+}
+
+BufferOffset Assembler::immPool64(ARMRegister dest, uint64_t value,
+ ARMBuffer::PoolEntry* pe) {
+ return immPool(dest, (uint8_t*)&value, vixl::LDR_x_lit, LiteralDoc(value),
+ pe);
+}
+
+BufferOffset Assembler::fImmPool(ARMFPRegister dest, uint8_t* value,
+ vixl::LoadLiteralOp op,
+ const LiteralDoc& doc) {
+ uint32_t inst = op | Rt(dest);
+ const size_t numInst = 1;
+ const unsigned sizeOfPoolEntryInBits = 32;
+ const unsigned numPoolEntries = dest.size() / sizeOfPoolEntryInBits;
+ return allocLiteralLoadEntry(numInst, numPoolEntries, (uint8_t*)&inst, value,
+ doc);
+}
+
+BufferOffset Assembler::fImmPool64(ARMFPRegister dest, double value) {
+ return fImmPool(dest, (uint8_t*)&value, vixl::LDR_d_lit, LiteralDoc(value));
+}
+
+BufferOffset Assembler::fImmPool32(ARMFPRegister dest, float value) {
+ return fImmPool(dest, (uint8_t*)&value, vixl::LDR_s_lit, LiteralDoc(value));
+}
+
+void Assembler::bind(Label* label, BufferOffset targetOffset) {
+#ifdef JS_DISASM_ARM64
+ spew_.spewBind(label);
+#endif
+ // Nothing has seen the label yet: just mark the location.
+ // If we've run out of memory, don't attempt to modify the buffer which may
+ // not be there. Just mark the label as bound to the (possibly bogus)
+ // targetOffset.
+ if (!label->used() || oom()) {
+ label->bind(targetOffset.getOffset());
+ return;
+ }
+
+ // Get the most recent instruction that used the label, as stored in the
+ // label. This instruction is the head of an implicit linked list of label
+ // uses.
+ BufferOffset branchOffset(label);
+
+ while (branchOffset.assigned()) {
+ // Before overwriting the offset in this instruction, get the offset of
+ // the next link in the implicit branch list.
+ BufferOffset nextOffset = NextLink(branchOffset);
+
+ // Linking against the actual (Instruction*) would be invalid,
+ // since that Instruction could be anywhere in memory.
+ // Instead, just link against the correct relative offset, assuming
+ // no constant pools, which will be taken into consideration
+ // during finalization.
+ ptrdiff_t relativeByteOffset =
+ targetOffset.getOffset() - branchOffset.getOffset();
+ Instruction* link = getInstructionAt(branchOffset);
+
+ // This branch may still be registered for callbacks. Stop tracking it.
+ vixl::ImmBranchType branchType = link->BranchType();
+ vixl::ImmBranchRangeType branchRange =
+ Instruction::ImmBranchTypeToRange(branchType);
+ if (branchRange < vixl::NumShortBranchRangeTypes) {
+ BufferOffset deadline(
+ branchOffset.getOffset() +
+ Instruction::ImmBranchMaxForwardOffset(branchRange));
+ armbuffer_.unregisterBranchDeadline(branchRange, deadline);
+ }
+
+ // Is link able to reach the label?
+ if (link->IsPCRelAddressing() ||
+ link->IsTargetReachable(link + relativeByteOffset)) {
+ // Write a new relative offset into the instruction.
+ link->SetImmPCOffsetTarget(link + relativeByteOffset);
+ } else {
+ // This is a short-range branch, and it can't reach the label directly.
+ // Verify that it branches to a veneer: an unconditional branch.
+ MOZ_ASSERT(getInstructionAt(nextOffset)->BranchType() ==
+ vixl::UncondBranchType);
+ }
+
+ branchOffset = nextOffset;
+ }
+
+ // Bind the label, so that future uses may encode the offset immediately.
+ label->bind(targetOffset.getOffset());
+}
+
+void Assembler::addPendingJump(BufferOffset src, ImmPtr target,
+ RelocationKind reloc) {
+ MOZ_ASSERT(target.value != nullptr);
+
+ if (reloc == RelocationKind::JITCODE) {
+ jumpRelocations_.writeUnsigned(src.getOffset());
+ }
+
+ // This jump is not patchable at runtime. Extended jump table entry
+ // requirements cannot be known until finalization, so to be safe, give each
+ // jump an entry. This also causes GC tracing of the target.
+ enoughMemory_ &=
+ pendingJumps_.append(RelativePatch(src, target.value, reloc));
+}
+
+void Assembler::PatchWrite_NearCall(CodeLocationLabel start,
+ CodeLocationLabel toCall) {
+ Instruction* dest = (Instruction*)start.raw();
+ ptrdiff_t relTarget = (Instruction*)toCall.raw() - dest;
+ ptrdiff_t relTarget00 = relTarget >> 2;
+ MOZ_RELEASE_ASSERT((relTarget & 0x3) == 0);
+ MOZ_RELEASE_ASSERT(vixl::IsInt26(relTarget00));
+
+ bl(dest, relTarget00);
+}
+
+void Assembler::PatchDataWithValueCheck(CodeLocationLabel label,
+ PatchedImmPtr newValue,
+ PatchedImmPtr expected) {
+ Instruction* i = (Instruction*)label.raw();
+ void** pValue = i->LiteralAddress<void**>();
+ MOZ_ASSERT(*pValue == expected.value);
+ *pValue = newValue.value;
+}
+
+void Assembler::PatchDataWithValueCheck(CodeLocationLabel label,
+ ImmPtr newValue, ImmPtr expected) {
+ PatchDataWithValueCheck(label, PatchedImmPtr(newValue.value),
+ PatchedImmPtr(expected.value));
+}
+
+void Assembler::ToggleToJmp(CodeLocationLabel inst_) {
+ Instruction* i = (Instruction*)inst_.raw();
+ MOZ_ASSERT(i->IsAddSubImmediate());
+
+ // Refer to instruction layout in ToggleToCmp().
+ int imm19 = (int)i->Bits(23, 5);
+ MOZ_ASSERT(vixl::IsInt19(imm19));
+
+ b(i, imm19, Always);
+}
+
+void Assembler::ToggleToCmp(CodeLocationLabel inst_) {
+ Instruction* i = (Instruction*)inst_.raw();
+ MOZ_ASSERT(i->IsCondB());
+
+ int imm19 = i->ImmCondBranch();
+ // Bit 23 is reserved, and the simulator throws an assertion if it is set,
+ // so the immediate must actually fit in 18 bits. It would be messy to
+ // decode, but we could steal bit 30 or bit 31 if more room were needed.
+ MOZ_ASSERT(vixl::IsInt18(imm19));
+
+ // 31 - 64-bit if set, 32-bit if unset. (OK!)
+ // 30 - sub if set, add if unset. (OK!)
+ // 29 - SetFlagsBit. Must be set.
+ // 22:23 - ShiftAddSub. (OK!)
+ // 10:21 - ImmAddSub. (OK!)
+ // 5:9 - First source register (Rn). (OK!)
+ // 0:4 - Destination Register. Must be xzr.
+
+ // From the above, there is a safe 19-bit contiguous region from 5:23.
+ Emit(i, vixl::ThirtyTwoBits | vixl::AddSubImmediateFixed | vixl::SUB |
+ Flags(vixl::SetFlags) | Rd(vixl::xzr) |
+ (imm19 << vixl::Rn_offset));
+}
+
+void Assembler::ToggleCall(CodeLocationLabel inst_, bool enabled) {
+ const Instruction* first = reinterpret_cast<Instruction*>(inst_.raw());
+ Instruction* load;
+ Instruction* call;
+
+ // There might be a constant pool at the very first instruction.
+ first = first->skipPool();
+
+ // Skip the stack pointer restore instruction.
+ if (first->IsStackPtrSync()) {
+ first = first->InstructionAtOffset(vixl::kInstructionSize)->skipPool();
+ }
+
+ load = const_cast<Instruction*>(first);
+
+ // The call instruction follows the load, but there may be an injected
+ // constant pool.
+ call = const_cast<Instruction*>(
+ load->InstructionAtOffset(vixl::kInstructionSize)->skipPool());
+
+ if (call->IsBLR() == enabled) {
+ return;
+ }
+
+ if (call->IsBLR()) {
+ // If the second instruction is blr(), then we have:
+ // ldr x17, [pc, offset]
+ // blr x17
+ MOZ_ASSERT(load->IsLDR());
+ // We want to transform this to:
+ // adr xzr, [pc, offset]
+ // nop
+ int32_t offset = load->ImmLLiteral();
+ adr(load, xzr, int32_t(offset));
+ nop(call);
+ } else {
+ // We have:
+ // adr xzr, [pc, offset] (or ldr x17, [pc, offset])
+ // nop
+ MOZ_ASSERT(load->IsADR() || load->IsLDR());
+ MOZ_ASSERT(call->IsNOP());
+ // Transform this to:
+ // ldr x17, [pc, offset]
+ // blr x17
+ int32_t offset = (int)load->ImmPCRawOffset();
+ MOZ_ASSERT(vixl::IsInt19(offset));
+ ldr(load, ScratchReg2_64, int32_t(offset));
+ blr(call, ScratchReg2_64);
+ }
+}
+
+// Patches loads generated by MacroAssemblerCompat::mov(CodeLabel*, Register).
+// The loading code is implemented in movePatchablePtr().
+void Assembler::UpdateLoad64Value(Instruction* inst0, uint64_t value) {
+ MOZ_ASSERT(inst0->IsLDR());
+ uint64_t* literal = inst0->LiteralAddress<uint64_t*>();
+ *literal = value;
+}
+
+class RelocationIterator {
+ CompactBufferReader reader_;
+ uint32_t offset_ = 0;
+
+ public:
+ explicit RelocationIterator(CompactBufferReader& reader) : reader_(reader) {}
+
+ bool read() {
+ if (!reader_.more()) {
+ return false;
+ }
+ offset_ = reader_.readUnsigned();
+ return true;
+ }
+
+ uint32_t offset() const { return offset_; }
+};
+
+static JitCode* CodeFromJump(JitCode* code, uint8_t* jump) {
+ const Instruction* inst = (const Instruction*)jump;
+ uint8_t* target;
+
+ // We're expecting a call created by MacroAssembler::call(JitCode*).
+ // It looks like:
+ //
+ // ldr scratch, [pc, offset]
+ // blr scratch
+ //
+ // If the call has been toggled by ToggleCall(), it looks like:
+ //
+ // adr xzr, [pc, offset]
+ // nop
+ //
+ // There might be a constant pool at the very first instruction.
+ // See also ToggleCall().
+ inst = inst->skipPool();
+
+ // Skip the stack pointer restore instruction.
+ if (inst->IsStackPtrSync()) {
+ inst = inst->InstructionAtOffset(vixl::kInstructionSize)->skipPool();
+ }
+
+ if (inst->BranchType() != vixl::UnknownBranchType) {
+ // This is an immediate branch.
+ target = (uint8_t*)inst->ImmPCOffsetTarget();
+ } else if (inst->IsLDR()) {
+ // This is an ldr+blr call that is enabled. See ToggleCall().
+ mozilla::DebugOnly<const Instruction*> nextInst =
+ inst->InstructionAtOffset(vixl::kInstructionSize)->skipPool();
+ MOZ_ASSERT(nextInst->IsNOP() || nextInst->IsBLR());
+ target = (uint8_t*)inst->Literal64();
+ } else if (inst->IsADR()) {
+ // This is a disabled call: adr+nop. See ToggleCall().
+ mozilla::DebugOnly<const Instruction*> nextInst =
+ inst->InstructionAtOffset(vixl::kInstructionSize)->skipPool();
+ MOZ_ASSERT(nextInst->IsNOP());
+ ptrdiff_t offset = inst->ImmPCRawOffset() << vixl::kLiteralEntrySizeLog2;
+ // This is what Literal64 would do with the corresponding ldr.
+ memcpy(&target, inst + offset, sizeof(target));
+ } else {
+ MOZ_CRASH("Unrecognized jump instruction.");
+ }
+
+ // If the jump is within the code buffer, it uses the extended jump table.
+ if (target >= code->raw() &&
+ target < code->raw() + code->instructionsSize()) {
+ MOZ_ASSERT(target + Assembler::SizeOfJumpTableEntry <=
+ code->raw() + code->instructionsSize());
+
+ uint8_t** patchablePtr =
+ (uint8_t**)(target + Assembler::OffsetOfJumpTableEntryPointer);
+ target = *patchablePtr;
+ }
+
+ return JitCode::FromExecutable(target);
+}
+
+void Assembler::TraceJumpRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader) {
+ RelocationIterator iter(reader);
+ while (iter.read()) {
+ JitCode* child = CodeFromJump(code, code->raw() + iter.offset());
+ TraceManuallyBarrieredEdge(trc, &child, "rel32");
+ MOZ_ASSERT(child == CodeFromJump(code, code->raw() + iter.offset()));
+ }
+}
+
+/* static */
+void Assembler::TraceDataRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader) {
+ mozilla::Maybe<AutoWritableJitCode> awjc;
+
+ uint8_t* buffer = code->raw();
+
+ while (reader.more()) {
+ size_t offset = reader.readUnsigned();
+ Instruction* load = (Instruction*)&buffer[offset];
+
+ // The only valid traceable operation is a 64-bit load to an ARMRegister.
+ // Refer to movePatchablePtr() for generation.
+ MOZ_ASSERT(load->Mask(vixl::LoadLiteralMask) == vixl::LDR_x_lit);
+
+ uintptr_t* literalAddr = load->LiteralAddress<uintptr_t*>();
+ uintptr_t literal = *literalAddr;
+
+ // Data relocations can be for Values or for raw pointers. If a Value is
+ // zero-tagged, we can trace it as if it were a raw pointer. If a Value
+ // is not zero-tagged, we have to interpret it as a Value to ensure that the
+ // tag bits are masked off to recover the actual pointer.
+
+ if (literal >> JSVAL_TAG_SHIFT) {
+ // This relocation is a Value with a non-zero tag.
+ Value v = Value::fromRawBits(literal);
+ TraceManuallyBarrieredEdge(trc, &v, "jit-masm-value");
+ if (*literalAddr != v.asRawBits()) {
+ if (awjc.isNothing()) {
+ awjc.emplace(code);
+ }
+ *literalAddr = v.asRawBits();
+ }
+ continue;
+ }
+
+ // This relocation is a raw pointer or a Value with a zero tag.
+ // No barriers needed since the pointers are constants.
+ gc::Cell* cell = reinterpret_cast<gc::Cell*>(literal);
+ MOZ_ASSERT(gc::IsCellPointerValid(cell));
+ TraceManuallyBarrieredGenericPointerEdge(trc, &cell, "jit-masm-ptr");
+ if (uintptr_t(cell) != literal) {
+ if (awjc.isNothing()) {
+ awjc.emplace(code);
+ }
+ *literalAddr = uintptr_t(cell);
+ }
+ }
+}
+
+void Assembler::retarget(Label* label, Label* target) {
+#ifdef JS_DISASM_ARM64
+ spew_.spewRetarget(label, target);
+#endif
+ if (label->used()) {
+ if (target->bound()) {
+ bind(label, BufferOffset(target));
+ } else if (target->used()) {
+ // The target is not bound but used. Prepend label's branch list
+ // onto target's.
+ BufferOffset labelBranchOffset(label);
+
+ // Find the head of the use chain for label.
+ BufferOffset next = NextLink(labelBranchOffset);
+ while (next.assigned()) {
+ labelBranchOffset = next;
+ next = NextLink(next);
+ }
+
+ // Then patch the head of label's use chain to the tail of target's
+ // use chain, prepending the entire use chain of target.
+ SetNextLink(labelBranchOffset, BufferOffset(target));
+ target->use(label->offset());
+ } else {
+ // The target is unbound and unused. We can just take the head of
+ // the list hanging off of label, and dump that into target.
+ target->use(label->offset());
+ }
+ }
+ label->reset();
+}
+
+} // namespace jit
+} // namespace js
diff --git a/js/src/jit/arm64/Assembler-arm64.h b/js/src/jit/arm64/Assembler-arm64.h
new file mode 100644
index 0000000000..9745e9d262
--- /dev/null
+++ b/js/src/jit/arm64/Assembler-arm64.h
@@ -0,0 +1,793 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef A64_ASSEMBLER_A64_H_
+#define A64_ASSEMBLER_A64_H_
+
+#include <iterator>
+
+#include "jit/arm64/vixl/Assembler-vixl.h"
+
+#include "jit/CompactBuffer.h"
+#include "jit/shared/Disassembler-shared.h"
+#include "wasm/WasmTypeDecls.h"
+
+namespace js {
+namespace jit {
+
+// VIXL imports.
+typedef vixl::Register ARMRegister;
+typedef vixl::FPRegister ARMFPRegister;
+using vixl::ARMBuffer;
+using vixl::Instruction;
+
+using LabelDoc = DisassemblerSpew::LabelDoc;
+using LiteralDoc = DisassemblerSpew::LiteralDoc;
+
+static const uint32_t AlignmentAtPrologue = 0;
+static const uint32_t AlignmentMidPrologue = 8;
+static const Scale ScalePointer = TimesEight;
+
+// The MacroAssembler uses scratch registers extensively and unexpectedly.
+// For safety, scratch registers should always be acquired using
+// vixl::UseScratchRegisterScope.
+static constexpr Register ScratchReg{Registers::ip0};
+static constexpr ARMRegister ScratchReg64 = {ScratchReg, 64};
+
+static constexpr Register ScratchReg2{Registers::ip1};
+static constexpr ARMRegister ScratchReg2_64 = {ScratchReg2, 64};
+
+static constexpr FloatRegister ReturnDoubleReg = {FloatRegisters::d0,
+ FloatRegisters::Double};
+static constexpr FloatRegister ScratchDoubleReg_ = {FloatRegisters::d31,
+ FloatRegisters::Double};
+struct ScratchDoubleScope : public AutoFloatRegisterScope {
+ explicit ScratchDoubleScope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {}
+};
+
+static constexpr FloatRegister ReturnFloat32Reg = {FloatRegisters::s0,
+ FloatRegisters::Single};
+static constexpr FloatRegister ScratchFloat32Reg_ = {FloatRegisters::s31,
+ FloatRegisters::Single};
+struct ScratchFloat32Scope : public AutoFloatRegisterScope {
+ explicit ScratchFloat32Scope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchFloat32Reg_) {}
+};
+
+#ifdef ENABLE_WASM_SIMD
+static constexpr FloatRegister ReturnSimd128Reg = {FloatRegisters::v0,
+ FloatRegisters::Simd128};
+static constexpr FloatRegister ScratchSimd128Reg = {FloatRegisters::v31,
+ FloatRegisters::Simd128};
+struct ScratchSimd128Scope : public AutoFloatRegisterScope {
+ explicit ScratchSimd128Scope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchSimd128Reg) {}
+};
+#else
+struct ScratchSimd128Scope : public AutoFloatRegisterScope {
+ explicit ScratchSimd128Scope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {
+ MOZ_CRASH("SIMD not enabled");
+ }
+};
+#endif
+
+static constexpr Register InvalidReg{Registers::Invalid};
+static constexpr FloatRegister InvalidFloatReg = {};
+
+static constexpr Register OsrFrameReg{Registers::x3};
+static constexpr Register CallTempReg0{Registers::x9};
+static constexpr Register CallTempReg1{Registers::x10};
+static constexpr Register CallTempReg2{Registers::x11};
+static constexpr Register CallTempReg3{Registers::x12};
+static constexpr Register CallTempReg4{Registers::x13};
+static constexpr Register CallTempReg5{Registers::x14};
+
+static constexpr Register PreBarrierReg{Registers::x1};
+
+static constexpr Register InterpreterPCReg{Registers::x9};
+
+static constexpr Register ReturnReg{Registers::x0};
+static constexpr Register64 ReturnReg64(ReturnReg);
+static constexpr Register JSReturnReg{Registers::x2};
+static constexpr Register FramePointer{Registers::fp};
+static constexpr ARMRegister FramePointer64{FramePointer, 64};
+static constexpr Register ZeroRegister{Registers::sp};
+static constexpr ARMRegister ZeroRegister64{Registers::sp, 64};
+static constexpr ARMRegister ZeroRegister32{Registers::sp, 32};
+
+// [SMDOC] AArch64 Stack Pointer and Pseudo Stack Pointer conventions
+//
+// ================
+//
+// Stack pointer (SP), PseudoStackPointer (PSP), and RealStackPointer:
+//
+// The ARM64 real SP has a constraint: it must be 16-byte aligned whenever it
+// is used as the base pointer for a memory access. (SP+offset need not be
+// 16-byte aligned, but the SP value itself must be.) The SP register may
+// take on unaligned values but may not be used for a memory access while it
+// is unaligned.
+//
+// Stack-alignment checking can be enabled or disabled by a control register;
+// however that register cannot be modified by user space. We have to assume
+// stack alignment checking is enabled, and that does usually appear to be the
+// case. See the ARM Architecture Reference Manual, "D1.8.2 SP alignment
+// checking", for further details.
+//
+// A second constraint is forced upon us by the ARM64 ABI, which requires that
+// all accesses to the stack be at or above SP. Accesses below SP are
+// strictly forbidden, presumably because the kernel might use that area of
+// memory for its own purposes -- in particular, signal delivery -- and hence
+// it may get trashed at any time.
+//
+// Note this doesn't mean that accesses to the stack must be based off
+// register SP, only that the effective addresses must be >= SP, regardless
+// of how the address is formed.
+//
+// In order to allow word-wise pushes and pops, some of our ARM64 jits
+// (JS-Baseline, JS-Ion, and Wasm-Ion, but not Wasm-Baseline) dedicate x28 to
+// be used as a PseudoStackPointer (PSP).
+//
+// Initially the PSP will have the same value as the SP. Code can, if it
+// wants, push a single word by subtracting 8 from the PSP, doing SP := PSP,
+// then storing the value at PSP+0. Given other constraints on the alignment
+// of the SP at function call boundaries, this works out OK, at the cost of
+// the two extra instructions per push / pop.
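+//
+// A minimal sketch of such a push, in pseudo-MacroAssembler form (not a
+// quote of the actual push helpers; 'valueReg' stands for whichever register
+// holds the word being pushed):
+//
+//   Sub(x28, x28, Operand(8));          // PSP := PSP - 8
+//   Mov(sp, x28);                       // SP  := PSP, keeping SP <= PSP
+//   Str(valueReg, MemOperand(x28, 0));  // store at PSP + 0, which is >= SP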
+//
+// This is all a bit messy, and is probably not robustly adhered to. However,
+// the following appear to be the intended, and mostly implemented, current
+// invariants:
+//
+// (1) PSP is "primary", SP is "secondary". Most stack refs are
+// PSP-relative. SP-relative is rare and (obviously) only done when we
+// know that SP is aligned.
+//
+// (2) At all times, the relationship SP <= PSP is maintained. The fact that
+// SP may validly be less than PSP means that pushes on the stack force
+// the two values to become equal, by copying PSP into SP. However, pops
+// behave differently: PSP moves back up and SP stays the same, since that
+// doesn't break the SP <= PSP invariant.
+//
+// (3) However, immediately before a call instruction, SP and PSP must be the
+// same. To enforce this, PSP is copied into SP by the arm64-specific
+// MacroAssembler::call routines.
+//
+// (4) Also, after a function has returned, it is expected that SP holds the
+// "primary" value. How exactly this is implemented remains not entirely
+// clear and merits further investigation. The following points are
+// believed to be relevant:
+//
+// - For calls to functions observing the system AArch64 ABI, PSP (x28) is
+// callee-saved. That, combined with (3) above, implies SP == PSP
+// immediately after the call returns.
+//
+// - JIT-generated routines return using MacroAssemblerCompat::retn, and
+// that copies PSP into SP (bizarrely; this would make more sense if it
+// copied SP into PSP); but in any case, the point is that they are the
+// same at the point that the return instruction executes.
+//
+// - MacroAssembler::callWithABIPost copies PSP into SP after the return
+// of a call requiring dynamic alignment.
+//
+// Given the above, it is unclear exactly where in the return sequence it
+// is expected that SP == PSP, and also whether it is the callee or caller
+// that is expected to enforce it.
+//
+// In general it would be nice to be able to move (at some time in the future,
+// not now) to a world where *every* assignment to PSP or SP is followed
+// immediately by a copy into the other register. That would make all
+// required correctness proofs trivial in the sense that it would require only
+// local inspection of code immediately following (dominated by) any such
+// assignment. For the moment, however, this is a guideline, not a hard
+// requirement.
+//
+// ================
+//
+// Mechanics of keeping the stack pointers in sync:
+//
+// The following two methods require that the masm's SP has been set to the PSP
+// with MacroAssembler::SetStackPointer64(PseudoStackPointer64), or they will be
+// no-ops. The setup is performed manually by the jits after creating the masm.
+//
+// * MacroAssembler::syncStackPtr() performs SP := PSP, presumably after PSP has
+// been updated, so SP needs to move too. This is used pretty liberally
+// throughout the code base.
+//
+// * MacroAssembler::initPseudoStackPtr() performs PSP := SP. This can be used
+// after calls to non-ABI compliant code; it's not used much.
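+//
+// Roughly speaking, their intended effects are (a sketch, not the literal
+// implementation):
+//
+//   syncStackPtr():        mov sp,  x28    // SP  := PSP
+//   initPseudoStackPtr():  mov x28, sp     // PSP := SP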
+//
+// In the ARM64 assembler there is a function Instruction::IsStackPtrSync() that
+// recognizes the instruction emitted by syncStackPtr(), and this is used to
+// skip that instruction a few places, should it be present, in the JS JIT where
+// code is generated to deal with toggled calls.
+//
+// In various places there are calls to MacroAssembler::syncStackPtr() which
+// appear to be redundant. Investigation shows that they often are redundant,
+// but not always. Finding and removing such redundancies would be quite some
+// work, so we live for now with the occasional redundant update. Perusal of
+// the Cortex-A55 and -A72 optimization guides shows no evidence that such
+// assignments are any more expensive than assignments between vanilla integer
+// registers, so the costs of such redundant updates are assumed to be small.
+//
+// Invariants on the PSP at function call boundaries:
+//
+// It *appears* that the following invariants exist:
+//
+// * On entry to JIT code, PSP == SP, ie the stack pointer is transmitted via
+// both registers.
+//
+// * On entry to C++ code, PSP == SP. Certainly it appears that all calls
+// created by the MacroAssembler::call(..) routines perform 'syncStackPtr'
+// immediately before the call, and all ABI calls are routed through the
+// MacroAssembler::call layer.
+//
+// * The stubs generated by WasmStubs.cpp assume that, on entry, SP is the
+// active stack pointer and that PSP is dead.
+//
+// * The PSP is non-volatile (callee-saved). Along a normal return path from
+// JIT code, simply having PSP == SP on exit is correct, since the exit SP is
+// the same as the entry SP by the JIT ABI.
+//
+// * Call-outs to non-JIT C++ code do not need to set up the PSP (it won't be
+// used), and will not need to restore the PSP on return because x28 is
+// non-volatile in the ARM64 ABI.
+//
+// ================
+//
+// Future cleanups to the SP-vs-PSP machinery:
+//
+// Currently we have somewhat unclear invariants, which are not obviously
+// always enforced, and which may require complex non-local reasoning.
+// Auditing the code to ensure that the invariants always hold, whilst not
+// generating duplicate syncs, is close to impossible. A future rework to
+// tidy this might be as follows. (This suggestion pertains to the entire
+// JIT complex: all of the JS compilers, wasm compilers, stub generators,
+// regexp compilers, etc).
+//
+// Currently we have that, in JIT-generated code, PSP is "primary" and SP is
+// "secondary", meaning that PSP has the "real" stack pointer value and SP is
+// updated whenever PSP acquires a lower value, so as to ensure that SP <= PSP.
+// An exception to this scheme is the stubs code generated by WasmStubs.cpp,
+// which assumes that SP is "primary" and PSP is dead.
+//
+// It might give us an easier incremental path to eventually removing PSP
+// entirely if we switched to having SP always be the primary. That is:
+//
+// (1) SP is primary, PSP is secondary
+// (2) After any assignment to SP, it is copied into PSP
+// (3) All (non-frame-pointer-based) stack accesses are PSP-relative
+// (as at present)
+//
+// This would have the effect that:
+//
+// * It would reinstate the invariant that on all targets, the "real" SP value
+// is in the ABI-and-or-hardware-mandated stack pointer register.
+//
+// * It would give us a simple story about calls and returns:
+// - for calls to non-JIT generated code (viz, C++ etc), we need no extra
+// copies, because PSP (x28) is callee-saved
+// - for calls to JIT-generated code, we need no extra copies, because of (2)
+// above
+//
+// * We could incrementally migrate those parts of the code generator where we
+// know that SP is 16-aligned, to use SP- rather than PSP-relative accesses
+//
+// * The consistent use of (2) would remove the requirement to have to perform
+// path-dependent reasoning (for paths in the generated code, not in the
+// compiler) when reading/understanding the code.
+//
+// * x28 would become free for use by stubs and the baseline compiler without
+// having to worry about interoperating with code that expects x28 to hold a
+// valid PSP.
+//
+// One might ask what mechanical checks we can add to ensure correctness, rather
+// than having to verify these invariants by hand indefinitely. Maybe some
+// combination of:
+//
+// * In debug builds, compiling-in assert(SP == PSP) at critical places. This
+// can be done using the existing `assertStackPtrsSynced` function.
+//
+// * In debug builds, scanning sections of generated code to ensure no
+// SP-relative stack accesses have been created -- for some sections, at
+// least every assignment to SP is immediately followed by a copy to x28.
+// This would also facilitate detection of duplicate syncs.
+//
+// ================
+//
+// Other investigative notes, for the code base at present:
+//
+// * Some disassembly dumps suggest that we sync the stack pointer too often.
+// This could be the result of various pieces of code working at cross
+// purposes when syncing the stack pointer, or of not paying attention to the
+// precise invariants.
+//
+// * As documented in RegExpNativeMacroAssembler.cpp, function
+// SMRegExpMacroAssembler::createStackFrame:
+//
+// // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP) for
+// // addressing. The register we use for PSP may however also be used by
+// // calling code, and it is nonvolatile, so save it. Do this as a special
+// // case first because the generic save/restore code needs the PSP to be
+// // initialized already.
+//
+// and also in function SMRegExpMacroAssembler::exitHandler:
+//
+// // Restore the saved value of the PSP register, this value is whatever the
+// // caller had saved in it, not any actual SP value, and it must not be
+// // overwritten subsequently.
+//
+// The original source for these comments was a patch for bug 1445907.
+//
+// * MacroAssembler-arm64.h has an interesting comment in the retn()
+// function:
+//
+// syncStackPtr(); // SP is always used to transmit the stack between calls.
+//
+// Same comment at abiret() in that file, and in MacroAssembler-arm64.cpp,
+// at callWithABIPre and callWithABIPost.
+//
+// * In Trampoline-arm64.cpp function JitRuntime::generateVMWrapper we find
+//
+// // SP is used to transfer stack across call boundaries.
+// masm.initPseudoStackPtr();
+//
+// after the return point of a callWithVMWrapper. The only reasonable
+// conclusion from all those (assuming they are right) is that SP == PSP.
+//
+// * Wasm-Baseline does not use the PSP, but as Wasm-Ion code requires SP==PSP
+// and tiered code can have Baseline->Ion calls, Baseline will set PSP=SP
+// before a call to wasm code.
+//
+// ================
+
+// StackPointer is intentionally undefined on ARM64 to prevent misuse: using
+// sp as a base register is only valid if sp % 16 == 0.
+static constexpr Register RealStackPointer{Registers::sp};
+
+static constexpr Register PseudoStackPointer{Registers::x28};
+static constexpr ARMRegister PseudoStackPointer64 = {Registers::x28, 64};
+static constexpr ARMRegister PseudoStackPointer32 = {Registers::x28, 32};
+
+static constexpr Register IntArgReg0{Registers::x0};
+static constexpr Register IntArgReg1{Registers::x1};
+static constexpr Register IntArgReg2{Registers::x2};
+static constexpr Register IntArgReg3{Registers::x3};
+static constexpr Register IntArgReg4{Registers::x4};
+static constexpr Register IntArgReg5{Registers::x5};
+static constexpr Register IntArgReg6{Registers::x6};
+static constexpr Register IntArgReg7{Registers::x7};
+static constexpr Register HeapReg{Registers::x21};
+
+// Define unsized Registers.
+#define DEFINE_UNSIZED_REGISTERS(N) \
+ static constexpr Register r##N{Registers::x##N};
+REGISTER_CODE_LIST(DEFINE_UNSIZED_REGISTERS)
+#undef DEFINE_UNSIZED_REGISTERS
+static constexpr Register ip0{Registers::x16};
+static constexpr Register ip1{Registers::x17};
+static constexpr Register fp{Registers::x29};
+static constexpr Register lr{Registers::x30};
+static constexpr Register rzr{Registers::xzr};
+
+// Import VIXL registers into the js::jit namespace.
+#define IMPORT_VIXL_REGISTERS(N) \
+ static constexpr ARMRegister w##N = vixl::w##N; \
+ static constexpr ARMRegister x##N = vixl::x##N;
+REGISTER_CODE_LIST(IMPORT_VIXL_REGISTERS)
+#undef IMPORT_VIXL_REGISTERS
+static constexpr ARMRegister wzr = vixl::wzr;
+static constexpr ARMRegister xzr = vixl::xzr;
+static constexpr ARMRegister wsp = vixl::wsp;
+static constexpr ARMRegister sp = vixl::sp;
+
+// Import VIXL VRegisters into the js::jit namespace.
+#define IMPORT_VIXL_VREGISTERS(N) \
+ static constexpr ARMFPRegister s##N = vixl::s##N; \
+ static constexpr ARMFPRegister d##N = vixl::d##N;
+REGISTER_CODE_LIST(IMPORT_VIXL_VREGISTERS)
+#undef IMPORT_VIXL_VREGISTERS
+
+static constexpr ValueOperand JSReturnOperand = ValueOperand(JSReturnReg);
+
+// Registers used by RegExpMatcher and RegExpExecMatch stubs (do not use
+// JSReturnOperand).
+static constexpr Register RegExpMatcherRegExpReg = CallTempReg0;
+static constexpr Register RegExpMatcherStringReg = CallTempReg1;
+static constexpr Register RegExpMatcherLastIndexReg = CallTempReg2;
+
+// Registers used by RegExpExecTest stub (do not use ReturnReg).
+static constexpr Register RegExpExecTestRegExpReg = CallTempReg0;
+static constexpr Register RegExpExecTestStringReg = CallTempReg1;
+
+// Registers used by RegExpSearcher stub (do not use ReturnReg).
+static constexpr Register RegExpSearcherRegExpReg = CallTempReg0;
+static constexpr Register RegExpSearcherStringReg = CallTempReg1;
+static constexpr Register RegExpSearcherLastIndexReg = CallTempReg2;
+
+static constexpr Register JSReturnReg_Type = r3;
+static constexpr Register JSReturnReg_Data = r2;
+
+static constexpr FloatRegister NANReg = {FloatRegisters::d14,
+ FloatRegisters::Single};
+// N.B. r8 isn't listed as an aapcs temp register, but we can use it as such
+// because we never use return-structs.
+static constexpr Register CallTempNonArgRegs[] = {r8, r9, r10, r11,
+ r12, r13, r14, r15};
+static const uint32_t NumCallTempNonArgRegs = std::size(CallTempNonArgRegs);
+
+static constexpr uint32_t JitStackAlignment = 16;
+
+static constexpr uint32_t JitStackValueAlignment =
+ JitStackAlignment / sizeof(Value);
+static_assert(JitStackAlignment % sizeof(Value) == 0 &&
+ JitStackValueAlignment >= 1,
+ "Stack alignment should be a non-zero multiple of sizeof(Value)");
+
+static constexpr uint32_t SimdMemoryAlignment = 16;
+
+static_assert(CodeAlignment % SimdMemoryAlignment == 0,
+ "Code alignment should be larger than any of the alignments "
+ "which are used for "
+ "the constant sections of the code buffer. Thus it should be "
+ "larger than the "
+ "alignment for SIMD constants.");
+
+static const uint32_t WasmStackAlignment = SimdMemoryAlignment;
+static const uint32_t WasmTrapInstructionLength = 4;
+
+// See comments in wasm::GenerateFunctionPrologue. The difference between these
+// is the size of the largest callable prologue on the platform.
+static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;
+
+class Assembler : public vixl::Assembler {
+ public:
+ Assembler() : vixl::Assembler() {}
+
+ typedef vixl::Condition Condition;
+
+ void finish();
+ bool appendRawCode(const uint8_t* code, size_t numBytes);
+ bool reserve(size_t size);
+ bool swapBuffer(wasm::Bytes& bytes);
+
+ // Emit the jump table, returning the BufferOffset to the first entry in the
+ // table.
+ BufferOffset emitExtendedJumpTable();
+ BufferOffset ExtendedJumpTable_;
+ void executableCopy(uint8_t* buffer);
+
+ BufferOffset immPool(ARMRegister dest, uint8_t* value, vixl::LoadLiteralOp op,
+ const LiteralDoc& doc,
+ ARMBuffer::PoolEntry* pe = nullptr);
+ BufferOffset immPool64(ARMRegister dest, uint64_t value,
+ ARMBuffer::PoolEntry* pe = nullptr);
+ BufferOffset fImmPool(ARMFPRegister dest, uint8_t* value,
+ vixl::LoadLiteralOp op, const LiteralDoc& doc);
+ BufferOffset fImmPool64(ARMFPRegister dest, double value);
+ BufferOffset fImmPool32(ARMFPRegister dest, float value);
+
+ uint32_t currentOffset() const { return nextOffset().getOffset(); }
+
+ void bind(Label* label) { bind(label, nextOffset()); }
+ void bind(Label* label, BufferOffset boff);
+ void bind(CodeLabel* label) { label->target()->bind(currentOffset()); }
+
+ void setUnlimitedBuffer() { armbuffer_.setUnlimited(); }
+ bool oom() const {
+ return AssemblerShared::oom() || armbuffer_.oom() ||
+ jumpRelocations_.oom() || dataRelocations_.oom();
+ }
+
+ void copyJumpRelocationTable(uint8_t* dest) const {
+ if (jumpRelocations_.length()) {
+ memcpy(dest, jumpRelocations_.buffer(), jumpRelocations_.length());
+ }
+ }
+ void copyDataRelocationTable(uint8_t* dest) const {
+ if (dataRelocations_.length()) {
+ memcpy(dest, dataRelocations_.buffer(), dataRelocations_.length());
+ }
+ }
+
+ size_t jumpRelocationTableBytes() const { return jumpRelocations_.length(); }
+ size_t dataRelocationTableBytes() const { return dataRelocations_.length(); }
+ size_t bytesNeeded() const {
+ return SizeOfCodeGenerated() + jumpRelocationTableBytes() +
+ dataRelocationTableBytes();
+ }
+
+ void processCodeLabels(uint8_t* rawCode) {
+ for (const CodeLabel& label : codeLabels_) {
+ Bind(rawCode, label);
+ }
+ }
+
+ static void UpdateLoad64Value(Instruction* inst0, uint64_t value);
+
+ static void Bind(uint8_t* rawCode, const CodeLabel& label) {
+ auto mode = label.linkMode();
+ size_t patchAtOffset = label.patchAt().offset();
+ size_t targetOffset = label.target().offset();
+
+ if (mode == CodeLabel::MoveImmediate) {
+ Instruction* inst = (Instruction*)(rawCode + patchAtOffset);
+ Assembler::UpdateLoad64Value(inst, (uint64_t)(rawCode + targetOffset));
+ } else {
+ *reinterpret_cast<const void**>(rawCode + patchAtOffset) =
+ rawCode + targetOffset;
+ }
+ }
+
+ void retarget(Label* cur, Label* next);
+
+ // The buffer is about to be linked. Ensure any constant pools or
+ // excess bookkeeping has been flushed to the instruction stream.
+ void flush() { armbuffer_.flushPool(); }
+
+ void comment(const char* msg) {
+#ifdef JS_DISASM_ARM64
+ spew_.spew("; %s", msg);
+#endif
+ }
+
+ void setPrinter(Sprinter* sp) {
+#ifdef JS_DISASM_ARM64
+ spew_.setPrinter(sp);
+#endif
+ }
+
+ static bool SupportsFloatingPoint() { return true; }
+ static bool SupportsUnalignedAccesses() { return true; }
+ static bool SupportsFastUnalignedFPAccesses() { return true; }
+ static bool SupportsWasmSimd() { return true; }
+
+ static bool HasRoundInstruction(RoundingMode mode) {
+ switch (mode) {
+ case RoundingMode::Up:
+ case RoundingMode::Down:
+ case RoundingMode::NearestTiesToEven:
+ case RoundingMode::TowardsZero:
+ return true;
+ }
+ MOZ_CRASH("unexpected mode");
+ }
+
+ protected:
+ // Add a jump whose target is unknown until finalization.
+ // The jump may not be patched at runtime.
+ void addPendingJump(BufferOffset src, ImmPtr target, RelocationKind kind);
+
+ public:
+ static uint32_t PatchWrite_NearCallSize() { return 4; }
+
+ static uint32_t NopSize() { return 4; }
+
+ static void PatchWrite_NearCall(CodeLocationLabel start,
+ CodeLocationLabel toCall);
+ static void PatchDataWithValueCheck(CodeLocationLabel label,
+ PatchedImmPtr newValue,
+ PatchedImmPtr expected);
+
+ static void PatchDataWithValueCheck(CodeLocationLabel label, ImmPtr newValue,
+ ImmPtr expected);
+
+ static void PatchWrite_Imm32(CodeLocationLabel label, Imm32 imm) {
+ // Raw is going to be the return address.
+ uint32_t* raw = (uint32_t*)label.raw();
+ // Overwrite the 4 bytes before the return address, which will end up being
+ // the call instruction.
+ *(raw - 1) = imm.value;
+ }
+ static uint32_t AlignDoubleArg(uint32_t offset) {
+ MOZ_CRASH("AlignDoubleArg()");
+ }
+ static uintptr_t GetPointer(uint8_t* ptr) {
+ Instruction* i = reinterpret_cast<Instruction*>(ptr);
+ uint64_t ret = i->Literal64();
+ return ret;
+ }
+
+ // Toggle a jmp or cmp emitted by toggledJump().
+ static void ToggleToJmp(CodeLocationLabel inst_);
+ static void ToggleToCmp(CodeLocationLabel inst_);
+ static void ToggleCall(CodeLocationLabel inst_, bool enabled);
+
+ static void TraceJumpRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader);
+ static void TraceDataRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader);
+
+ void assertNoGCThings() const {
+#ifdef DEBUG
+ MOZ_ASSERT(dataRelocations_.length() == 0);
+ for (auto& j : pendingJumps_) {
+ MOZ_ASSERT(j.kind == RelocationKind::HARDCODED);
+ }
+#endif
+ }
+
+ public:
+  // A jump table entry is 2 instructions, with 8 bytes of raw data.
+ static const size_t SizeOfJumpTableEntry = 16;
+
+ struct JumpTableEntry {
+ uint32_t ldr;
+ uint32_t br;
+ void* data;
+
+ Instruction* getLdr() { return reinterpret_cast<Instruction*>(&ldr); }
+ };
+
+ // Offset of the patchable target for the given entry.
+ static const size_t OffsetOfJumpTableEntryPointer = 8;
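+
+  // Conceptually, each 16-byte entry is laid out as follows (a sketch; the
+  // exact instructions are emitted by emitExtendedJumpTable):
+  //
+  //   ldr <scratch>, #+8   ; load the 8-byte target stored after the branch
+  //   br  <scratch>        ; jump to it
+  //   .quad target         ; the patchable pointer, at offset 8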
+
+ public:
+ void writeCodePointer(CodeLabel* label) {
+ armbuffer_.assertNoPoolAndNoNops();
+ uintptr_t x = uintptr_t(-1);
+ BufferOffset off = EmitData(&x, sizeof(uintptr_t));
+ label->patchAt()->bind(off.getOffset());
+ }
+
+ void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end,
+ const Disassembler::HeapAccess& heapAccess) {
+ MOZ_CRASH("verifyHeapAccessDisassembly");
+ }
+
+ protected:
+ // Structure for fixing up pc-relative loads/jumps when the machine
+ // code gets moved (executable copy, gc, etc.).
+ struct RelativePatch {
+ BufferOffset offset;
+ void* target;
+ RelocationKind kind;
+
+ RelativePatch(BufferOffset offset, void* target, RelocationKind kind)
+ : offset(offset), target(target), kind(kind) {}
+ };
+
+ // List of jumps for which the target is either unknown until finalization,
+ // or cannot be known due to GC. Each entry here requires a unique entry
+ // in the extended jump table, and is patched at finalization.
+ js::Vector<RelativePatch, 8, SystemAllocPolicy> pendingJumps_;
+
+ // Final output formatters.
+ CompactBufferWriter jumpRelocations_;
+ CompactBufferWriter dataRelocations_;
+};
+
+static const uint32_t NumIntArgRegs = 8;
+static const uint32_t NumFloatArgRegs = 8;
+
+class ABIArgGenerator {
+ public:
+ ABIArgGenerator()
+ : intRegIndex_(0), floatRegIndex_(0), stackOffset_(0), current_() {}
+
+ ABIArg next(MIRType argType);
+ ABIArg& current() { return current_; }
+ uint32_t stackBytesConsumedSoFar() const { return stackOffset_; }
+ void increaseStackOffset(uint32_t bytes) { stackOffset_ += bytes; }
+
+ protected:
+ unsigned intRegIndex_;
+ unsigned floatRegIndex_;
+ uint32_t stackOffset_;
+ ABIArg current_;
+};
+
+// These registers may be volatile or nonvolatile.
+static constexpr Register ABINonArgReg0 = r8;
+static constexpr Register ABINonArgReg1 = r9;
+static constexpr Register ABINonArgReg2 = r10;
+static constexpr Register ABINonArgReg3 = r11;
+
+// This register may be volatile or nonvolatile. Avoid d31 which is the
+// ScratchDoubleReg_.
+static constexpr FloatRegister ABINonArgDoubleReg = {FloatRegisters::s16,
+ FloatRegisters::Single};
+
+// These registers may be volatile or nonvolatile.
+// Note: these three registers are all guaranteed to be different.
+static constexpr Register ABINonArgReturnReg0 = r8;
+static constexpr Register ABINonArgReturnReg1 = r9;
+static constexpr Register ABINonVolatileReg{Registers::x19};
+
+// This register is guaranteed to be clobberable during the prologue and
+// epilogue of an ABI call which must preserve the ABI argument, return,
+// and non-volatile registers.
+static constexpr Register ABINonArgReturnVolatileReg = lr;
+
+// Instance pointer argument register for WebAssembly functions. This must not
+// alias any other register used for passing function arguments or return
+// values. Preserved by WebAssembly functions. Must be nonvolatile.
+static constexpr Register InstanceReg{Registers::x23};
+
+// Registers used for wasm table calls. These registers must be disjoint
+// from the ABI argument registers, InstanceReg and each other.
+static constexpr Register WasmTableCallScratchReg0 = ABINonArgReg0;
+static constexpr Register WasmTableCallScratchReg1 = ABINonArgReg1;
+static constexpr Register WasmTableCallSigReg = ABINonArgReg2;
+static constexpr Register WasmTableCallIndexReg = ABINonArgReg3;
+
+// Registers used for ref calls.
+static constexpr Register WasmCallRefCallScratchReg0 = ABINonArgReg0;
+static constexpr Register WasmCallRefCallScratchReg1 = ABINonArgReg1;
+static constexpr Register WasmCallRefReg = ABINonArgReg3;
+
+// Register used as a scratch along the return path in the fast js -> wasm stub
+// code. This must not overlap ReturnReg, JSReturnOperand, or InstanceReg.
+// It must be a volatile register.
+static constexpr Register WasmJitEntryReturnScratch = r9;
+
+static inline bool GetIntArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
+ Register* out) {
+ if (usedIntArgs >= NumIntArgRegs) {
+ return false;
+ }
+ *out = Register::FromCode(usedIntArgs);
+ return true;
+}
+
+static inline bool GetFloatArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
+ FloatRegister* out) {
+ if (usedFloatArgs >= NumFloatArgRegs) {
+ return false;
+ }
+ *out = FloatRegister::FromCode(usedFloatArgs);
+ return true;
+}
+
+// Get a register in which we plan to put a quantity that will be used as an
+// integer argument. This differs from GetIntArgReg in that if we have no more
+// actual argument registers to use we will fall back on using whatever
+// CallTempReg* don't overlap the argument registers, and only fail once those
+// run out too.
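+//
+// For example, with usedIntArgs == 9 the eight argument registers x0..x7 are
+// exhausted, so the code below hands out CallTempNonArgRegs[1], i.e. r9
+// (index 8 would have mapped to r8).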
+static inline bool GetTempRegForIntArg(uint32_t usedIntArgs,
+ uint32_t usedFloatArgs, Register* out) {
+ if (GetIntArgReg(usedIntArgs, usedFloatArgs, out)) {
+ return true;
+ }
+ // Unfortunately, we have to assume things about the point at which
+ // GetIntArgReg returns false, because we need to know how many registers it
+ // can allocate.
+ usedIntArgs -= NumIntArgRegs;
+ if (usedIntArgs >= NumCallTempNonArgRegs) {
+ return false;
+ }
+ *out = CallTempNonArgRegs[usedIntArgs];
+ return true;
+}
+
+inline Imm32 Imm64::firstHalf() const { return low(); }
+
+inline Imm32 Imm64::secondHalf() const { return hi(); }
+
+// Forbids nop filling for testing purposes. Not nestable.
+class AutoForbidNops {
+ protected:
+ Assembler* asm_;
+
+ public:
+ explicit AutoForbidNops(Assembler* asm_) : asm_(asm_) { asm_->enterNoNops(); }
+ ~AutoForbidNops() { asm_->leaveNoNops(); }
+};
+
+// Forbids pool generation during a specified interval. Not nestable.
+class AutoForbidPoolsAndNops : public AutoForbidNops {
+ public:
+ AutoForbidPoolsAndNops(Assembler* asm_, size_t maxInst)
+ : AutoForbidNops(asm_) {
+ asm_->enterNoPool(maxInst);
+ }
+ ~AutoForbidPoolsAndNops() { asm_->leaveNoPool(); }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif // A64_ASSEMBLER_A64_H_
diff --git a/js/src/jit/arm64/CodeGenerator-arm64.cpp b/js/src/jit/arm64/CodeGenerator-arm64.cpp
new file mode 100644
index 0000000000..d738ea548e
--- /dev/null
+++ b/js/src/jit/arm64/CodeGenerator-arm64.cpp
@@ -0,0 +1,4245 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm64/CodeGenerator-arm64.h"
+
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MathAlgorithms.h"
+
+#include "jsnum.h"
+
+#include "jit/CodeGenerator.h"
+#include "jit/InlineScriptTree.h"
+#include "jit/JitRuntime.h"
+#include "jit/MIR.h"
+#include "jit/MIRGraph.h"
+#include "jit/ReciprocalMulConstants.h"
+#include "vm/JSContext.h"
+#include "vm/Realm.h"
+#include "vm/Shape.h"
+
+#include "jit/shared/CodeGenerator-shared-inl.h"
+#include "vm/JSScript-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+using JS::GenericNaN;
+using mozilla::FloorLog2;
+using mozilla::Maybe;
+using mozilla::NegativeInfinity;
+using mozilla::Nothing;
+using mozilla::Some;
+
+// shared
+CodeGeneratorARM64::CodeGeneratorARM64(MIRGenerator* gen, LIRGraph* graph,
+ MacroAssembler* masm)
+ : CodeGeneratorShared(gen, graph, masm) {}
+
+bool CodeGeneratorARM64::generateOutOfLineCode() {
+ AutoCreatedBy acb(masm, "CodeGeneratorARM64::generateOutOfLineCode");
+
+ if (!CodeGeneratorShared::generateOutOfLineCode()) {
+ return false;
+ }
+
+ if (deoptLabel_.used()) {
+ // All non-table-based bailouts will go here.
+ masm.bind(&deoptLabel_);
+
+ // Store the frame size, so the handler can recover the IonScript.
+ masm.push(Imm32(frameSize()));
+
+ TrampolinePtr handler = gen->jitRuntime()->getGenericBailoutHandler();
+ masm.jump(handler);
+ }
+
+ return !masm.oom();
+}
+
+void CodeGeneratorARM64::emitBranch(Assembler::Condition cond,
+ MBasicBlock* mirTrue,
+ MBasicBlock* mirFalse) {
+ if (isNextBlock(mirFalse->lir())) {
+ jumpToBlock(mirTrue, cond);
+ } else {
+ jumpToBlock(mirFalse, Assembler::InvertCondition(cond));
+ jumpToBlock(mirTrue);
+ }
+}
+
+void OutOfLineBailout::accept(CodeGeneratorARM64* codegen) {
+ codegen->visitOutOfLineBailout(this);
+}
+
+void CodeGenerator::visitTestIAndBranch(LTestIAndBranch* test) {
+ Register input = ToRegister(test->input());
+ MBasicBlock* mirTrue = test->ifTrue();
+ MBasicBlock* mirFalse = test->ifFalse();
+
+ // Jump to the True block if NonZero.
+ // Jump to the False block if Zero.
+ if (isNextBlock(mirFalse->lir())) {
+ masm.branch32(Assembler::NonZero, input, Imm32(0),
+ getJumpLabelForBranch(mirTrue));
+ } else {
+ masm.branch32(Assembler::Zero, input, Imm32(0),
+ getJumpLabelForBranch(mirFalse));
+ if (!isNextBlock(mirTrue->lir())) {
+ jumpToBlock(mirTrue);
+ }
+ }
+}
+
+void CodeGenerator::visitCompare(LCompare* comp) {
+ const MCompare* mir = comp->mir();
+ const MCompare::CompareType type = mir->compareType();
+ const Assembler::Condition cond = JSOpToCondition(type, comp->jsop());
+ const Register leftreg = ToRegister(comp->getOperand(0));
+ const LAllocation* right = comp->getOperand(1);
+ const Register defreg = ToRegister(comp->getDef(0));
+
+ if (type == MCompare::Compare_Object || type == MCompare::Compare_Symbol ||
+ type == MCompare::Compare_UIntPtr ||
+ type == MCompare::Compare_RefOrNull) {
+ if (right->isConstant()) {
+ MOZ_ASSERT(type == MCompare::Compare_UIntPtr);
+ masm.cmpPtrSet(cond, leftreg, Imm32(ToInt32(right)), defreg);
+ } else {
+ masm.cmpPtrSet(cond, leftreg, ToRegister(right), defreg);
+ }
+ return;
+ }
+
+ if (right->isConstant()) {
+ masm.cmp32Set(cond, leftreg, Imm32(ToInt32(right)), defreg);
+ } else {
+ masm.cmp32Set(cond, leftreg, ToRegister(right), defreg);
+ }
+}
+
+void CodeGenerator::visitCompareAndBranch(LCompareAndBranch* comp) {
+ const MCompare* mir = comp->cmpMir();
+ const MCompare::CompareType type = mir->compareType();
+ const LAllocation* left = comp->left();
+ const LAllocation* right = comp->right();
+
+ if (type == MCompare::Compare_Object || type == MCompare::Compare_Symbol ||
+ type == MCompare::Compare_UIntPtr ||
+ type == MCompare::Compare_RefOrNull) {
+ if (right->isConstant()) {
+ MOZ_ASSERT(type == MCompare::Compare_UIntPtr);
+ masm.cmpPtr(ToRegister(left), Imm32(ToInt32(right)));
+ } else {
+ masm.cmpPtr(ToRegister(left), ToRegister(right));
+ }
+ } else if (right->isConstant()) {
+ masm.cmp32(ToRegister(left), Imm32(ToInt32(right)));
+ } else {
+ masm.cmp32(ToRegister(left), ToRegister(right));
+ }
+
+ Assembler::Condition cond = JSOpToCondition(type, comp->jsop());
+ emitBranch(cond, comp->ifTrue(), comp->ifFalse());
+}
+
+void CodeGeneratorARM64::bailoutIf(Assembler::Condition condition,
+ LSnapshot* snapshot) {
+ encode(snapshot);
+
+ InlineScriptTree* tree = snapshot->mir()->block()->trackedTree();
+ OutOfLineBailout* ool = new (alloc()) OutOfLineBailout(snapshot);
+ addOutOfLineCode(ool,
+ new (alloc()) BytecodeSite(tree, tree->script()->code()));
+
+ masm.B(ool->entry(), condition);
+}
+
+void CodeGeneratorARM64::bailoutFrom(Label* label, LSnapshot* snapshot) {
+ MOZ_ASSERT_IF(!masm.oom(), label->used());
+ MOZ_ASSERT_IF(!masm.oom(), !label->bound());
+
+ encode(snapshot);
+
+ InlineScriptTree* tree = snapshot->mir()->block()->trackedTree();
+ OutOfLineBailout* ool = new (alloc()) OutOfLineBailout(snapshot);
+ addOutOfLineCode(ool,
+ new (alloc()) BytecodeSite(tree, tree->script()->code()));
+
+ masm.retarget(label, ool->entry());
+}
+
+void CodeGeneratorARM64::bailout(LSnapshot* snapshot) {
+ Label label;
+ masm.b(&label);
+ bailoutFrom(&label, snapshot);
+}
+
+void CodeGeneratorARM64::visitOutOfLineBailout(OutOfLineBailout* ool) {
+ masm.push(Imm32(ool->snapshot()->snapshotOffset()));
+ masm.B(&deoptLabel_);
+}
+
+void CodeGenerator::visitMinMaxD(LMinMaxD* ins) {
+ ARMFPRegister lhs(ToFloatRegister(ins->first()), 64);
+ ARMFPRegister rhs(ToFloatRegister(ins->second()), 64);
+ ARMFPRegister output(ToFloatRegister(ins->output()), 64);
+ if (ins->mir()->isMax()) {
+ masm.Fmax(output, lhs, rhs);
+ } else {
+ masm.Fmin(output, lhs, rhs);
+ }
+}
+
+void CodeGenerator::visitMinMaxF(LMinMaxF* ins) {
+ ARMFPRegister lhs(ToFloatRegister(ins->first()), 32);
+ ARMFPRegister rhs(ToFloatRegister(ins->second()), 32);
+ ARMFPRegister output(ToFloatRegister(ins->output()), 32);
+ if (ins->mir()->isMax()) {
+ masm.Fmax(output, lhs, rhs);
+ } else {
+ masm.Fmin(output, lhs, rhs);
+ }
+}
+
+template <typename T>
+static ARMRegister toWRegister(const T* a) {
+ return ARMRegister(ToRegister(a), 32);
+}
+
+template <typename T>
+static ARMRegister toXRegister(const T* a) {
+ return ARMRegister(ToRegister(a), 64);
+}
+
+Operand toWOperand(const LAllocation* a) {
+ if (a->isConstant()) {
+ return Operand(ToInt32(a));
+ }
+ return Operand(toWRegister(a));
+}
+
+vixl::CPURegister ToCPURegister(const LAllocation* a, Scalar::Type type) {
+ if (a->isFloatReg() && type == Scalar::Float64) {
+ return ARMFPRegister(ToFloatRegister(a), 64);
+ }
+ if (a->isFloatReg() && type == Scalar::Float32) {
+ return ARMFPRegister(ToFloatRegister(a), 32);
+ }
+ if (a->isGeneralReg()) {
+ return ARMRegister(ToRegister(a), 32);
+ }
+ MOZ_CRASH("Unknown LAllocation");
+}
+
+vixl::CPURegister ToCPURegister(const LDefinition* d, Scalar::Type type) {
+ return ToCPURegister(d->output(), type);
+}
+
+// Let |cond| be an ARM64 condition code that we could reasonably use in a
+// conditional branch or select following a comparison instruction. This
+// function returns the condition to use in the case where we swap the two
+// operands of the comparison instruction.
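+//
+// For example, a branch on GT after Cmp(a, b) tests a > b; if the comparison
+// is instead emitted as Cmp(b, a), the equivalent branch condition is LT,
+// since a > b exactly when b < a.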
+Assembler::Condition GetCondForSwappedOperands(Assembler::Condition cond) {
+ // EQ and NE map to themselves
+ // Of the remaining 14 cases, 4 other pairings can meaningfully swap:
+ // HS -- LS
+ // LO -- HI
+ // GE -- LE
+ // GT -- LT
+ switch (cond) {
+ case vixl::eq:
+ case vixl::ne:
+ return cond;
+ case vixl::hs:
+ return vixl::ls;
+ case vixl::ls:
+ return vixl::hs;
+ case vixl::lo:
+ return vixl::hi;
+ case vixl::hi:
+ return vixl::lo;
+ case vixl::ge:
+ return vixl::le;
+ case vixl::le:
+ return vixl::ge;
+ case vixl::gt:
+ return vixl::lt;
+ case vixl::lt:
+ return vixl::gt;
+ default:
+ MOZ_CRASH("no meaningful swapped-operand condition");
+ }
+}
+
+void CodeGenerator::visitAddI(LAddI* ins) {
+ const LAllocation* lhs = ins->getOperand(0);
+ const LAllocation* rhs = ins->getOperand(1);
+ const LDefinition* dest = ins->getDef(0);
+
+ // Platforms with three-operand arithmetic ops don't need recovery.
+ MOZ_ASSERT(!ins->recoversInput());
+
+ if (ins->snapshot()) {
+ masm.Adds(toWRegister(dest), toWRegister(lhs), toWOperand(rhs));
+ bailoutIf(Assembler::Overflow, ins->snapshot());
+ } else {
+ masm.Add(toWRegister(dest), toWRegister(lhs), toWOperand(rhs));
+ }
+}
+
+void CodeGenerator::visitSubI(LSubI* ins) {
+ const LAllocation* lhs = ins->getOperand(0);
+ const LAllocation* rhs = ins->getOperand(1);
+ const LDefinition* dest = ins->getDef(0);
+
+ // Platforms with three-operand arithmetic ops don't need recovery.
+ MOZ_ASSERT(!ins->recoversInput());
+
+ if (ins->snapshot()) {
+ masm.Subs(toWRegister(dest), toWRegister(lhs), toWOperand(rhs));
+ bailoutIf(Assembler::Overflow, ins->snapshot());
+ } else {
+ masm.Sub(toWRegister(dest), toWRegister(lhs), toWOperand(rhs));
+ }
+}
+
+void CodeGenerator::visitMulI(LMulI* ins) {
+ const LAllocation* lhs = ins->getOperand(0);
+ const LAllocation* rhs = ins->getOperand(1);
+ const LDefinition* dest = ins->getDef(0);
+ MMul* mul = ins->mir();
+ MOZ_ASSERT_IF(mul->mode() == MMul::Integer,
+ !mul->canBeNegativeZero() && !mul->canOverflow());
+
+ Register lhsreg = ToRegister(lhs);
+ const ARMRegister lhsreg32 = ARMRegister(lhsreg, 32);
+ Register destreg = ToRegister(dest);
+ const ARMRegister destreg32 = ARMRegister(destreg, 32);
+
+ if (rhs->isConstant()) {
+ // Bailout on -0.0.
+ int32_t constant = ToInt32(rhs);
+ if (mul->canBeNegativeZero() && constant <= 0) {
+ Assembler::Condition bailoutCond =
+ (constant == 0) ? Assembler::LessThan : Assembler::Equal;
+ masm.Cmp(toWRegister(lhs), Operand(0));
+ bailoutIf(bailoutCond, ins->snapshot());
+ }
+
+ switch (constant) {
+ case -1:
+ masm.Negs(destreg32, Operand(lhsreg32));
+ break; // Go to overflow check.
+ case 0:
+ masm.Mov(destreg32, wzr);
+ return; // Avoid overflow check.
+ case 1:
+ if (destreg != lhsreg) {
+ masm.Mov(destreg32, lhsreg32);
+ }
+ return; // Avoid overflow check.
+ case 2:
+ if (!mul->canOverflow()) {
+ masm.Add(destreg32, lhsreg32, Operand(lhsreg32));
+ return; // Avoid overflow check.
+ }
+ masm.Adds(destreg32, lhsreg32, Operand(lhsreg32));
+ break; // Go to overflow check.
+ default:
+        // Use a shift if the multiply cannot overflow and the constant is a
+        // power of 2.
+ if (!mul->canOverflow() && constant > 0) {
+ int32_t shift = FloorLog2(constant);
+ if ((1 << shift) == constant) {
+ masm.Lsl(destreg32, lhsreg32, shift);
+ return;
+ }
+ }
+
+ // Otherwise, just multiply. We have to check for overflow.
+ // Negative zero was handled above.
+ Label bailout;
+ Label* onOverflow = mul->canOverflow() ? &bailout : nullptr;
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const Register scratch = temps.AcquireW().asUnsized();
+
+ masm.move32(Imm32(constant), scratch);
+ masm.mul32(lhsreg, scratch, destreg, onOverflow);
+
+ if (onOverflow) {
+ MOZ_ASSERT(lhsreg != destreg);
+ bailoutFrom(&bailout, ins->snapshot());
+ }
+ return;
+ }
+
+ // Overflow check.
+ if (mul->canOverflow()) {
+ bailoutIf(Assembler::Overflow, ins->snapshot());
+ }
+ } else {
+ Register rhsreg = ToRegister(rhs);
+ const ARMRegister rhsreg32 = ARMRegister(rhsreg, 32);
+
+ Label bailout;
+ Label* onOverflow = mul->canOverflow() ? &bailout : nullptr;
+
+ if (mul->canBeNegativeZero()) {
+ // The product of two integer operands is negative zero iff one
+ // operand is zero, and the other is negative. Therefore, the
+ // sum of the two operands will also be negative (specifically,
+ // it will be the non-zero operand). If the result of the
+ // multiplication is 0, we can check the sign of the sum to
+ // determine whether we should bail out.
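+      //
+      // For example (illustrative only): lhs = 0, rhs = -5 gives a product
+      // of 0 and a sum of -5 < 0, so we bail out (the true JS result is the
+      // double -0.0); whereas lhs = 0, rhs = 5 gives a sum of 5 >= 0, so the
+      // integer result 0 is correct and no bailout is taken.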
+
+ // This code can bailout, so lowering guarantees that the input
+ // operands are not overwritten.
+ MOZ_ASSERT(destreg != lhsreg);
+ MOZ_ASSERT(destreg != rhsreg);
+
+ // Do the multiplication.
+ masm.mul32(lhsreg, rhsreg, destreg, onOverflow);
+
+ // Set Zero flag if destreg is 0.
+ masm.test32(destreg, destreg);
+
+ // ccmn is 'conditional compare negative'.
+ // If the Zero flag is set:
+ // perform a compare negative (compute lhs+rhs and set flags)
+ // else:
+ // clear flags
+ masm.Ccmn(lhsreg32, rhsreg32, vixl::NoFlag, Assembler::Zero);
+
+ // Bails out if (lhs * rhs == 0) && (lhs + rhs < 0):
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+
+ } else {
+ masm.mul32(lhsreg, rhsreg, destreg, onOverflow);
+ }
+ if (onOverflow) {
+ bailoutFrom(&bailout, ins->snapshot());
+ }
+ }
+}
+
+void CodeGenerator::visitDivI(LDivI* ins) {
+ const Register lhs = ToRegister(ins->lhs());
+ const Register rhs = ToRegister(ins->rhs());
+ const Register output = ToRegister(ins->output());
+
+ const ARMRegister lhs32 = toWRegister(ins->lhs());
+ const ARMRegister rhs32 = toWRegister(ins->rhs());
+ const ARMRegister temp32 = toWRegister(ins->getTemp(0));
+ const ARMRegister output32 = toWRegister(ins->output());
+
+ MDiv* mir = ins->mir();
+
+ Label done;
+
+ // Handle division by zero.
+ if (mir->canBeDivideByZero()) {
+ masm.test32(rhs, rhs);
+ if (mir->trapOnError()) {
+ Label nonZero;
+ masm.j(Assembler::NonZero, &nonZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+ masm.bind(&nonZero);
+ } else if (mir->canTruncateInfinities()) {
+ // Truncated division by zero is zero: (Infinity|0 = 0).
+ Label nonZero;
+ masm.j(Assembler::NonZero, &nonZero);
+ masm.Mov(output32, wzr);
+ masm.jump(&done);
+ masm.bind(&nonZero);
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Zero, ins->snapshot());
+ }
+ }
+
+ // Handle an integer overflow from (INT32_MIN / -1).
+ // The integer division gives INT32_MIN, but should be -(double)INT32_MIN.
+ if (mir->canBeNegativeOverflow()) {
+ Label notOverflow;
+
+ // Branch to handle the non-overflow cases.
+ masm.branch32(Assembler::NotEqual, lhs, Imm32(INT32_MIN), &notOverflow);
+ masm.branch32(Assembler::NotEqual, rhs, Imm32(-1), &notOverflow);
+
+ // Handle overflow.
+ if (mir->trapOnError()) {
+ masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->bytecodeOffset());
+ } else if (mir->canTruncateOverflow()) {
+ // (-INT32_MIN)|0 == INT32_MIN, which is already in lhs.
+ masm.move32(lhs, output);
+ masm.jump(&done);
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ bailout(ins->snapshot());
+ }
+ masm.bind(&notOverflow);
+ }
+
+ // Handle negative zero: lhs == 0 && rhs < 0.
+ if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) {
+ Label nonZero;
+ masm.branch32(Assembler::NotEqual, lhs, Imm32(0), &nonZero);
+ masm.cmp32(rhs, Imm32(0));
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+ masm.bind(&nonZero);
+ }
+
+ // Perform integer division.
+ if (mir->canTruncateRemainder()) {
+ masm.Sdiv(output32, lhs32, rhs32);
+ } else {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ ARMRegister scratch32 = temps.AcquireW();
+
+    // ARM does not automatically calculate the remainder; the instruction set
+    // reference suggests multiplication to determine whether one exists.
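+    // That is: compute q = lhs / rhs, then q * rhs, and compare against lhs;
+    // the division was exact (remainder zero) iff the two are equal.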
+ masm.Sdiv(scratch32, lhs32, rhs32);
+ masm.Mul(temp32, scratch32, rhs32);
+ masm.Cmp(lhs32, temp32);
+ bailoutIf(Assembler::NotEqual, ins->snapshot());
+ masm.Mov(output32, scratch32);
+ }
+
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitDivPowTwoI(LDivPowTwoI* ins) {
+ const Register numerator = ToRegister(ins->numerator());
+ const ARMRegister numerator32 = toWRegister(ins->numerator());
+ const ARMRegister output32 = toWRegister(ins->output());
+
+ int32_t shift = ins->shift();
+ bool negativeDivisor = ins->negativeDivisor();
+ MDiv* mir = ins->mir();
+
+ if (!mir->isTruncated() && negativeDivisor) {
+ // 0 divided by a negative number returns a -0 double.
+ bailoutTest32(Assembler::Zero, numerator, numerator, ins->snapshot());
+ }
+
+ if (shift) {
+ if (!mir->isTruncated()) {
+ // If the remainder is != 0, bailout since this must be a double.
+ bailoutTest32(Assembler::NonZero, numerator,
+ Imm32(UINT32_MAX >> (32 - shift)), ins->snapshot());
+ }
+
+ if (mir->isUnsigned()) {
+ // shift right
+ masm.Lsr(output32, numerator32, shift);
+ } else {
+ ARMRegister temp32 = numerator32;
+ // Adjust the value so that shifting produces a correctly
+ // rounded result when the numerator is negative. See 10-1
+ // "Signed Division by a Known Power of 2" in Henry
+ // S. Warren, Jr.'s Hacker's Delight.
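+        //
+        // Worked example (illustrative): numerator = -7, shift = 2. The
+        // sign mask is 0xFFFFFFFF (-1); shifting it right (logically) by
+        // 32 - shift = 30 gives 3 (= 2^shift - 1). Then -7 + 3 = -4, and
+        // -4 >> 2 (arithmetic) = -1, which is -7 / 4 truncated toward zero,
+        // whereas shifting -7 right by 2 directly would give -2.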
+ if (mir->canBeNegativeDividend() && mir->isTruncated()) {
+ if (shift > 1) {
+ // Copy the sign bit of the numerator. (= (2^32 - 1) or 0)
+ masm.Asr(output32, numerator32, 31);
+ temp32 = output32;
+ }
+ // Divide by 2^(32 - shift)
+ // i.e. (= (2^32 - 1) / 2^(32 - shift) or 0)
+ // i.e. (= (2^shift - 1) or 0)
+ masm.Lsr(output32, temp32, 32 - shift);
+ // If signed, make any 1 bit below the shifted bits to bubble up, such
+ // that once shifted the value would be rounded towards 0.
+ masm.Add(output32, output32, numerator32);
+ temp32 = output32;
+ }
+ masm.Asr(output32, temp32, shift);
+
+ if (negativeDivisor) {
+ masm.Neg(output32, output32);
+ }
+ }
+ return;
+ }
+
+ if (negativeDivisor) {
+ // INT32_MIN / -1 overflows.
+ if (!mir->isTruncated()) {
+ masm.Negs(output32, numerator32);
+ bailoutIf(Assembler::Overflow, ins->snapshot());
+ } else if (mir->trapOnError()) {
+ Label ok;
+ masm.Negs(output32, numerator32);
+ masm.branch(Assembler::NoOverflow, &ok);
+ masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->bytecodeOffset());
+ masm.bind(&ok);
+ } else {
+ // Do not set condition flags.
+ masm.Neg(output32, numerator32);
+ }
+ } else {
+ if (mir->isUnsigned() && !mir->isTruncated()) {
+ // Copy and set flags.
+ masm.Adds(output32, numerator32, 0);
+ // Unsigned division by 1 can overflow if output is not truncated, as we
+ // do not have an Unsigned type for MIR instructions.
+ bailoutIf(Assembler::Signed, ins->snapshot());
+ } else {
+ // Copy the result.
+ masm.Mov(output32, numerator32);
+ }
+ }
+}
+
+void CodeGenerator::visitDivConstantI(LDivConstantI* ins) {
+ const ARMRegister lhs32 = toWRegister(ins->numerator());
+ const ARMRegister lhs64 = toXRegister(ins->numerator());
+ const ARMRegister const32 = toWRegister(ins->temp());
+ const ARMRegister output32 = toWRegister(ins->output());
+ const ARMRegister output64 = toXRegister(ins->output());
+ int32_t d = ins->denominator();
+
+ // The absolute value of the denominator isn't a power of 2.
+ using mozilla::Abs;
+ MOZ_ASSERT((Abs(d) & (Abs(d) - 1)) != 0);
+
+ // We will first divide by Abs(d), and negate the answer if d is negative.
+ // If desired, this can be avoided by generalizing computeDivisionConstants.
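+  //
+  // Illustrative example (the actual constants come from
+  // computeSignedDivisionConstants): for d = 3 one valid choice is
+  // M = 0x55555556 with shift = 0. Then for n = 7, (M * n) >> 32 = 2 = 7 / 3;
+  // for n = -7, (M * n) >> 32 = -3, which the "add 1 when n is negative"
+  // correction below turns into -2, the truncated quotient.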
+ auto rmc = ReciprocalMulConstants::computeSignedDivisionConstants(Abs(d));
+
+ // We first compute (M * n) >> 32, where M = rmc.multiplier.
+ masm.Mov(const32, int32_t(rmc.multiplier));
+ if (rmc.multiplier > INT32_MAX) {
+ MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 32));
+
+ // We actually compute (int32_t(M) * n) instead, without the upper bit.
+    // Thus, (M * n) = (int32_t(M) * n) + (n << 32).
+    //
+    // ((int32_t(M) * n) + (n << 32)) can't overflow, as the two addends have
+    // opposite signs because int32_t(M) is negative.
+ masm.Lsl(output64, lhs64, 32);
+
+ // Store (M * n) in output64.
+ masm.Smaddl(output64, const32, lhs32, output64);
+ } else {
+ // Store (M * n) in output64.
+ masm.Smull(output64, const32, lhs32);
+ }
+
+ // (M * n) >> (32 + shift) is the truncated division answer if n is
+ // non-negative, as proved in the comments of computeDivisionConstants. We
+ // must add 1 later if n is negative to get the right answer in all cases.
+ masm.Asr(output64, output64, 32 + rmc.shiftAmount);
+
+ // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
+ // computed with just a sign-extending shift of 31 bits.
+ if (ins->canBeNegativeDividend()) {
+ masm.Asr(const32, lhs32, 31);
+ masm.Sub(output32, output32, const32);
+ }
+
+ // After this, output32 contains the correct truncated division result.
+ if (d < 0) {
+ masm.Neg(output32, output32);
+ }
+
+ if (!ins->mir()->isTruncated()) {
+ // This is a division op. Multiply the obtained value by d to check if
+ // the correct answer is an integer. This cannot overflow, since |d| > 1.
+ masm.Mov(const32, d);
+ masm.Msub(const32, output32, const32, lhs32);
+ // bailout if (lhs - output * d != 0)
+ masm.Cmp(const32, wzr);
+ auto bailoutCond = Assembler::NonZero;
+
+ // If lhs is zero and the divisor is negative, the answer should have
+ // been -0.
+ if (d < 0) {
+ // or bailout if (lhs == 0).
+ // ^ ^
+ // | '-- masm.Ccmp(lhs32, lhs32, .., ..)
+ // '-- masm.Ccmp(.., .., vixl::ZFlag, ! bailoutCond)
+ masm.Ccmp(lhs32, wzr, vixl::ZFlag, Assembler::Zero);
+ bailoutCond = Assembler::Zero;
+ }
+
+ // bailout if (lhs - output * d != 0) or (d < 0 && lhs == 0)
+ bailoutIf(bailoutCond, ins->snapshot());
+ }
+}
+
+void CodeGenerator::visitUDivConstantI(LUDivConstantI* ins) {
+ const ARMRegister lhs32 = toWRegister(ins->numerator());
+ const ARMRegister lhs64 = toXRegister(ins->numerator());
+ const ARMRegister const32 = toWRegister(ins->temp());
+ const ARMRegister output32 = toWRegister(ins->output());
+ const ARMRegister output64 = toXRegister(ins->output());
+ uint32_t d = ins->denominator();
+
+ if (d == 0) {
+ if (ins->mir()->isTruncated()) {
+ if (ins->mir()->trapOnError()) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero,
+ ins->mir()->bytecodeOffset());
+ } else {
+ masm.Mov(output32, wzr);
+ }
+ } else {
+ bailout(ins->snapshot());
+ }
+ return;
+ }
+
+ // The denominator isn't a power of 2 (see LDivPowTwoI).
+ MOZ_ASSERT((d & (d - 1)) != 0);
+
+ auto rmc = ReciprocalMulConstants::computeUnsignedDivisionConstants(d);
+
+ // We first compute (M * n), where M = rmc.multiplier.
+ masm.Mov(const32, int32_t(rmc.multiplier));
+ masm.Umull(output64, const32, lhs32);
+ if (rmc.multiplier > UINT32_MAX) {
+ // M >= 2^32 and shift == 0 is impossible, as d >= 2 implies that
+ // ((M * n) >> (32 + shift)) >= n > floor(n/d) whenever n >= d,
+ // contradicting the proof of correctness in computeDivisionConstants.
+ MOZ_ASSERT(rmc.shiftAmount > 0);
+ MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 33));
+
+ // We actually compute (uint32_t(M) * n) instead, without the upper bit.
+    // Thus, (M * n) = (uint32_t(M) * n) + (n << 32).
+    //
+    // ((uint32_t(M) * n) + (n << 32)) can overflow. Hacker's Delight explains
+    // a trick to avoid this overflow case, but we can avoid it by doing the
+    // addition on 64-bit registers.
+    //
+    // Compute (((uint32_t(M) * n) >> 32) + n), i.e. (M * n) >> 32.
+ masm.Add(output64, lhs64, Operand(output64, vixl::LSR, 32));
+
+ // (M * n) >> (32 + shift) is the truncated division answer.
+ masm.Lsr(output64, output64, rmc.shiftAmount);
+ } else {
+ // (M * n) >> (32 + shift) is the truncated division answer.
+ masm.Lsr(output64, output64, 32 + rmc.shiftAmount);
+ }
+
+ // We now have the truncated division value. We are checking whether the
+ // division resulted in an integer, we multiply the obtained value by d and
+ // check the remainder of the division.
+ if (!ins->mir()->isTruncated()) {
+ masm.Mov(const32, d);
+ masm.Msub(const32, output32, const32, lhs32);
+ // bailout if (lhs - output * d != 0)
+    masm.Cmp(const32, wzr);
+ bailoutIf(Assembler::NonZero, ins->snapshot());
+ }
+}
+
+void CodeGenerator::visitModI(LModI* ins) {
+ ARMRegister lhs = toWRegister(ins->lhs());
+ ARMRegister rhs = toWRegister(ins->rhs());
+ ARMRegister output = toWRegister(ins->output());
+ Label done;
+
+ MMod* mir = ins->mir();
+
+ // Prevent divide by zero.
+ if (mir->canBeDivideByZero()) {
+ if (mir->isTruncated()) {
+ if (mir->trapOnError()) {
+ Label nonZero;
+ masm.Cbnz(rhs, &nonZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+ masm.bind(&nonZero);
+ } else {
+ // Truncated division by zero yields integer zero.
+ masm.Mov(output, rhs);
+ masm.Cbz(rhs, &done);
+ }
+ } else {
+ // Non-truncated division by zero produces a non-integer.
+ MOZ_ASSERT(!gen->compilingWasm());
+ masm.Cmp(rhs, Operand(0));
+ bailoutIf(Assembler::Equal, ins->snapshot());
+ }
+ }
+
+ // Signed division.
+ masm.Sdiv(output, lhs, rhs);
+
+ // Compute the remainder: output = lhs - (output * rhs).
+ masm.Msub(output, output, rhs, lhs);
+
+ if (mir->canBeNegativeDividend() && !mir->isTruncated()) {
+ // If output == 0 and lhs < 0, then the result should be double -0.0.
+ // Note that this guard handles lhs == INT_MIN and rhs == -1:
+ // output = INT_MIN - (INT_MIN / -1) * -1
+ // = INT_MIN - INT_MIN
+ // = 0
+ masm.Cbnz(output, &done);
+ bailoutCmp32(Assembler::LessThan, lhs, Imm32(0), ins->snapshot());
+ }
+
+ if (done.used()) {
+ masm.bind(&done);
+ }
+}
+
+void CodeGenerator::visitModPowTwoI(LModPowTwoI* ins) {
+ Register lhs = ToRegister(ins->getOperand(0));
+ ARMRegister lhsw = toWRegister(ins->getOperand(0));
+ ARMRegister outw = toWRegister(ins->output());
+
+ int32_t shift = ins->shift();
+ bool canBeNegative =
+ !ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend();
+
+ Label negative;
+ if (canBeNegative) {
+ // Switch based on sign of the lhs.
+ // Positive numbers are just a bitmask.
+ masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
+ }
+
+ masm.And(outw, lhsw, Operand((uint32_t(1) << shift) - 1));
+
+ if (canBeNegative) {
+ Label done;
+ masm.jump(&done);
+
+ // Negative numbers need a negate, bitmask, negate.
+ masm.bind(&negative);
+ masm.Neg(outw, Operand(lhsw));
+ masm.And(outw, outw, Operand((uint32_t(1) << shift) - 1));
+
+ // Since a%b has the same sign as b, and a is negative in this branch,
+ // an answer of 0 means the correct result is actually -0. Bail out.
+ if (!ins->mir()->isTruncated()) {
+ masm.Negs(outw, Operand(outw));
+ bailoutIf(Assembler::Zero, ins->snapshot());
+ } else {
+ masm.Neg(outw, Operand(outw));
+ }
+
+ masm.bind(&done);
+ }
+}
+
+void CodeGenerator::visitModMaskI(LModMaskI* ins) {
+ MMod* mir = ins->mir();
+ int32_t shift = ins->shift();
+
+ const Register src = ToRegister(ins->getOperand(0));
+ const Register dest = ToRegister(ins->getDef(0));
+ const Register hold = ToRegister(ins->getTemp(0));
+ const Register remain = ToRegister(ins->getTemp(1));
+
+ const ARMRegister src32 = ARMRegister(src, 32);
+ const ARMRegister dest32 = ARMRegister(dest, 32);
+ const ARMRegister remain32 = ARMRegister(remain, 32);
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMRegister scratch32 = temps.AcquireW();
+ const Register scratch = scratch32.asUnsized();
+
+  // We wish to compute x % ((1 << y) - 1) for a known constant y.
+ //
+ // 1. Let b = (1<<y) and C = (1<<y)-1, then think of the 32 bit dividend as
+ // a number in base b, namely c_0*1 + c_1*b + c_2*b^2 ... c_n*b^n
+ //
+ // 2. Since both addition and multiplication commute with modulus:
+ // x % C == (c_0 + c_1*b + ... + c_n*b^n) % C ==
+ // (c_0 % C) + (c_1%C) * (b % C) + (c_2 % C) * (b^2 % C)...
+ //
+  //  3. Since b == C + 1, b % C == 1, and b^n % C == 1, the whole thing
+  //     simplifies to: (c_0 + c_1 + c_2 + ... + c_n) % C
+ //
+ // Each c_n can easily be computed by a shift/bitextract, and the modulus
+ // can be maintained by simply subtracting by C whenever the number gets
+ // over C.
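+  //
+  // Worked example (illustrative): shift = 3, so C = 7, and x = 23. In base
+  // b = 8, 23 is 2*8 + 7; the digit sum is 2 + 7 = 9, and subtracting C once
+  // gives 2, which is indeed 23 % 7. The loop below accumulates the base-b
+  // digits of the (absolute value of the) input and subtracts C whenever the
+  // running sum reaches or exceeds C.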
+ int32_t mask = (1 << shift) - 1;
+ Label loop;
+
+ // Register 'hold' holds -1 if the value was negative, 1 otherwise.
+ // The remain reg holds the remaining bits that have not been processed.
+ // The scratch reg serves as a temporary location to store extracted bits.
+ // The dest reg is the accumulator, becoming final result.
+ //
+ // Move the whole value into the remain.
+ masm.Mov(remain32, src32);
+ // Zero out the dest.
+ masm.Mov(dest32, wzr);
+ // Set the hold appropriately.
+ {
+ Label negative;
+ masm.branch32(Assembler::Signed, remain, Imm32(0), &negative);
+ masm.move32(Imm32(1), hold);
+ masm.jump(&loop);
+
+ masm.bind(&negative);
+ masm.move32(Imm32(-1), hold);
+ masm.neg32(remain);
+ }
+
+ // Begin the main loop.
+ masm.bind(&loop);
+ {
+ // Extract the bottom bits into scratch.
+ masm.And(scratch32, remain32, Operand(mask));
+ // Add those bits to the accumulator.
+ masm.Add(dest32, dest32, scratch32);
+ // Do a trial subtraction. This functions as a cmp but remembers the result.
+ masm.Subs(scratch32, dest32, Operand(mask));
+ // If (sum - C) > 0, store sum - C back into sum, thus performing a modulus.
+ {
+ Label sumSigned;
+ masm.branch32(Assembler::Signed, scratch, scratch, &sumSigned);
+ masm.Mov(dest32, scratch32);
+ masm.bind(&sumSigned);
+ }
+ // Get rid of the bits that we extracted before.
+ masm.Lsr(remain32, remain32, shift);
+ // If the shift produced zero, finish, otherwise, continue in the loop.
+ masm.branchTest32(Assembler::NonZero, remain, remain, &loop);
+ }
+
+ // Check the hold to see if we need to negate the result.
+ {
+ Label done;
+
+ // If the hold was non-zero, negate the result to match JS expectations.
+ masm.branchTest32(Assembler::NotSigned, hold, hold, &done);
+ if (mir->canBeNegativeDividend() && !mir->isTruncated()) {
+ // Bail in case of negative zero hold.
+ bailoutTest32(Assembler::Zero, hold, hold, ins->snapshot());
+ }
+
+ masm.neg32(dest);
+ masm.bind(&done);
+ }
+}
+
+void CodeGeneratorARM64::emitBigIntDiv(LBigIntDiv* ins, Register dividend,
+ Register divisor, Register output,
+ Label* fail) {
+ // Callers handle division by zero and integer overflow.
+
+ const ARMRegister dividend64(dividend, 64);
+ const ARMRegister divisor64(divisor, 64);
+
+ masm.Sdiv(/* result= */ dividend64, dividend64, divisor64);
+
+ // Create and return the result.
+ masm.newGCBigInt(output, divisor, initialBigIntHeap(), fail);
+ masm.initializeBigInt(output, dividend);
+}
+
+void CodeGeneratorARM64::emitBigIntMod(LBigIntMod* ins, Register dividend,
+ Register divisor, Register output,
+ Label* fail) {
+ // Callers handle division by zero and integer overflow.
+
+ const ARMRegister dividend64(dividend, 64);
+ const ARMRegister divisor64(divisor, 64);
+ const ARMRegister output64(output, 64);
+
+ // Signed division.
+ masm.Sdiv(output64, dividend64, divisor64);
+
+ // Compute the remainder: output = dividend - (output * divisor).
+ masm.Msub(/* result= */ dividend64, output64, divisor64, dividend64);
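+  // For example, dividend == -7 and divisor == 2 give a truncated quotient
+  // of -3, so the remainder is -7 - (-3 * 2) == -1, matching -7n % 2n in JS.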
+
+ // Create and return the result.
+ masm.newGCBigInt(output, divisor, initialBigIntHeap(), fail);
+ masm.initializeBigInt(output, dividend);
+}
+
+void CodeGenerator::visitBitNotI(LBitNotI* ins) {
+ const LAllocation* input = ins->getOperand(0);
+ const LDefinition* output = ins->getDef(0);
+ masm.Mvn(toWRegister(output), toWOperand(input));
+}
+
+void CodeGenerator::visitBitNotI64(LBitNotI64* ins) {
+ Register input = ToRegister(ins->input());
+ Register output = ToRegister(ins->output());
+ masm.Mvn(vixl::Register(output, 64), vixl::Register(input, 64));
+}
+
+void CodeGenerator::visitBitOpI(LBitOpI* ins) {
+ const ARMRegister lhs = toWRegister(ins->getOperand(0));
+ const Operand rhs = toWOperand(ins->getOperand(1));
+ const ARMRegister dest = toWRegister(ins->getDef(0));
+
+ switch (ins->bitop()) {
+ case JSOp::BitOr:
+ masm.Orr(dest, lhs, rhs);
+ break;
+ case JSOp::BitXor:
+ masm.Eor(dest, lhs, rhs);
+ break;
+ case JSOp::BitAnd:
+ masm.And(dest, lhs, rhs);
+ break;
+ default:
+ MOZ_CRASH("unexpected binary opcode");
+ }
+}
+
+void CodeGenerator::visitShiftI(LShiftI* ins) {
+ const ARMRegister lhs = toWRegister(ins->lhs());
+ const LAllocation* rhs = ins->rhs();
+ const ARMRegister dest = toWRegister(ins->output());
+
+ if (rhs->isConstant()) {
+ int32_t shift = ToInt32(rhs) & 0x1F;
+ switch (ins->bitop()) {
+ case JSOp::Lsh:
+ masm.Lsl(dest, lhs, shift);
+ break;
+ case JSOp::Rsh:
+ masm.Asr(dest, lhs, shift);
+ break;
+ case JSOp::Ursh:
+ if (shift) {
+ masm.Lsr(dest, lhs, shift);
+ } else if (ins->mir()->toUrsh()->fallible()) {
+ // x >>> 0 can overflow.
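+        // For example, in JS (-1) >>> 0 evaluates to 4294967295, which is
+        // not representable as an int32, so a result with the sign bit set
+        // must bail out and be recomputed as a double.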
+ masm.Ands(dest, lhs, Operand(0xFFFFFFFF));
+ bailoutIf(Assembler::Signed, ins->snapshot());
+ } else {
+ masm.Mov(dest, lhs);
+ }
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+ } else {
+ const ARMRegister rhsreg = toWRegister(rhs);
+ switch (ins->bitop()) {
+ case JSOp::Lsh:
+ masm.Lsl(dest, lhs, rhsreg);
+ break;
+ case JSOp::Rsh:
+ masm.Asr(dest, lhs, rhsreg);
+ break;
+ case JSOp::Ursh:
+ masm.Lsr(dest, lhs, rhsreg);
+ if (ins->mir()->toUrsh()->fallible()) {
+        // x >>> 0 can overflow.
+ masm.Cmp(dest, Operand(0));
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+ }
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+ }
+}
+
+void CodeGenerator::visitUrshD(LUrshD* ins) {
+ const ARMRegister lhs = toWRegister(ins->lhs());
+ const LAllocation* rhs = ins->rhs();
+ const FloatRegister out = ToFloatRegister(ins->output());
+
+ const Register temp = ToRegister(ins->temp());
+ const ARMRegister temp32 = toWRegister(ins->temp());
+
+ if (rhs->isConstant()) {
+ int32_t shift = ToInt32(rhs) & 0x1F;
+ if (shift) {
+ masm.Lsr(temp32, lhs, shift);
+ masm.convertUInt32ToDouble(temp, out);
+ } else {
+ masm.convertUInt32ToDouble(ToRegister(ins->lhs()), out);
+ }
+ } else {
+ masm.And(temp32, toWRegister(rhs), Operand(0x1F));
+ masm.Lsr(temp32, lhs, temp32);
+ masm.convertUInt32ToDouble(temp, out);
+ }
+}
+
+void CodeGenerator::visitPowHalfD(LPowHalfD* ins) {
+ FloatRegister input = ToFloatRegister(ins->input());
+ FloatRegister output = ToFloatRegister(ins->output());
+
+ ScratchDoubleScope scratch(masm);
+
+ Label done, sqrt;
+
+ if (!ins->mir()->operandIsNeverNegativeInfinity()) {
+ // Branch if not -Infinity.
+ masm.loadConstantDouble(NegativeInfinity<double>(), scratch);
+
+ Assembler::DoubleCondition cond = Assembler::DoubleNotEqualOrUnordered;
+ if (ins->mir()->operandIsNeverNaN()) {
+ cond = Assembler::DoubleNotEqual;
+ }
+ masm.branchDouble(cond, input, scratch, &sqrt);
+
+ // Math.pow(-Infinity, 0.5) == Infinity.
+ masm.zeroDouble(output);
+ masm.subDouble(scratch, output);
+ masm.jump(&done);
+
+ masm.bind(&sqrt);
+ }
+
+ if (!ins->mir()->operandIsNeverNegativeZero()) {
+ // Math.pow(-0, 0.5) == 0 == Math.pow(0, 0.5).
+ // Adding 0 converts any -0 to 0.
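+    // (In IEEE-754, -0.0 + 0.0 == +0.0 under round-to-nearest, whereas
+    // sqrt(-0.0) == -0.0, so the addition is what guarantees a +0 result.)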
+ masm.zeroDouble(scratch);
+ masm.addDouble(input, scratch);
+ masm.sqrtDouble(scratch, output);
+ } else {
+ masm.sqrtDouble(input, output);
+ }
+
+ masm.bind(&done);
+}
+
+MoveOperand CodeGeneratorARM64::toMoveOperand(const LAllocation a) const {
+ if (a.isGeneralReg()) {
+ return MoveOperand(ToRegister(a));
+ }
+ if (a.isFloatReg()) {
+ return MoveOperand(ToFloatRegister(a));
+ }
+ MoveOperand::Kind kind = a.isStackArea() ? MoveOperand::Kind::EffectiveAddress
+ : MoveOperand::Kind::Memory;
+ return MoveOperand(ToAddress(a), kind);
+}
+
+class js::jit::OutOfLineTableSwitch
+ : public OutOfLineCodeBase<CodeGeneratorARM64> {
+ MTableSwitch* mir_;
+ CodeLabel jumpLabel_;
+
+ void accept(CodeGeneratorARM64* codegen) override {
+ codegen->visitOutOfLineTableSwitch(this);
+ }
+
+ public:
+ explicit OutOfLineTableSwitch(MTableSwitch* mir) : mir_(mir) {}
+
+ MTableSwitch* mir() const { return mir_; }
+
+ CodeLabel* jumpLabel() { return &jumpLabel_; }
+};
+
+void CodeGeneratorARM64::visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool) {
+ MTableSwitch* mir = ool->mir();
+
+  // Prevent nop and pool sequences from appearing in the jump table.
+ AutoForbidPoolsAndNops afp(
+ &masm, (mir->numCases() + 1) * (sizeof(void*) / vixl::kInstructionSize));
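+  // Note: sizeof(void*) / vixl::kInstructionSize == 8 / 4 == 2 on ARM64, so
+  // each pointer-sized table entry spans two instruction slots; the extra +1
+  // entry presumably covers the alignment padding emitted just below.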
+ masm.haltingAlign(sizeof(void*));
+ masm.bind(ool->jumpLabel());
+ masm.addCodeLabel(*ool->jumpLabel());
+
+ for (size_t i = 0; i < mir->numCases(); i++) {
+ LBlock* caseblock = skipTrivialBlocks(mir->getCase(i))->lir();
+ Label* caseheader = caseblock->label();
+ uint32_t caseoffset = caseheader->offset();
+
+ // The entries of the jump table need to be absolute addresses,
+ // and thus must be patched after codegen is finished.
+ CodeLabel cl;
+ masm.writeCodePointer(&cl);
+ cl.target()->bind(caseoffset);
+ masm.addCodeLabel(cl);
+ }
+}
+
+void CodeGeneratorARM64::emitTableSwitchDispatch(MTableSwitch* mir,
+ Register index,
+ Register base) {
+ Label* defaultcase = skipTrivialBlocks(mir->getDefault())->lir()->label();
+
+ // Let the lowest table entry be indexed at 0.
+ if (mir->low() != 0) {
+ masm.sub32(Imm32(mir->low()), index);
+ }
+
+ // Jump to the default case if input is out of range.
+ int32_t cases = mir->numCases();
+ masm.branch32(Assembler::AboveOrEqual, index, Imm32(cases), defaultcase);
+
+ // Because the target code has not yet been generated, we cannot know the
+ // instruction offsets for use as jump targets. Therefore we construct
+ // an OutOfLineTableSwitch that winds up holding the jump table.
+ //
+ // Because the jump table is generated as part of out-of-line code,
+ // it is generated after all the regular codegen, so the jump targets
+ // are guaranteed to exist when generating the jump table.
+ OutOfLineTableSwitch* ool = new (alloc()) OutOfLineTableSwitch(mir);
+ addOutOfLineCode(ool, mir);
+
+ // Use the index to get the address of the jump target from the table.
+ masm.mov(ool->jumpLabel(), base);
+ BaseIndex pointer(base, index, ScalePointer);
+
+ // Load the target from the jump table and branch to it.
+ masm.branchToComputedAddress(pointer);
+}
+
+void CodeGenerator::visitMathD(LMathD* math) {
+ ARMFPRegister lhs(ToFloatRegister(math->lhs()), 64);
+ ARMFPRegister rhs(ToFloatRegister(math->rhs()), 64);
+ ARMFPRegister output(ToFloatRegister(math->output()), 64);
+
+ switch (math->jsop()) {
+ case JSOp::Add:
+ masm.Fadd(output, lhs, rhs);
+ break;
+ case JSOp::Sub:
+ masm.Fsub(output, lhs, rhs);
+ break;
+ case JSOp::Mul:
+ masm.Fmul(output, lhs, rhs);
+ break;
+ case JSOp::Div:
+ masm.Fdiv(output, lhs, rhs);
+ break;
+ default:
+ MOZ_CRASH("unexpected opcode");
+ }
+}
+
+void CodeGenerator::visitMathF(LMathF* math) {
+ ARMFPRegister lhs(ToFloatRegister(math->lhs()), 32);
+ ARMFPRegister rhs(ToFloatRegister(math->rhs()), 32);
+ ARMFPRegister output(ToFloatRegister(math->output()), 32);
+
+ switch (math->jsop()) {
+ case JSOp::Add:
+ masm.Fadd(output, lhs, rhs);
+ break;
+ case JSOp::Sub:
+ masm.Fsub(output, lhs, rhs);
+ break;
+ case JSOp::Mul:
+ masm.Fmul(output, lhs, rhs);
+ break;
+ case JSOp::Div:
+ masm.Fdiv(output, lhs, rhs);
+ break;
+ default:
+ MOZ_CRASH("unexpected opcode");
+ }
+}
+
+void CodeGenerator::visitClzI(LClzI* lir) {
+ ARMRegister input = toWRegister(lir->input());
+ ARMRegister output = toWRegister(lir->output());
+ masm.Clz(output, input);
+}
+
+void CodeGenerator::visitCtzI(LCtzI* lir) {
+ Register input = ToRegister(lir->input());
+ Register output = ToRegister(lir->output());
+ masm.ctz32(input, output, /* knownNotZero = */ false);
+}
+
+void CodeGenerator::visitTruncateDToInt32(LTruncateDToInt32* ins) {
+ emitTruncateDouble(ToFloatRegister(ins->input()), ToRegister(ins->output()),
+ ins->mir());
+}
+
+void CodeGenerator::visitNearbyInt(LNearbyInt* lir) {
+ FloatRegister input = ToFloatRegister(lir->input());
+ FloatRegister output = ToFloatRegister(lir->output());
+
+ RoundingMode roundingMode = lir->mir()->roundingMode();
+ masm.nearbyIntDouble(roundingMode, input, output);
+}
+
+void CodeGenerator::visitNearbyIntF(LNearbyIntF* lir) {
+ FloatRegister input = ToFloatRegister(lir->input());
+ FloatRegister output = ToFloatRegister(lir->output());
+
+ RoundingMode roundingMode = lir->mir()->roundingMode();
+ masm.nearbyIntFloat32(roundingMode, input, output);
+}
+
+void CodeGenerator::visitWasmBuiltinTruncateDToInt32(
+ LWasmBuiltinTruncateDToInt32* lir) {
+ emitTruncateDouble(ToFloatRegister(lir->getOperand(0)),
+ ToRegister(lir->getDef(0)), lir->mir());
+}
+
+void CodeGenerator::visitTruncateFToInt32(LTruncateFToInt32* ins) {
+ emitTruncateFloat32(ToFloatRegister(ins->input()), ToRegister(ins->output()),
+ ins->mir());
+}
+
+void CodeGenerator::visitWasmBuiltinTruncateFToInt32(
+ LWasmBuiltinTruncateFToInt32* lir) {
+ emitTruncateFloat32(ToFloatRegister(lir->getOperand(0)),
+ ToRegister(lir->getDef(0)), lir->mir());
+}
+
+ValueOperand CodeGeneratorARM64::ToValue(LInstruction* ins, size_t pos) {
+ return ValueOperand(ToRegister(ins->getOperand(pos)));
+}
+
+ValueOperand CodeGeneratorARM64::ToTempValue(LInstruction* ins, size_t pos) {
+ MOZ_CRASH("CodeGeneratorARM64::ToTempValue");
+}
+
+void CodeGenerator::visitValue(LValue* value) {
+ ValueOperand result = ToOutValue(value);
+ masm.moveValue(value->value(), result);
+}
+
+void CodeGenerator::visitBox(LBox* box) {
+ const LAllocation* in = box->getOperand(0);
+ ValueOperand result = ToOutValue(box);
+
+ masm.moveValue(TypedOrValueRegister(box->type(), ToAnyRegister(in)), result);
+}
+
+void CodeGenerator::visitUnbox(LUnbox* unbox) {
+ MUnbox* mir = unbox->mir();
+
+ Register result = ToRegister(unbox->output());
+
+ if (mir->fallible()) {
+ const ValueOperand value = ToValue(unbox, LUnbox::Input);
+ Label bail;
+ switch (mir->type()) {
+ case MIRType::Int32:
+ masm.fallibleUnboxInt32(value, result, &bail);
+ break;
+ case MIRType::Boolean:
+ masm.fallibleUnboxBoolean(value, result, &bail);
+ break;
+ case MIRType::Object:
+ masm.fallibleUnboxObject(value, result, &bail);
+ break;
+ case MIRType::String:
+ masm.fallibleUnboxString(value, result, &bail);
+ break;
+ case MIRType::Symbol:
+ masm.fallibleUnboxSymbol(value, result, &bail);
+ break;
+ case MIRType::BigInt:
+ masm.fallibleUnboxBigInt(value, result, &bail);
+ break;
+ default:
+ MOZ_CRASH("Given MIRType cannot be unboxed.");
+ }
+ bailoutFrom(&bail, unbox->snapshot());
+ return;
+ }
+
+ // Infallible unbox.
+
+ ValueOperand input = ToValue(unbox, LUnbox::Input);
+
+#ifdef DEBUG
+ // Assert the types match.
+ JSValueTag tag = MIRTypeToTag(mir->type());
+ Label ok;
+ {
+ ScratchTagScope scratch(masm, input);
+ masm.splitTagForTest(input, scratch);
+ masm.cmpTag(scratch, ImmTag(tag));
+ }
+ masm.B(&ok, Assembler::Condition::Equal);
+ masm.assumeUnreachable("Infallible unbox type mismatch");
+ masm.bind(&ok);
+#endif
+
+ switch (mir->type()) {
+ case MIRType::Int32:
+ masm.unboxInt32(input, result);
+ break;
+ case MIRType::Boolean:
+ masm.unboxBoolean(input, result);
+ break;
+ case MIRType::Object:
+ masm.unboxObject(input, result);
+ break;
+ case MIRType::String:
+ masm.unboxString(input, result);
+ break;
+ case MIRType::Symbol:
+ masm.unboxSymbol(input, result);
+ break;
+ case MIRType::BigInt:
+ masm.unboxBigInt(input, result);
+ break;
+ default:
+ MOZ_CRASH("Given MIRType cannot be unboxed.");
+ }
+}
+
+void CodeGenerator::visitDouble(LDouble* ins) {
+ const LDefinition* out = ins->getDef(0);
+ masm.loadConstantDouble(ins->value(), ToFloatRegister(out));
+}
+
+void CodeGenerator::visitFloat32(LFloat32* ins) {
+ const LDefinition* out = ins->getDef(0);
+ masm.loadConstantFloat32(ins->value(), ToFloatRegister(out));
+}
+
+void CodeGenerator::visitTestDAndBranch(LTestDAndBranch* test) {
+ const LAllocation* opd = test->input();
+ MBasicBlock* ifTrue = test->ifTrue();
+ MBasicBlock* ifFalse = test->ifFalse();
+
+ masm.Fcmp(ARMFPRegister(ToFloatRegister(opd), 64), 0.0);
+
+  // If the compare set the Z (zero) flag, the result is definitely false.
+ jumpToBlock(ifFalse, Assembler::Zero);
+
+ // Overflow means one of the operands was NaN, which is also false.
+ jumpToBlock(ifFalse, Assembler::Overflow);
+ jumpToBlock(ifTrue);
+}
+
+void CodeGenerator::visitTestFAndBranch(LTestFAndBranch* test) {
+ const LAllocation* opd = test->input();
+ MBasicBlock* ifTrue = test->ifTrue();
+ MBasicBlock* ifFalse = test->ifFalse();
+
+ masm.Fcmp(ARMFPRegister(ToFloatRegister(opd), 32), 0.0);
+
+  // If the compare set the Z (zero) flag, the result is definitely false.
+ jumpToBlock(ifFalse, Assembler::Zero);
+
+ // Overflow means one of the operands was NaN, which is also false.
+ jumpToBlock(ifFalse, Assembler::Overflow);
+ jumpToBlock(ifTrue);
+}
+
+void CodeGenerator::visitCompareD(LCompareD* comp) {
+ const FloatRegister left = ToFloatRegister(comp->left());
+ const FloatRegister right = ToFloatRegister(comp->right());
+ ARMRegister output = toWRegister(comp->output());
+ Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop());
+
+ masm.compareDouble(cond, left, right);
+ masm.cset(output, Assembler::ConditionFromDoubleCondition(cond));
+}
+
+void CodeGenerator::visitCompareF(LCompareF* comp) {
+ const FloatRegister left = ToFloatRegister(comp->left());
+ const FloatRegister right = ToFloatRegister(comp->right());
+ ARMRegister output = toWRegister(comp->output());
+ Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop());
+
+ masm.compareFloat(cond, left, right);
+ masm.cset(output, Assembler::ConditionFromDoubleCondition(cond));
+}
+
+void CodeGenerator::visitCompareDAndBranch(LCompareDAndBranch* comp) {
+ const FloatRegister left = ToFloatRegister(comp->left());
+ const FloatRegister right = ToFloatRegister(comp->right());
+ Assembler::DoubleCondition doubleCond =
+ JSOpToDoubleCondition(comp->cmpMir()->jsop());
+ Assembler::Condition cond =
+ Assembler::ConditionFromDoubleCondition(doubleCond);
+
+ masm.compareDouble(doubleCond, left, right);
+ emitBranch(cond, comp->ifTrue(), comp->ifFalse());
+}
+
+void CodeGenerator::visitCompareFAndBranch(LCompareFAndBranch* comp) {
+ const FloatRegister left = ToFloatRegister(comp->left());
+ const FloatRegister right = ToFloatRegister(comp->right());
+ Assembler::DoubleCondition doubleCond =
+ JSOpToDoubleCondition(comp->cmpMir()->jsop());
+ Assembler::Condition cond =
+ Assembler::ConditionFromDoubleCondition(doubleCond);
+
+ masm.compareFloat(doubleCond, left, right);
+ emitBranch(cond, comp->ifTrue(), comp->ifFalse());
+}
+
+void CodeGenerator::visitBitAndAndBranch(LBitAndAndBranch* baab) {
+ if (baab->is64()) {
+ ARMRegister regL = toXRegister(baab->left());
+ if (baab->right()->isConstant()) {
+ masm.Tst(regL, Operand(ToInt64(baab->right())));
+ } else {
+ masm.Tst(regL, toXRegister(baab->right()));
+ }
+ } else {
+ ARMRegister regL = toWRegister(baab->left());
+ if (baab->right()->isConstant()) {
+ masm.Tst(regL, Operand(ToInt32(baab->right())));
+ } else {
+ masm.Tst(regL, toWRegister(baab->right()));
+ }
+ }
+ emitBranch(baab->cond(), baab->ifTrue(), baab->ifFalse());
+}
+
+void CodeGenerator::visitWasmUint32ToDouble(LWasmUint32ToDouble* lir) {
+ masm.convertUInt32ToDouble(ToRegister(lir->input()),
+ ToFloatRegister(lir->output()));
+}
+
+void CodeGenerator::visitWasmUint32ToFloat32(LWasmUint32ToFloat32* lir) {
+ masm.convertUInt32ToFloat32(ToRegister(lir->input()),
+ ToFloatRegister(lir->output()));
+}
+
+void CodeGenerator::visitNotI(LNotI* ins) {
+ ARMRegister input = toWRegister(ins->input());
+ ARMRegister output = toWRegister(ins->output());
+
+ masm.Cmp(input, ZeroRegister32);
+ masm.Cset(output, Assembler::Zero);
+}
+
+// NZCV
+// NAN -> 0011
+// == -> 0110
+// < -> 1000
+// > -> 0010
+void CodeGenerator::visitNotD(LNotD* ins) {
+ ARMFPRegister input(ToFloatRegister(ins->input()), 64);
+ ARMRegister output = toWRegister(ins->output());
+
+ // Set output to 1 if input compares equal to 0.0, else 0.
+ masm.Fcmp(input, 0.0);
+ masm.Cset(output, Assembler::Equal);
+
+ // Comparison with NaN sets V in the NZCV register.
+ // If the input was NaN, output must now be zero, so it can be incremented.
+ // The instruction is read: "output = if NoOverflow then output else 0+1".
+ masm.Csinc(output, output, ZeroRegister32, Assembler::NoOverflow);
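+  //
+  // For example, a NaN input sets V; Cset(Equal) then yields 0, and the
+  // Csinc takes its second arm, producing wzr + 1 == 1, so !NaN is true as
+  // JS requires.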
+}
+
+void CodeGenerator::visitNotF(LNotF* ins) {
+ ARMFPRegister input(ToFloatRegister(ins->input()), 32);
+ ARMRegister output = toWRegister(ins->output());
+
+  // Set output to 1 if input compares equal to 0.0, else 0.
+ masm.Fcmp(input, 0.0);
+ masm.Cset(output, Assembler::Equal);
+
+ // Comparison with NaN sets V in the NZCV register.
+ // If the input was NaN, output must now be zero, so it can be incremented.
+ // The instruction is read: "output = if NoOverflow then output else 0+1".
+ masm.Csinc(output, output, ZeroRegister32, Assembler::NoOverflow);
+}
+
+void CodeGeneratorARM64::generateInvalidateEpilogue() {
+ // Ensure that there is enough space in the buffer for the OsiPoint patching
+ // to occur. Otherwise, we could overwrite the invalidation epilogue.
+ for (size_t i = 0; i < sizeof(void*); i += Assembler::NopSize()) {
+ masm.nop();
+ }
+
+ masm.bind(&invalidate_);
+
+  // Push the return address of the point that we bailed out of onto the
+  // stack.
+ masm.push(lr);
+
+ // Push the Ion script onto the stack (when we determine what that pointer
+ // is).
+ invalidateEpilogueData_ = masm.pushWithPatch(ImmWord(uintptr_t(-1)));
+
+ // Jump to the invalidator which will replace the current frame.
+ TrampolinePtr thunk = gen->jitRuntime()->getInvalidationThunk();
+ masm.jump(thunk);
+}
+
+template <class U>
+Register getBase(U* mir) {
+ switch (mir->base()) {
+ case U::Heap:
+ return HeapReg;
+ }
+ return InvalidReg;
+}
+
+void CodeGenerator::visitAsmJSLoadHeap(LAsmJSLoadHeap* ins) {
+ const MAsmJSLoadHeap* mir = ins->mir();
+ MOZ_ASSERT(!mir->hasMemoryBase());
+
+ const LAllocation* ptr = ins->ptr();
+ const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
+
+ Register ptrReg = ToRegister(ptr);
+ Scalar::Type accessType = mir->accessType();
+ bool isFloat = accessType == Scalar::Float32 || accessType == Scalar::Float64;
+ Label done;
+
+ if (mir->needsBoundsCheck()) {
+ Label boundsCheckPassed;
+ Register boundsCheckLimitReg = ToRegister(boundsCheckLimit);
+ masm.wasmBoundsCheck32(Assembler::Below, ptrReg, boundsCheckLimitReg,
+ &boundsCheckPassed);
+ // Return a default value in case of a bounds-check failure.
+ if (isFloat) {
+ if (accessType == Scalar::Float32) {
+ masm.loadConstantFloat32(GenericNaN(), ToFloatRegister(ins->output()));
+ } else {
+ masm.loadConstantDouble(GenericNaN(), ToFloatRegister(ins->output()));
+ }
+ } else {
+ masm.Mov(ARMRegister(ToRegister(ins->output()), 64), 0);
+ }
+ masm.jump(&done);
+ masm.bind(&boundsCheckPassed);
+ }
+
+ MemOperand addr(ARMRegister(HeapReg, 64), ARMRegister(ptrReg, 64));
+ switch (accessType) {
+ case Scalar::Int8:
+ masm.Ldrb(toWRegister(ins->output()), addr);
+ masm.Sxtb(toWRegister(ins->output()), toWRegister(ins->output()));
+ break;
+ case Scalar::Uint8:
+ masm.Ldrb(toWRegister(ins->output()), addr);
+ break;
+ case Scalar::Int16:
+ masm.Ldrh(toWRegister(ins->output()), addr);
+ masm.Sxth(toWRegister(ins->output()), toWRegister(ins->output()));
+ break;
+ case Scalar::Uint16:
+ masm.Ldrh(toWRegister(ins->output()), addr);
+ break;
+ case Scalar::Int32:
+ case Scalar::Uint32:
+ masm.Ldr(toWRegister(ins->output()), addr);
+ break;
+ case Scalar::Float64:
+ masm.Ldr(ARMFPRegister(ToFloatRegister(ins->output()), 64), addr);
+ break;
+ case Scalar::Float32:
+ masm.Ldr(ARMFPRegister(ToFloatRegister(ins->output()), 32), addr);
+ break;
+ default:
+ MOZ_CRASH("unexpected array type");
+ }
+ if (done.used()) {
+ masm.bind(&done);
+ }
+}
+
+void CodeGenerator::visitAsmJSStoreHeap(LAsmJSStoreHeap* ins) {
+ const MAsmJSStoreHeap* mir = ins->mir();
+ MOZ_ASSERT(!mir->hasMemoryBase());
+
+ const LAllocation* ptr = ins->ptr();
+ const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
+
+ Register ptrReg = ToRegister(ptr);
+
+ Label done;
+ if (mir->needsBoundsCheck()) {
+ Register boundsCheckLimitReg = ToRegister(boundsCheckLimit);
+ masm.wasmBoundsCheck32(Assembler::AboveOrEqual, ptrReg, boundsCheckLimitReg,
+ &done);
+ }
+
+ MemOperand addr(ARMRegister(HeapReg, 64), ARMRegister(ptrReg, 64));
+ switch (mir->accessType()) {
+ case Scalar::Int8:
+ case Scalar::Uint8:
+ masm.Strb(toWRegister(ins->value()), addr);
+ break;
+ case Scalar::Int16:
+ case Scalar::Uint16:
+ masm.Strh(toWRegister(ins->value()), addr);
+ break;
+ case Scalar::Int32:
+ case Scalar::Uint32:
+ masm.Str(toWRegister(ins->value()), addr);
+ break;
+ case Scalar::Float64:
+ masm.Str(ARMFPRegister(ToFloatRegister(ins->value()), 64), addr);
+ break;
+ case Scalar::Float32:
+ masm.Str(ARMFPRegister(ToFloatRegister(ins->value()), 32), addr);
+ break;
+ default:
+ MOZ_CRASH("unexpected array type");
+ }
+ if (done.used()) {
+ masm.bind(&done);
+ }
+}
+
+void CodeGenerator::visitWasmCompareExchangeHeap(
+ LWasmCompareExchangeHeap* ins) {
+ MWasmCompareExchangeHeap* mir = ins->mir();
+
+ Register ptr = ToRegister(ins->ptr());
+ Register oldval = ToRegister(ins->oldValue());
+ Register newval = ToRegister(ins->newValue());
+ Register out = ToRegister(ins->output());
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ BaseIndex srcAddr(HeapReg, ptr, TimesOne, mir->access().offset());
+
+ if (mir->access().type() == Scalar::Int64) {
+ masm.wasmCompareExchange64(mir->access(), srcAddr, Register64(oldval),
+ Register64(newval), Register64(out));
+ } else {
+ masm.wasmCompareExchange(mir->access(), srcAddr, oldval, newval, out);
+ }
+}
+
+void CodeGenerator::visitWasmAtomicExchangeHeap(LWasmAtomicExchangeHeap* ins) {
+ MWasmAtomicExchangeHeap* mir = ins->mir();
+
+ Register ptr = ToRegister(ins->ptr());
+ Register oldval = ToRegister(ins->value());
+ Register out = ToRegister(ins->output());
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ BaseIndex srcAddr(HeapReg, ptr, TimesOne, mir->access().offset());
+
+ if (mir->access().type() == Scalar::Int64) {
+ masm.wasmAtomicExchange64(mir->access(), srcAddr, Register64(oldval),
+ Register64(out));
+ } else {
+ masm.wasmAtomicExchange(mir->access(), srcAddr, oldval, out);
+ }
+}
+
+void CodeGenerator::visitWasmAtomicBinopHeap(LWasmAtomicBinopHeap* ins) {
+ MWasmAtomicBinopHeap* mir = ins->mir();
+
+ MOZ_ASSERT(mir->hasUses());
+
+ Register ptr = ToRegister(ins->ptr());
+ Register value = ToRegister(ins->value());
+ Register flagTemp = ToRegister(ins->flagTemp());
+ Register out = ToRegister(ins->output());
+ MOZ_ASSERT(ins->temp()->isBogusTemp());
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ BaseIndex srcAddr(HeapReg, ptr, TimesOne, mir->access().offset());
+ AtomicOp op = mir->operation();
+
+ if (mir->access().type() == Scalar::Int64) {
+ masm.wasmAtomicFetchOp64(mir->access(), op, Register64(value), srcAddr,
+ Register64(flagTemp), Register64(out));
+ } else {
+ masm.wasmAtomicFetchOp(mir->access(), op, value, srcAddr, flagTemp, out);
+ }
+}
+
+void CodeGenerator::visitWasmAtomicBinopHeapForEffect(
+ LWasmAtomicBinopHeapForEffect* ins) {
+ MWasmAtomicBinopHeap* mir = ins->mir();
+
+ MOZ_ASSERT(!mir->hasUses());
+
+ Register ptr = ToRegister(ins->ptr());
+ Register value = ToRegister(ins->value());
+ Register flagTemp = ToRegister(ins->flagTemp());
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ BaseIndex srcAddr(HeapReg, ptr, TimesOne, mir->access().offset());
+ AtomicOp op = mir->operation();
+
+ if (mir->access().type() == Scalar::Int64) {
+ masm.wasmAtomicEffectOp64(mir->access(), op, Register64(value), srcAddr,
+ Register64(flagTemp));
+ } else {
+ masm.wasmAtomicEffectOp(mir->access(), op, value, srcAddr, flagTemp);
+ }
+}
+
+void CodeGenerator::visitWasmStackArg(LWasmStackArg* ins) {
+ const MWasmStackArg* mir = ins->mir();
+ Address dst(masm.getStackPointer(), mir->spOffset());
+ if (ins->arg()->isConstant()) {
+ masm.storePtr(ImmWord(ToInt32(ins->arg())), dst);
+ } else if (ins->arg()->isGeneralReg()) {
+ masm.storePtr(ToRegister(ins->arg()), dst);
+ } else {
+ switch (mir->input()->type()) {
+ case MIRType::Double:
+ masm.storeDouble(ToFloatRegister(ins->arg()), dst);
+ return;
+ case MIRType::Float32:
+ masm.storeFloat32(ToFloatRegister(ins->arg()), dst);
+ return;
+#ifdef ENABLE_WASM_SIMD
+ case MIRType::Simd128:
+ masm.storeUnalignedSimd128(ToFloatRegister(ins->arg()), dst);
+ return;
+#endif
+ default:
+ break;
+ }
+ MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE(
+ "unexpected mir type in WasmStackArg");
+ }
+}
+
+void CodeGenerator::visitUDiv(LUDiv* ins) {
+ MDiv* mir = ins->mir();
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register output = ToRegister(ins->output());
+ ARMRegister lhs32 = ARMRegister(lhs, 32);
+ ARMRegister rhs32 = ARMRegister(rhs, 32);
+ ARMRegister output32 = ARMRegister(output, 32);
+
+ // Prevent divide by zero.
+ if (mir->canBeDivideByZero()) {
+ if (mir->isTruncated()) {
+ if (mir->trapOnError()) {
+ Label nonZero;
+ masm.branchTest32(Assembler::NonZero, rhs, rhs, &nonZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+ masm.bind(&nonZero);
+ } else {
+        // The ARM64 UDIV instruction returns 0 when dividing by zero,
+        // so no extra test is needed.
+ }
+ } else {
+ bailoutTest32(Assembler::Zero, rhs, rhs, ins->snapshot());
+ }
+ }
+
+ // Unsigned division.
+ masm.Udiv(output32, lhs32, rhs32);
+
+ // If the remainder is > 0, bailout since this must be a double.
+ if (!mir->canTruncateRemainder()) {
+ Register remainder = ToRegister(ins->remainder());
+ ARMRegister remainder32 = ARMRegister(remainder, 32);
+
+ // Compute the remainder: remainder = lhs - (output * rhs).
+ masm.Msub(remainder32, output32, rhs32, lhs32);
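+    // For example, 7 / 2 gives output == 3 and remainder == 7 - 3*2 == 1; a
+    // non-zero remainder means the true result (3.5) is not an int32, so we
+    // bail out below.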
+
+ bailoutTest32(Assembler::NonZero, remainder, remainder, ins->snapshot());
+ }
+
+ // Unsigned div can return a value that's not a signed int32.
+ // If our users aren't expecting that, bail.
+ if (!mir->isTruncated()) {
+ bailoutTest32(Assembler::Signed, output, output, ins->snapshot());
+ }
+}
+
+void CodeGenerator::visitUMod(LUMod* ins) {
+ MMod* mir = ins->mir();
+ ARMRegister lhs = toWRegister(ins->lhs());
+ ARMRegister rhs = toWRegister(ins->rhs());
+ ARMRegister output = toWRegister(ins->output());
+ Label done;
+
+ if (mir->canBeDivideByZero()) {
+ if (mir->isTruncated()) {
+ if (mir->trapOnError()) {
+ Label nonZero;
+ masm.Cbnz(rhs, &nonZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+ masm.bind(&nonZero);
+ } else {
+ // Truncated division by zero yields integer zero.
+ masm.Mov(output, rhs);
+ masm.Cbz(rhs, &done);
+ }
+ } else {
+ // Non-truncated division by zero produces a non-integer.
+ masm.Cmp(rhs, Operand(0));
+ bailoutIf(Assembler::Equal, ins->snapshot());
+ }
+ }
+
+ // Unsigned division.
+ masm.Udiv(output, lhs, rhs);
+
+ // Compute the remainder: output = lhs - (output * rhs).
+ masm.Msub(output, output, rhs, lhs);
+
+ if (!mir->isTruncated()) {
+ // Bail if the output would be negative.
+ //
+ // LUMod inputs may be Uint32, so care is taken to ensure the result
+ // is not unexpectedly signed.
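+    //
+    // For example, 0xFFFFFFFE % 0xFFFFFFFF leaves 0xFFFFFFFE, which reads as
+    // a negative int32, so the untruncated case must bail out here.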
+ bailoutCmp32(Assembler::LessThan, output, Imm32(0), ins->snapshot());
+ }
+
+ if (done.used()) {
+ masm.bind(&done);
+ }
+}
+
+void CodeGenerator::visitEffectiveAddress(LEffectiveAddress* ins) {
+ const MEffectiveAddress* mir = ins->mir();
+ const ARMRegister base = toWRegister(ins->base());
+ const ARMRegister index = toWRegister(ins->index());
+ const ARMRegister output = toWRegister(ins->output());
+
+ masm.Add(output, base, Operand(index, vixl::LSL, mir->scale()));
+ masm.Add(output, output, Operand(mir->displacement()));
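+  // That is, output = base + (index << scale) + displacement; e.g. with
+  // scale == 2 and displacement == 16 this computes base + index*4 + 16.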
+}
+
+void CodeGenerator::visitNegI(LNegI* ins) {
+ const ARMRegister input = toWRegister(ins->input());
+ const ARMRegister output = toWRegister(ins->output());
+ masm.Neg(output, input);
+}
+
+void CodeGenerator::visitNegI64(LNegI64* ins) {
+ const ARMRegister input = toXRegister(ins->input());
+ const ARMRegister output = toXRegister(ins->output());
+ masm.Neg(output, input);
+}
+
+void CodeGenerator::visitNegD(LNegD* ins) {
+ const ARMFPRegister input(ToFloatRegister(ins->input()), 64);
+ const ARMFPRegister output(ToFloatRegister(ins->output()), 64);
+ masm.Fneg(output, input);
+}
+
+void CodeGenerator::visitNegF(LNegF* ins) {
+ const ARMFPRegister input(ToFloatRegister(ins->input()), 32);
+ const ARMFPRegister output(ToFloatRegister(ins->output()), 32);
+ masm.Fneg(output, input);
+}
+
+void CodeGenerator::visitCompareExchangeTypedArrayElement(
+ LCompareExchangeTypedArrayElement* lir) {
+ Register elements = ToRegister(lir->elements());
+ AnyRegister output = ToAnyRegister(lir->output());
+ Register temp =
+ lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp());
+
+ Register oldval = ToRegister(lir->oldval());
+ Register newval = ToRegister(lir->newval());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval,
+ newval, temp, output);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval,
+ newval, temp, output);
+ }
+}
+
+void CodeGenerator::visitAtomicExchangeTypedArrayElement(
+ LAtomicExchangeTypedArrayElement* lir) {
+ Register elements = ToRegister(lir->elements());
+ AnyRegister output = ToAnyRegister(lir->output());
+ Register temp =
+ lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp());
+
+ Register value = ToRegister(lir->value());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp,
+ output);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp,
+ output);
+ }
+}
+
+void CodeGenerator::visitAtomicLoad64(LAtomicLoad64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register temp = ToRegister(lir->temp());
+ Register64 temp64 = ToRegister64(lir->temp64());
+ Register out = ToRegister(lir->output());
+
+ const MLoadUnboxedScalar* mir = lir->mir();
+
+ Scalar::Type storageType = mir->storageType();
+
+ // NOTE: the generated code must match the assembly code in gen_load in
+ // GenerateAtomicOperations.py
+ auto sync = Synchronization::Load();
+
+ masm.memoryBarrierBefore(sync);
+ if (lir->index()->isConstant()) {
+ Address source =
+ ToAddress(elements, lir->index(), storageType, mir->offsetAdjustment());
+ masm.load64(source, temp64);
+ } else {
+ BaseIndex source(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(storageType), mir->offsetAdjustment());
+ masm.load64(source, temp64);
+ }
+ masm.memoryBarrierAfter(sync);
+
+ emitCreateBigInt(lir, storageType, temp64, out, temp);
+}
+
+void CodeGenerator::visitAtomicStore64(LAtomicStore64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+
+ Scalar::Type writeType = lir->mir()->writeType();
+
+ masm.loadBigInt64(value, temp1);
+
+ // NOTE: the generated code must match the assembly code in gen_store in
+ // GenerateAtomicOperations.py
+ auto sync = Synchronization::Store();
+
+ masm.memoryBarrierBefore(sync);
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), writeType);
+ masm.store64(temp1, dest);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(writeType));
+ masm.store64(temp1, dest);
+ }
+ masm.memoryBarrierAfter(sync);
+}
+
+void CodeGenerator::visitCompareExchangeTypedArrayElement64(
+ LCompareExchangeTypedArrayElement64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register oldval = ToRegister(lir->oldval());
+ Register newval = ToRegister(lir->newval());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = ToRegister64(lir->temp2());
+ Register out = ToRegister(lir->output());
+ Register64 tempOut(out);
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ masm.loadBigInt64(oldval, temp1);
+ masm.loadBigInt64(newval, tempOut);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.compareExchange64(Synchronization::Full(), dest, temp1, tempOut,
+ temp2);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.compareExchange64(Synchronization::Full(), dest, temp1, tempOut,
+ temp2);
+ }
+
+ emitCreateBigInt(lir, arrayType, temp2, out, temp1.scratchReg());
+}
+
+void CodeGenerator::visitAtomicExchangeTypedArrayElement64(
+ LAtomicExchangeTypedArrayElement64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = Register64(ToRegister(lir->temp2()));
+ Register out = ToRegister(lir->output());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ masm.loadBigInt64(value, temp1);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicExchange64(Synchronization::Full(), dest, temp1, temp2);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicExchange64(Synchronization::Full(), dest, temp1, temp2);
+ }
+
+ emitCreateBigInt(lir, arrayType, temp2, out, temp1.scratchReg());
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinop64(
+ LAtomicTypedArrayElementBinop64* lir) {
+ MOZ_ASSERT(!lir->mir()->isForEffect());
+
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = ToRegister64(lir->temp2());
+ Register out = ToRegister(lir->output());
+ Register64 tempOut = Register64(out);
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+ AtomicOp atomicOp = lir->mir()->operation();
+
+ masm.loadBigInt64(value, temp1);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicFetchOp64(Synchronization::Full(), atomicOp, temp1, dest,
+ tempOut, temp2);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicFetchOp64(Synchronization::Full(), atomicOp, temp1, dest,
+ tempOut, temp2);
+ }
+
+ emitCreateBigInt(lir, arrayType, temp2, out, temp1.scratchReg());
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect64(
+ LAtomicTypedArrayElementBinopForEffect64* lir) {
+ MOZ_ASSERT(lir->mir()->isForEffect());
+
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = ToRegister64(lir->temp2());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+ AtomicOp atomicOp = lir->mir()->operation();
+
+ masm.loadBigInt64(value, temp1);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicEffectOp64(Synchronization::Full(), atomicOp, temp1, dest,
+ temp2);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicEffectOp64(Synchronization::Full(), atomicOp, temp1, dest,
+ temp2);
+ }
+}
+
+void CodeGeneratorARM64::emitSimpleBinaryI64(
+ LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* lir, JSOp op) {
+ const ARMRegister dest = ARMRegister(ToOutRegister64(lir).reg, 64);
+ const ARMRegister lhs =
+ ARMRegister(ToRegister64(lir->getInt64Operand(0)).reg, 64);
+ const LInt64Allocation rhsAlloc = lir->getInt64Operand(INT64_PIECES);
+ Operand rhs;
+
+ if (IsConstant(rhsAlloc)) {
+ rhs = Operand(ToInt64(rhsAlloc));
+ } else {
+ rhs = Operand(ARMRegister(ToRegister64(rhsAlloc).reg, 64));
+ }
+ switch (op) {
+ case JSOp::Add:
+ masm.Add(dest, lhs, rhs);
+ break;
+ case JSOp::Sub:
+ masm.Sub(dest, lhs, rhs);
+ break;
+ case JSOp::BitOr:
+ masm.Orr(dest, lhs, rhs);
+ break;
+ case JSOp::BitXor:
+ masm.Eor(dest, lhs, rhs);
+ break;
+ case JSOp::BitAnd:
+ masm.And(dest, lhs, rhs);
+ break;
+ default:
+ MOZ_CRASH("unexpected binary opcode");
+ }
+}
+
+void CodeGenerator::visitAddI64(LAddI64* lir) {
+ emitSimpleBinaryI64(lir, JSOp::Add);
+}
+
+void CodeGenerator::visitClzI64(LClzI64* ins) {
+ masm.clz64(ToRegister64(ins->getInt64Operand(0)), ToRegister(ins->output()));
+}
+
+void CodeGenerator::visitCtzI64(LCtzI64* ins) {
+ masm.ctz64(ToRegister64(ins->getInt64Operand(0)), ToRegister(ins->output()));
+}
+
+void CodeGenerator::visitMulI64(LMulI64* lir) {
+ const LInt64Allocation lhs = lir->getInt64Operand(LMulI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LMulI64::Rhs);
+ const Register64 output = ToOutRegister64(lir);
+
+ if (IsConstant(rhs)) {
+ int64_t constant = ToInt64(rhs);
+ // Ad-hoc strength reduction, cf the x64 code as well as the 32-bit code
+ // higher up in this file. Bug 1712298 will lift this code to the MIR
+ // constant folding pass, or to lowering.
+ //
+ // This is for wasm integers only, so no input guards or overflow checking
+ // are needed.
+ switch (constant) {
+ case -1:
+ masm.Neg(ARMRegister(output.reg, 64),
+ ARMRegister(ToRegister64(lhs).reg, 64));
+ break;
+ case 0:
+ masm.Mov(ARMRegister(output.reg, 64), xzr);
+ break;
+ case 1:
+ if (ToRegister64(lhs) != output) {
+ masm.move64(ToRegister64(lhs), output);
+ }
+ break;
+ case 2:
+ masm.Add(ARMRegister(output.reg, 64),
+ ARMRegister(ToRegister64(lhs).reg, 64),
+ ARMRegister(ToRegister64(lhs).reg, 64));
+ break;
+ default:
+        // Use a shift if the constant is a nonnegative power of 2.
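+        // For example, constant == 8 gives FloorLog2(8) == 3 and
+        // (int64_t(1) << 3) == 8, so the multiply becomes a left shift by 3.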
+ if (constant > 0) {
+ int32_t shift = mozilla::FloorLog2(constant);
+ if (int64_t(1) << shift == constant) {
+ masm.Lsl(ARMRegister(output.reg, 64),
+ ARMRegister(ToRegister64(lhs).reg, 64), shift);
+ break;
+ }
+ }
+ masm.mul64(Imm64(constant), ToRegister64(lhs), output);
+ break;
+ }
+ } else {
+ masm.mul64(ToRegister64(lhs), ToRegister64(rhs), output);
+ }
+}
+
+void CodeGenerator::visitNotI64(LNotI64* lir) {
+ const Register64 input = ToRegister64(lir->getInt64Operand(0));
+ const Register64 output = ToOutRegister64(lir);
+ masm.Cmp(ARMRegister(input.reg, 64), ZeroRegister64);
+ masm.Cset(ARMRegister(output.reg, 64), Assembler::Zero);
+}
+
+void CodeGenerator::visitSubI64(LSubI64* lir) {
+ emitSimpleBinaryI64(lir, JSOp::Sub);
+}
+
+void CodeGenerator::visitPopcntI(LPopcntI* ins) {
+ Register input = ToRegister(ins->input());
+ Register output = ToRegister(ins->output());
+ Register temp = ToRegister(ins->temp0());
+ masm.popcnt32(input, output, temp);
+}
+
+void CodeGenerator::visitBitOpI64(LBitOpI64* lir) {
+ emitSimpleBinaryI64(lir, lir->bitop());
+}
+
+void CodeGenerator::visitShiftI64(LShiftI64* lir) {
+ ARMRegister lhs(ToRegister64(lir->getInt64Operand(LShiftI64::Lhs)).reg, 64);
+ LAllocation* rhsAlloc = lir->getOperand(LShiftI64::Rhs);
+ ARMRegister dest(ToOutRegister64(lir).reg, 64);
+
+ if (rhsAlloc->isConstant()) {
+ int32_t shift = int32_t(rhsAlloc->toConstant()->toInt64() & 0x3F);
+ if (shift == 0) {
+ if (lhs.code() != dest.code()) {
+ masm.Mov(dest, lhs);
+ }
+ } else {
+ switch (lir->bitop()) {
+ case JSOp::Lsh:
+ masm.Lsl(dest, lhs, shift);
+ break;
+ case JSOp::Rsh:
+ masm.Asr(dest, lhs, shift);
+ break;
+ case JSOp::Ursh:
+ masm.Lsr(dest, lhs, shift);
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+ }
+ } else {
+ ARMRegister rhs(ToRegister(rhsAlloc), 64);
+ switch (lir->bitop()) {
+ case JSOp::Lsh:
+ masm.Lsl(dest, lhs, rhs);
+ break;
+ case JSOp::Rsh:
+ masm.Asr(dest, lhs, rhs);
+ break;
+ case JSOp::Ursh:
+ masm.Lsr(dest, lhs, rhs);
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+ }
+}
+
+void CodeGenerator::visitWasmHeapBase(LWasmHeapBase* ins) {
+ MOZ_ASSERT(ins->instance()->isBogus());
+ masm.movePtr(HeapReg, ToRegister(ins->output()));
+}
+
+// If we have a constant base ptr, try to add the offset to it, to generate
+// better code when the full address is known. The addition may overflow past
+// 32 bits because the front end does nothing special if the base is a large
+// constant and base+offset overflows; sidestep this by performing the addition
+// anyway, overflowing to 64-bit.
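+//
+// For example, a 32-bit constant base of 0xFFFFFFF0 with an access offset of
+// 0x20 yields the 64-bit absolute address 0x100000010 rather than wrapping.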
+
+static Maybe<uint64_t> IsAbsoluteAddress(const LAllocation* ptr,
+ const wasm::MemoryAccessDesc& access) {
+ if (ptr->isConstantValue()) {
+ const MConstant* c = ptr->toConstant();
+ uint64_t base_address = c->type() == MIRType::Int32
+ ? uint64_t(uint32_t(c->toInt32()))
+ : uint64_t(c->toInt64());
+ uint64_t offset = access.offset();
+ return Some(base_address + offset);
+ }
+ return Nothing();
+}
+
+void CodeGenerator::visitWasmLoad(LWasmLoad* lir) {
+ const MWasmLoad* mir = lir->mir();
+
+ if (Maybe<uint64_t> absAddr = IsAbsoluteAddress(lir->ptr(), mir->access())) {
+ masm.wasmLoadAbsolute(mir->access(), HeapReg, absAddr.value(),
+ ToAnyRegister(lir->output()), Register64::Invalid());
+ return;
+ }
+
+ // ptr is a GPR and is either a 32-bit value zero-extended to 64-bit, or a
+ // true 64-bit value.
+ masm.wasmLoad(mir->access(), HeapReg, ToRegister(lir->ptr()),
+ ToAnyRegister(lir->output()));
+}
+
+void CodeGenerator::visitCopySignD(LCopySignD* ins) {
+ MOZ_ASSERT(ins->getTemp(0)->isBogusTemp());
+ MOZ_ASSERT(ins->getTemp(1)->isBogusTemp());
+ masm.copySignDouble(ToFloatRegister(ins->getOperand(0)),
+ ToFloatRegister(ins->getOperand(1)),
+ ToFloatRegister(ins->getDef(0)));
+}
+
+void CodeGenerator::visitCopySignF(LCopySignF* ins) {
+ MOZ_ASSERT(ins->getTemp(0)->isBogusTemp());
+ MOZ_ASSERT(ins->getTemp(1)->isBogusTemp());
+ masm.copySignFloat32(ToFloatRegister(ins->getOperand(0)),
+ ToFloatRegister(ins->getOperand(1)),
+ ToFloatRegister(ins->getDef(0)));
+}
+
+void CodeGenerator::visitPopcntI64(LPopcntI64* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ Register64 output = ToOutRegister64(lir);
+ Register temp = ToRegister(lir->getTemp(0));
+ masm.popcnt64(input, output, temp);
+}
+
+void CodeGenerator::visitRotateI64(LRotateI64* lir) {
+ bool rotateLeft = lir->mir()->isLeftRotate();
+ Register64 input = ToRegister64(lir->input());
+ Register64 output = ToOutRegister64(lir);
+ const LAllocation* count = lir->count();
+
+ if (count->isConstant()) {
+ int32_t c = int32_t(count->toConstant()->toInt64() & 0x3F);
+ if (c == 0) {
+ if (input != output) {
+ masm.move64(input, output);
+ return;
+ }
+ }
+ if (rotateLeft) {
+ masm.rotateLeft64(Imm32(c), input, output, InvalidReg);
+ } else {
+ masm.rotateRight64(Imm32(c), input, output, InvalidReg);
+ }
+ } else {
+ Register c = ToRegister(count);
+ if (rotateLeft) {
+ masm.rotateLeft64(c, input, output, InvalidReg);
+ } else {
+ masm.rotateRight64(c, input, output, InvalidReg);
+ }
+ }
+}
+
+void CodeGenerator::visitWasmStore(LWasmStore* lir) {
+ const MWasmStore* mir = lir->mir();
+
+ if (Maybe<uint64_t> absAddr = IsAbsoluteAddress(lir->ptr(), mir->access())) {
+ masm.wasmStoreAbsolute(mir->access(), ToAnyRegister(lir->value()),
+ Register64::Invalid(), HeapReg, absAddr.value());
+ return;
+ }
+
+ masm.wasmStore(mir->access(), ToAnyRegister(lir->value()), HeapReg,
+ ToRegister(lir->ptr()));
+}
+
+void CodeGenerator::visitCompareI64(LCompareI64* lir) {
+ MCompare* mir = lir->mir();
+ MOZ_ASSERT(mir->compareType() == MCompare::Compare_Int64 ||
+ mir->compareType() == MCompare::Compare_UInt64);
+
+ const LInt64Allocation lhs = lir->getInt64Operand(LCompareI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LCompareI64::Rhs);
+ Register lhsReg = ToRegister64(lhs).reg;
+ Register output = ToRegister(lir->output());
+ bool isSigned = mir->compareType() == MCompare::Compare_Int64;
+
+ if (IsConstant(rhs)) {
+ masm.cmpPtrSet(JSOpToCondition(lir->jsop(), isSigned), lhsReg,
+ ImmWord(ToInt64(rhs)), output);
+ } else if (rhs.value().isGeneralReg()) {
+ masm.cmpPtrSet(JSOpToCondition(lir->jsop(), isSigned), lhsReg,
+ ToRegister64(rhs).reg, output);
+ } else {
+ masm.cmpPtrSet(
+ GetCondForSwappedOperands(JSOpToCondition(lir->jsop(), isSigned)),
+ ToAddress(rhs.value()), lhsReg, output);
+ }
+}
+
+void CodeGenerator::visitWasmSelect(LWasmSelect* lir) {
+ MIRType mirType = lir->mir()->type();
+ Register condReg = ToRegister(lir->condExpr());
+
+ masm.test32(condReg, condReg);
+
+ switch (mirType) {
+ case MIRType::Int32:
+ case MIRType::RefOrNull: {
+ Register outReg = ToRegister(lir->output());
+ Register trueReg = ToRegister(lir->trueExpr());
+ Register falseReg = ToRegister(lir->falseExpr());
+
+ if (mirType == MIRType::Int32) {
+ masm.Csel(ARMRegister(outReg, 32), ARMRegister(trueReg, 32),
+ ARMRegister(falseReg, 32), Assembler::NonZero);
+ } else {
+ masm.Csel(ARMRegister(outReg, 64), ARMRegister(trueReg, 64),
+ ARMRegister(falseReg, 64), Assembler::NonZero);
+ }
+ break;
+ }
+
+ case MIRType::Float32:
+ case MIRType::Double:
+ case MIRType::Simd128: {
+ FloatRegister outReg = ToFloatRegister(lir->output());
+ FloatRegister trueReg = ToFloatRegister(lir->trueExpr());
+ FloatRegister falseReg = ToFloatRegister(lir->falseExpr());
+
+ switch (mirType) {
+ case MIRType::Float32:
+ masm.Fcsel(ARMFPRegister(outReg, 32), ARMFPRegister(trueReg, 32),
+ ARMFPRegister(falseReg, 32), Assembler::NonZero);
+ break;
+ case MIRType::Double:
+ masm.Fcsel(ARMFPRegister(outReg, 64), ARMFPRegister(trueReg, 64),
+ ARMFPRegister(falseReg, 64), Assembler::NonZero);
+ break;
+#ifdef ENABLE_WASM_SIMD
+ case MIRType::Simd128: {
+ MOZ_ASSERT(outReg == trueReg);
+ Label done;
+ masm.j(Assembler::NonZero, &done);
+ masm.moveSimd128(falseReg, outReg);
+ masm.bind(&done);
+ break;
+ }
+#endif
+ default:
+ MOZ_CRASH();
+ }
+ break;
+ }
+
+ default: {
+ MOZ_CRASH("unhandled type in visitWasmSelect!");
+ }
+ }
+}
+
+// We expect to handle, independently, the cases where the compare type is one
+// of {{U,}Int32, {U,}Int64, Float32, Double} and the select type is one of
+// {{U,}Int32, {U,}Int64, Float32, Double}.
+void CodeGenerator::visitWasmCompareAndSelect(LWasmCompareAndSelect* ins) {
+ MCompare::CompareType compTy = ins->compareType();
+
+ // Set flag.
+ if (compTy == MCompare::Compare_Int32 || compTy == MCompare::Compare_UInt32) {
+ Register lhs = ToRegister(ins->leftExpr());
+ if (ins->rightExpr()->isConstant()) {
+ masm.cmp32(lhs, Imm32(ins->rightExpr()->toConstant()->toInt32()));
+ } else {
+ masm.cmp32(lhs, ToRegister(ins->rightExpr()));
+ }
+ } else if (compTy == MCompare::Compare_Int64 ||
+ compTy == MCompare::Compare_UInt64) {
+ Register lhs = ToRegister(ins->leftExpr());
+ if (ins->rightExpr()->isConstant()) {
+ masm.cmpPtr(lhs, Imm64(ins->rightExpr()->toConstant()->toInt64()));
+ } else {
+ masm.cmpPtr(lhs, ToRegister(ins->rightExpr()));
+ }
+ } else if (compTy == MCompare::Compare_Float32) {
+ masm.compareFloat(JSOpToDoubleCondition(ins->jsop()),
+ ToFloatRegister(ins->leftExpr()),
+ ToFloatRegister(ins->rightExpr()));
+ } else if (compTy == MCompare::Compare_Double) {
+ masm.compareDouble(JSOpToDoubleCondition(ins->jsop()),
+ ToFloatRegister(ins->leftExpr()),
+ ToFloatRegister(ins->rightExpr()));
+ } else {
+ // Ref types not supported yet; v128 is not yet observed to be worth
+ // optimizing.
+ MOZ_CRASH("CodeGenerator::visitWasmCompareAndSelect: unexpected type (1)");
+ }
+
+ // Act on flag.
+ Assembler::Condition cond;
+ if (compTy == MCompare::Compare_Float32 ||
+ compTy == MCompare::Compare_Double) {
+ cond = Assembler::ConditionFromDoubleCondition(
+ JSOpToDoubleCondition(ins->jsop()));
+ } else {
+ cond = JSOpToCondition(compTy, ins->jsop());
+ }
+ MIRType insTy = ins->mir()->type();
+ if (insTy == MIRType::Int32 || insTy == MIRType::Int64) {
+ Register destReg = ToRegister(ins->output());
+ Register trueReg = ToRegister(ins->ifTrueExpr());
+ Register falseReg = ToRegister(ins->ifFalseExpr());
+ size_t size = insTy == MIRType::Int32 ? 32 : 64;
+ masm.Csel(ARMRegister(destReg, size), ARMRegister(trueReg, size),
+ ARMRegister(falseReg, size), cond);
+ } else if (insTy == MIRType::Float32 || insTy == MIRType::Double) {
+ FloatRegister destReg = ToFloatRegister(ins->output());
+ FloatRegister trueReg = ToFloatRegister(ins->ifTrueExpr());
+ FloatRegister falseReg = ToFloatRegister(ins->ifFalseExpr());
+ size_t size = MIRTypeToSize(insTy) * 8;
+ masm.Fcsel(ARMFPRegister(destReg, size), ARMFPRegister(trueReg, size),
+ ARMFPRegister(falseReg, size), cond);
+ } else {
+ // See above.
+ MOZ_CRASH("CodeGenerator::visitWasmCompareAndSelect: unexpected type (2)");
+ }
+}
+
+void CodeGenerator::visitWasmLoadI64(LWasmLoadI64* lir) {
+ const MWasmLoad* mir = lir->mir();
+
+ if (Maybe<uint64_t> absAddr = IsAbsoluteAddress(lir->ptr(), mir->access())) {
+ masm.wasmLoadAbsolute(mir->access(), HeapReg, absAddr.value(),
+ AnyRegister(), ToOutRegister64(lir));
+ return;
+ }
+
+ masm.wasmLoadI64(mir->access(), HeapReg, ToRegister(lir->ptr()),
+ ToOutRegister64(lir));
+}
+
+void CodeGenerator::visitWasmStoreI64(LWasmStoreI64* lir) {
+ const MWasmStore* mir = lir->mir();
+
+ if (Maybe<uint64_t> absAddr = IsAbsoluteAddress(lir->ptr(), mir->access())) {
+ masm.wasmStoreAbsolute(mir->access(), AnyRegister(),
+ ToRegister64(lir->value()), HeapReg,
+ absAddr.value());
+ return;
+ }
+
+ masm.wasmStoreI64(mir->access(), ToRegister64(lir->value()), HeapReg,
+ ToRegister(lir->ptr()));
+}
+
+void CodeGenerator::visitMemoryBarrier(LMemoryBarrier* ins) {
+ masm.memoryBarrier(ins->type());
+}
+
+void CodeGenerator::visitWasmAddOffset(LWasmAddOffset* lir) {
+ MWasmAddOffset* mir = lir->mir();
+ Register base = ToRegister(lir->base());
+ Register out = ToRegister(lir->output());
+
+ masm.Adds(ARMRegister(out, 32), ARMRegister(base, 32),
+ Operand(mir->offset()));
+ OutOfLineAbortingWasmTrap* ool = new (alloc())
+ OutOfLineAbortingWasmTrap(mir->bytecodeOffset(), wasm::Trap::OutOfBounds);
+ addOutOfLineCode(ool, mir);
+ masm.j(Assembler::CarrySet, ool->entry());
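+  // Adds sets the carry flag on unsigned 32-bit overflow; e.g. a base of
+  // 0xFFFFFFFF with offset 1 wraps to 0 and sets C, routing execution to the
+  // out-of-bounds trap.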
+}
+
+void CodeGenerator::visitWasmAddOffset64(LWasmAddOffset64* lir) {
+ MWasmAddOffset* mir = lir->mir();
+ Register64 base = ToRegister64(lir->base());
+ Register64 out = ToOutRegister64(lir);
+
+ masm.Adds(ARMRegister(out.reg, 64), ARMRegister(base.reg, 64),
+ Operand(mir->offset()));
+ OutOfLineAbortingWasmTrap* ool = new (alloc())
+ OutOfLineAbortingWasmTrap(mir->bytecodeOffset(), wasm::Trap::OutOfBounds);
+ addOutOfLineCode(ool, mir);
+ masm.j(Assembler::CarrySet, ool->entry());
+}
+
+void CodeGenerator::visitWasmSelectI64(LWasmSelectI64* lir) {
+ MOZ_ASSERT(lir->mir()->type() == MIRType::Int64);
+ Register condReg = ToRegister(lir->condExpr());
+ Register64 trueReg = ToRegister64(lir->trueExpr());
+ Register64 falseReg = ToRegister64(lir->falseExpr());
+ Register64 outReg = ToOutRegister64(lir);
+
+ masm.test32(condReg, condReg);
+ masm.Csel(ARMRegister(outReg.reg, 64), ARMRegister(trueReg.reg, 64),
+ ARMRegister(falseReg.reg, 64), Assembler::NonZero);
+}
+
+void CodeGenerator::visitSignExtendInt64(LSignExtendInt64* ins) {
+ Register64 input = ToRegister64(ins->getInt64Operand(0));
+ Register64 output = ToOutRegister64(ins);
+ switch (ins->mode()) {
+ case MSignExtendInt64::Byte:
+ masm.move8To64SignExtend(input.reg, output);
+ break;
+ case MSignExtendInt64::Half:
+ masm.move16To64SignExtend(input.reg, output);
+ break;
+ case MSignExtendInt64::Word:
+ masm.move32To64SignExtend(input.reg, output);
+ break;
+ }
+}
+
+void CodeGenerator::visitWasmReinterpret(LWasmReinterpret* lir) {
+ MOZ_ASSERT(gen->compilingWasm());
+ MWasmReinterpret* ins = lir->mir();
+
+ MIRType to = ins->type();
+ mozilla::DebugOnly<MIRType> from = ins->input()->type();
+
+ switch (to) {
+ case MIRType::Int32:
+ MOZ_ASSERT(from == MIRType::Float32);
+ masm.moveFloat32ToGPR(ToFloatRegister(lir->input()),
+ ToRegister(lir->output()));
+ break;
+ case MIRType::Float32:
+ MOZ_ASSERT(from == MIRType::Int32);
+ masm.moveGPRToFloat32(ToRegister(lir->input()),
+ ToFloatRegister(lir->output()));
+ break;
+ case MIRType::Double:
+ case MIRType::Int64:
+ MOZ_CRASH("not handled by this LIR opcode");
+ default:
+ MOZ_CRASH("unexpected WasmReinterpret");
+ }
+}
+
+void CodeGenerator::visitWasmStackArgI64(LWasmStackArgI64* ins) {
+ const MWasmStackArg* mir = ins->mir();
+ Address dst(masm.getStackPointer(), mir->spOffset());
+ if (IsConstant(ins->arg())) {
+ masm.store64(Imm64(ToInt64(ins->arg())), dst);
+ } else {
+ masm.store64(ToRegister64(ins->arg()), dst);
+ }
+}
+
+void CodeGenerator::visitTestI64AndBranch(LTestI64AndBranch* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ MBasicBlock* mirTrue = lir->ifTrue();
+ MBasicBlock* mirFalse = lir->ifFalse();
+
+ // Jump to the True block if NonZero.
+ // Jump to the False block if Zero.
+ if (isNextBlock(mirFalse->lir())) {
+ masm.Cbnz(ARMRegister(input.reg, 64), getJumpLabelForBranch(mirTrue));
+ } else {
+ masm.Cbz(ARMRegister(input.reg, 64), getJumpLabelForBranch(mirFalse));
+ if (!isNextBlock(mirTrue->lir())) {
+ jumpToBlock(mirTrue);
+ }
+ }
+}
+
+void CodeGenerator::visitWrapInt64ToInt32(LWrapInt64ToInt32* lir) {
+ const LAllocation* input = lir->getOperand(0);
+ Register output = ToRegister(lir->output());
+
+ if (lir->mir()->bottomHalf()) {
+ if (input->isMemory()) {
+ masm.load32(ToAddress(input), output);
+ } else {
+ // Really this is a 64-bit input register and we could use move64To32.
+ masm.Mov(ARMRegister(output, 32), ARMRegister(ToRegister(input), 32));
+ }
+ } else {
+ MOZ_CRASH("Not implemented.");
+ }
+}
+
+void CodeGenerator::visitExtendInt32ToInt64(LExtendInt32ToInt64* lir) {
+ Register input = ToRegister(lir->getOperand(0));
+ Register64 output = ToOutRegister64(lir);
+
+ if (lir->mir()->isUnsigned()) {
+ masm.move32To64ZeroExtend(input, output);
+ } else {
+ masm.move32To64SignExtend(input, output);
+ }
+}
+
+void CodeGenerator::visitWasmExtendU32Index(LWasmExtendU32Index* lir) {
+ // Generates no code on this platform because the input is assumed to have
+ // canonical form.
+ Register output = ToRegister(lir->output());
+ MOZ_ASSERT(ToRegister(lir->input()) == output);
+ masm.debugAssertCanonicalInt32(output);
+}
+
+void CodeGenerator::visitWasmWrapU32Index(LWasmWrapU32Index* lir) {
+ // Generates no code on this platform because the input is assumed to have
+ // canonical form.
+ Register output = ToRegister(lir->output());
+ MOZ_ASSERT(ToRegister(lir->input()) == output);
+ masm.debugAssertCanonicalInt32(output);
+}
+
+void CodeGenerator::visitCompareI64AndBranch(LCompareI64AndBranch* comp) {
+ const MCompare* mir = comp->cmpMir();
+ const mozilla::DebugOnly<MCompare::CompareType> type = mir->compareType();
+ const LInt64Allocation left =
+ comp->getInt64Operand(LCompareI64AndBranch::Lhs);
+ const LInt64Allocation right =
+ comp->getInt64Operand(LCompareI64AndBranch::Rhs);
+
+ MOZ_ASSERT(type == MCompare::Compare_Int64 ||
+ type == MCompare::Compare_UInt64);
+ if (IsConstant(right)) {
+ masm.Cmp(ARMRegister(ToRegister64(left).reg, 64), ToInt64(right));
+ } else {
+ masm.Cmp(ARMRegister(ToRegister64(left).reg, 64),
+ ARMRegister(ToRegister64(right).reg, 64));
+ }
+
+ bool isSigned = mir->compareType() == MCompare::Compare_Int64;
+ Assembler::Condition cond = JSOpToCondition(comp->jsop(), isSigned);
+ emitBranch(cond, comp->ifTrue(), comp->ifFalse());
+}
+
+void CodeGenerator::visitWasmTruncateToInt32(LWasmTruncateToInt32* lir) {
+ auto input = ToFloatRegister(lir->input());
+ auto output = ToRegister(lir->output());
+
+ MWasmTruncateToInt32* mir = lir->mir();
+ MIRType fromType = mir->input()->type();
+
+ MOZ_ASSERT(fromType == MIRType::Double || fromType == MIRType::Float32);
+
+ auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input, output);
+ addOutOfLineCode(ool, mir);
+
+ Label* oolEntry = ool->entry();
+ if (mir->isUnsigned()) {
+ if (fromType == MIRType::Double) {
+ masm.wasmTruncateDoubleToUInt32(input, output, mir->isSaturating(),
+ oolEntry);
+ } else if (fromType == MIRType::Float32) {
+ masm.wasmTruncateFloat32ToUInt32(input, output, mir->isSaturating(),
+ oolEntry);
+ } else {
+ MOZ_CRASH("unexpected type");
+ }
+
+ masm.bind(ool->rejoin());
+ return;
+ }
+
+ if (fromType == MIRType::Double) {
+ masm.wasmTruncateDoubleToInt32(input, output, mir->isSaturating(),
+ oolEntry);
+ } else if (fromType == MIRType::Float32) {
+ masm.wasmTruncateFloat32ToInt32(input, output, mir->isSaturating(),
+ oolEntry);
+ } else {
+ MOZ_CRASH("unexpected type");
+ }
+
+ masm.bind(ool->rejoin());
+}
+
+void CodeGenerator::visitWasmTruncateToInt64(LWasmTruncateToInt64* lir) {
+ FloatRegister input = ToFloatRegister(lir->input());
+ Register64 output = ToOutRegister64(lir);
+
+ MWasmTruncateToInt64* mir = lir->mir();
+ MIRType fromType = mir->input()->type();
+
+ MOZ_ASSERT(fromType == MIRType::Double || fromType == MIRType::Float32);
+
+ auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input, output);
+ addOutOfLineCode(ool, mir);
+
+ Label* oolEntry = ool->entry();
+ Label* oolRejoin = ool->rejoin();
+ bool isSaturating = mir->isSaturating();
+
+ if (fromType == MIRType::Double) {
+ if (mir->isUnsigned()) {
+ masm.wasmTruncateDoubleToUInt64(input, output, isSaturating, oolEntry,
+ oolRejoin, InvalidFloatReg);
+ } else {
+ masm.wasmTruncateDoubleToInt64(input, output, isSaturating, oolEntry,
+ oolRejoin, InvalidFloatReg);
+ }
+ } else {
+ if (mir->isUnsigned()) {
+ masm.wasmTruncateFloat32ToUInt64(input, output, isSaturating, oolEntry,
+ oolRejoin, InvalidFloatReg);
+ } else {
+ masm.wasmTruncateFloat32ToInt64(input, output, isSaturating, oolEntry,
+ oolRejoin, InvalidFloatReg);
+ }
+ }
+}
+
+void CodeGeneratorARM64::visitOutOfLineWasmTruncateCheck(
+ OutOfLineWasmTruncateCheck* ool) {
+ FloatRegister input = ool->input();
+ Register output = ool->output();
+ Register64 output64 = ool->output64();
+ MIRType fromType = ool->fromType();
+ MIRType toType = ool->toType();
+ Label* oolRejoin = ool->rejoin();
+ TruncFlags flags = ool->flags();
+ wasm::BytecodeOffset off = ool->bytecodeOffset();
+
+ if (fromType == MIRType::Float32) {
+ if (toType == MIRType::Int32) {
+ masm.oolWasmTruncateCheckF32ToI32(input, output, flags, off, oolRejoin);
+ } else if (toType == MIRType::Int64) {
+ masm.oolWasmTruncateCheckF32ToI64(input, output64, flags, off, oolRejoin);
+ } else {
+ MOZ_CRASH("unexpected type");
+ }
+ } else if (fromType == MIRType::Double) {
+ if (toType == MIRType::Int32) {
+ masm.oolWasmTruncateCheckF64ToI32(input, output, flags, off, oolRejoin);
+ } else if (toType == MIRType::Int64) {
+ masm.oolWasmTruncateCheckF64ToI64(input, output64, flags, off, oolRejoin);
+ } else {
+ MOZ_CRASH("unexpected type");
+ }
+ } else {
+ MOZ_CRASH("unexpected type");
+ }
+}
+
+void CodeGenerator::visitWasmReinterpretToI64(LWasmReinterpretToI64* lir) {
+ MOZ_ASSERT(lir->mir()->type() == MIRType::Int64);
+ MOZ_ASSERT(lir->mir()->input()->type() == MIRType::Double);
+ masm.moveDoubleToGPR64(ToFloatRegister(lir->input()), ToOutRegister64(lir));
+}
+
+void CodeGenerator::visitWasmReinterpretFromI64(LWasmReinterpretFromI64* lir) {
+ MOZ_ASSERT(lir->mir()->type() == MIRType::Double);
+ MOZ_ASSERT(lir->mir()->input()->type() == MIRType::Int64);
+ masm.moveGPR64ToDouble(
+ ToRegister64(lir->getInt64Operand(LWasmReinterpretFromI64::Input)),
+ ToFloatRegister(lir->output()));
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinop(
+ LAtomicTypedArrayElementBinop* lir) {
+ MOZ_ASSERT(!lir->mir()->isForEffect());
+
+ AnyRegister output = ToAnyRegister(lir->output());
+ Register elements = ToRegister(lir->elements());
+ Register flagTemp = ToRegister(lir->temp1());
+ Register outTemp =
+ lir->temp2()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp2());
+ Register value = ToRegister(lir->value());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address mem = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicFetchOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp, outTemp,
+ output);
+ } else {
+ BaseIndex mem(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicFetchOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp, outTemp,
+ output);
+ }
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect(
+ LAtomicTypedArrayElementBinopForEffect* lir) {
+ MOZ_ASSERT(lir->mir()->isForEffect());
+
+ Register elements = ToRegister(lir->elements());
+ Register flagTemp = ToRegister(lir->flagTemp());
+ Register value = ToRegister(lir->value());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address mem = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicEffectOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp);
+ } else {
+ BaseIndex mem(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicEffectOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp);
+ }
+}
+
+void CodeGenerator::visitInt64ToFloatingPoint(LInt64ToFloatingPoint* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ FloatRegister output = ToFloatRegister(lir->output());
+
+ MIRType outputType = lir->mir()->type();
+ MOZ_ASSERT(outputType == MIRType::Double || outputType == MIRType::Float32);
+
+ if (outputType == MIRType::Double) {
+ if (lir->mir()->isUnsigned()) {
+ masm.convertUInt64ToDouble(input, output, Register::Invalid());
+ } else {
+ masm.convertInt64ToDouble(input, output);
+ }
+ } else {
+ if (lir->mir()->isUnsigned()) {
+ masm.convertUInt64ToFloat32(input, output, Register::Invalid());
+ } else {
+ masm.convertInt64ToFloat32(input, output);
+ }
+ }
+}
+
+void CodeGenerator::visitDivOrModI64(LDivOrModI64* lir) {
+ Register lhs = ToRegister(lir->lhs());
+ Register rhs = ToRegister(lir->rhs());
+ Register output = ToRegister(lir->output());
+
+ Label done;
+
+ // Handle divide by zero.
+ if (lir->canBeDivideByZero()) {
+ Label isNotDivByZero;
+ masm.Cbnz(ARMRegister(rhs, 64), &isNotDivByZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, lir->bytecodeOffset());
+ masm.bind(&isNotDivByZero);
+ }
+
+ // Handle an integer overflow exception from INT64_MIN / -1.
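+  // (The quotient 2^63 is not representable as an int64, so wasm requires a
+  // trap for division; the remainder is defined to be 0.)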
+ if (lir->canBeNegativeOverflow()) {
+ Label noOverflow;
+ masm.branchPtr(Assembler::NotEqual, lhs, ImmWord(INT64_MIN), &noOverflow);
+ masm.branchPtr(Assembler::NotEqual, rhs, ImmWord(-1), &noOverflow);
+ if (lir->mir()->isMod()) {
+ masm.movePtr(ImmWord(0), output);
+ } else {
+ masm.wasmTrap(wasm::Trap::IntegerOverflow, lir->bytecodeOffset());
+ }
+ masm.jump(&done);
+ masm.bind(&noOverflow);
+ }
+
+ masm.Sdiv(ARMRegister(output, 64), ARMRegister(lhs, 64),
+ ARMRegister(rhs, 64));
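+  // For the remainder, Msub folds the quotient back in:
+  // output = lhs - (lhs / rhs) * rhs.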
+ if (lir->mir()->isMod()) {
+ masm.Msub(ARMRegister(output, 64), ARMRegister(output, 64),
+ ARMRegister(rhs, 64), ARMRegister(lhs, 64));
+ }
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitUDivOrModI64(LUDivOrModI64* lir) {
+ Register lhs = ToRegister(lir->lhs());
+ Register rhs = ToRegister(lir->rhs());
+ Register output = ToRegister(lir->output());
+
+ Label done;
+
+ // Handle divide by zero.
+ if (lir->canBeDivideByZero()) {
+ Label isNotDivByZero;
+ masm.Cbnz(ARMRegister(rhs, 64), &isNotDivByZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, lir->bytecodeOffset());
+ masm.bind(&isNotDivByZero);
+ }
+
+ masm.Udiv(ARMRegister(output, 64), ARMRegister(lhs, 64),
+ ARMRegister(rhs, 64));
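+  // As in the signed case, the remainder is recovered as
+  // output = lhs - (lhs / rhs) * rhs.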
+ if (lir->mir()->isMod()) {
+ masm.Msub(ARMRegister(output, 64), ARMRegister(output, 64),
+ ARMRegister(rhs, 64), ARMRegister(lhs, 64));
+ }
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitSimd128(LSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ const LDefinition* out = ins->getDef(0);
+ masm.loadConstantSimd128(ins->simd128(), ToFloatRegister(out));
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmTernarySimd128(LWasmTernarySimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::V128Bitselect: {
+ FloatRegister lhs = ToFloatRegister(ins->v0());
+ FloatRegister rhs = ToFloatRegister(ins->v1());
+ FloatRegister controlDest = ToFloatRegister(ins->v2());
+ masm.bitwiseSelectSimd128(lhs, rhs, controlDest);
+ break;
+ }
+ case wasm::SimdOp::F32x4RelaxedFma:
+ masm.fmaFloat32x4(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
+ ToFloatRegister(ins->v2()));
+ break;
+ case wasm::SimdOp::F32x4RelaxedFnma:
+ masm.fnmaFloat32x4(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
+ ToFloatRegister(ins->v2()));
+ break;
+ case wasm::SimdOp::F64x2RelaxedFma:
+ masm.fmaFloat64x2(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
+ ToFloatRegister(ins->v2()));
+ break;
+ case wasm::SimdOp::F64x2RelaxedFnma:
+ masm.fnmaFloat64x2(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
+ ToFloatRegister(ins->v2()));
+ break;
+ case wasm::SimdOp::I8x16RelaxedLaneSelect:
+ case wasm::SimdOp::I16x8RelaxedLaneSelect:
+ case wasm::SimdOp::I32x4RelaxedLaneSelect:
+ case wasm::SimdOp::I64x2RelaxedLaneSelect: {
+ FloatRegister lhs = ToFloatRegister(ins->v0());
+ FloatRegister rhs = ToFloatRegister(ins->v1());
+ FloatRegister maskDest = ToFloatRegister(ins->v2());
+ masm.laneSelectSimd128(maskDest, lhs, rhs, maskDest);
+ break;
+ }
+ case wasm::SimdOp::I32x4DotI8x16I7x16AddS:
+ masm.dotInt8x16Int7x16ThenAdd(
+ ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
+ ToFloatRegister(ins->v2()), ToFloatRegister(ins->temp()));
+ break;
+ default:
+ MOZ_CRASH("NYI");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister lhs = ToFloatRegister(ins->lhs());
+ FloatRegister rhs = ToFloatRegister(ins->rhs());
+ FloatRegister dest = ToFloatRegister(ins->output());
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::V128And:
+ masm.bitwiseAndSimd128(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::V128Or:
+ masm.bitwiseOrSimd128(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::V128Xor:
+ masm.bitwiseXorSimd128(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::V128AndNot:
+ masm.bitwiseAndNotSimd128(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16AvgrU:
+ masm.unsignedAverageInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8AvgrU:
+ masm.unsignedAverageInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16Add:
+ masm.addInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16AddSatS:
+ masm.addSatInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16AddSatU:
+ masm.unsignedAddSatInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16Sub:
+ masm.subInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16SubSatS:
+ masm.subSatInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16SubSatU:
+ masm.unsignedSubSatInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16MinS:
+ masm.minInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16MinU:
+ masm.unsignedMinInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16MaxS:
+ masm.maxInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16MaxU:
+ masm.unsignedMaxInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8Add:
+ masm.addInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8AddSatS:
+ masm.addSatInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8AddSatU:
+ masm.unsignedAddSatInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8Sub:
+ masm.subInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8SubSatS:
+ masm.subSatInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8SubSatU:
+ masm.unsignedSubSatInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8Mul:
+ masm.mulInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8MinS:
+ masm.minInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8MinU:
+ masm.unsignedMinInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8MaxS:
+ masm.maxInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8MaxU:
+ masm.unsignedMaxInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4Add:
+ masm.addInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4Sub:
+ masm.subInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4Mul:
+ masm.mulInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4MinS:
+ masm.minInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4MinU:
+ masm.unsignedMinInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4MaxS:
+ masm.maxInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4MaxU:
+ masm.unsignedMaxInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2Add:
+ masm.addInt64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2Sub:
+ masm.subInt64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2Mul: {
+ auto temp1 = ToFloatRegister(ins->getTemp(0));
+ auto temp2 = ToFloatRegister(ins->getTemp(1));
+ masm.mulInt64x2(lhs, rhs, dest, temp1, temp2);
+ break;
+ }
+ case wasm::SimdOp::F32x4Add:
+ masm.addFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Sub:
+ masm.subFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Mul:
+ masm.mulFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Div:
+ masm.divFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Min:
+ masm.minFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Max:
+ masm.maxFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Add:
+ masm.addFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Sub:
+ masm.subFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Mul:
+ masm.mulFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Div:
+ masm.divFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Min:
+ masm.minFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Max:
+ masm.maxFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16Swizzle:
+ masm.swizzleInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16RelaxedSwizzle:
+ masm.swizzleInt8x16Relaxed(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16NarrowI16x8S:
+ masm.narrowInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16NarrowI16x8U:
+ masm.unsignedNarrowInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8NarrowI32x4S:
+ masm.narrowInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8NarrowI32x4U:
+ masm.unsignedNarrowInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16Eq:
+ masm.compareInt8x16(Assembler::Equal, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16Ne:
+ masm.compareInt8x16(Assembler::NotEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16LtS:
+ masm.compareInt8x16(Assembler::LessThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16GtS:
+ masm.compareInt8x16(Assembler::GreaterThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16LeS:
+ masm.compareInt8x16(Assembler::LessThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16GeS:
+ masm.compareInt8x16(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16LtU:
+ masm.compareInt8x16(Assembler::Below, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16GtU:
+ masm.compareInt8x16(Assembler::Above, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16LeU:
+ masm.compareInt8x16(Assembler::BelowOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16GeU:
+ masm.compareInt8x16(Assembler::AboveOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8Eq:
+ masm.compareInt16x8(Assembler::Equal, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8Ne:
+ masm.compareInt16x8(Assembler::NotEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8LtS:
+ masm.compareInt16x8(Assembler::LessThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8GtS:
+ masm.compareInt16x8(Assembler::GreaterThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8LeS:
+ masm.compareInt16x8(Assembler::LessThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8GeS:
+ masm.compareInt16x8(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8LtU:
+ masm.compareInt16x8(Assembler::Below, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8GtU:
+ masm.compareInt16x8(Assembler::Above, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8LeU:
+ masm.compareInt16x8(Assembler::BelowOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8GeU:
+ masm.compareInt16x8(Assembler::AboveOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4Eq:
+ masm.compareInt32x4(Assembler::Equal, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4Ne:
+ masm.compareInt32x4(Assembler::NotEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4LtS:
+ masm.compareInt32x4(Assembler::LessThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4GtS:
+ masm.compareInt32x4(Assembler::GreaterThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4LeS:
+ masm.compareInt32x4(Assembler::LessThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4GeS:
+ masm.compareInt32x4(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4LtU:
+ masm.compareInt32x4(Assembler::Below, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4GtU:
+ masm.compareInt32x4(Assembler::Above, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4LeU:
+ masm.compareInt32x4(Assembler::BelowOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4GeU:
+ masm.compareInt32x4(Assembler::AboveOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2Eq:
+ masm.compareInt64x2(Assembler::Equal, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2LtS:
+ masm.compareInt64x2(Assembler::LessThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2GtS:
+ masm.compareInt64x2(Assembler::GreaterThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2LeS:
+ masm.compareInt64x2(Assembler::LessThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2GeS:
+ masm.compareInt64x2(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2Ne:
+ masm.compareInt64x2(Assembler::NotEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Eq:
+ masm.compareFloat32x4(Assembler::Equal, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Ne:
+ masm.compareFloat32x4(Assembler::NotEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Lt:
+ masm.compareFloat32x4(Assembler::LessThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Gt:
+ masm.compareFloat32x4(Assembler::GreaterThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Le:
+ masm.compareFloat32x4(Assembler::LessThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Ge:
+ masm.compareFloat32x4(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Eq:
+ masm.compareFloat64x2(Assembler::Equal, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Ne:
+ masm.compareFloat64x2(Assembler::NotEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Lt:
+ masm.compareFloat64x2(Assembler::LessThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Gt:
+ masm.compareFloat64x2(Assembler::GreaterThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Le:
+ masm.compareFloat64x2(Assembler::LessThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Ge:
+ masm.compareFloat64x2(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4PMax:
+ masm.pseudoMaxFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4PMin:
+ masm.pseudoMinFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2PMax:
+ masm.pseudoMaxFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2PMin:
+ masm.pseudoMinFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4DotI16x8S:
+ masm.widenDotInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtmulLowI8x16S:
+ masm.extMulLowInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtmulHighI8x16S:
+ masm.extMulHighInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtmulLowI8x16U:
+ masm.unsignedExtMulLowInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtmulHighI8x16U:
+ masm.unsignedExtMulHighInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtmulLowI16x8S:
+ masm.extMulLowInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtmulHighI16x8S:
+ masm.extMulHighInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtmulLowI16x8U:
+ masm.unsignedExtMulLowInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtmulHighI16x8U:
+ masm.unsignedExtMulHighInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtmulLowI32x4S:
+ masm.extMulLowInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtmulHighI32x4S:
+ masm.extMulHighInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtmulLowI32x4U:
+ masm.unsignedExtMulLowInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtmulHighI32x4U:
+ masm.unsignedExtMulHighInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8Q15MulrSatS:
+ masm.q15MulrSatInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4RelaxedMin:
+ masm.minFloat32x4Relaxed(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4RelaxedMax:
+ masm.maxFloat32x4Relaxed(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2RelaxedMin:
+ masm.minFloat64x2Relaxed(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2RelaxedMax:
+ masm.maxFloat64x2Relaxed(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8RelaxedQ15MulrS:
+ masm.q15MulrInt16x8Relaxed(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8DotI8x16I7x16S:
+ masm.dotInt8x16Int7x16(lhs, rhs, dest);
+ break;
+ default:
+ MOZ_CRASH("Binary SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmBinarySimd128WithConstant(
+ LWasmBinarySimd128WithConstant* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmVariableShiftSimd128(
+ LWasmVariableShiftSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister lhs = ToFloatRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ FloatRegister dest = ToFloatRegister(ins->output());
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Shl:
+ masm.leftShiftInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16ShrS:
+ masm.rightShiftInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16ShrU:
+ masm.unsignedRightShiftInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8Shl:
+ masm.leftShiftInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8ShrS:
+ masm.rightShiftInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8ShrU:
+ masm.unsignedRightShiftInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4Shl:
+ masm.leftShiftInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4ShrS:
+ masm.rightShiftInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4ShrU:
+ masm.unsignedRightShiftInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2Shl:
+ masm.leftShiftInt64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2ShrS:
+ masm.rightShiftInt64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2ShrU:
+ masm.unsignedRightShiftInt64x2(lhs, rhs, dest);
+ break;
+ default:
+ MOZ_CRASH("Shift SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmConstantShiftSimd128(
+ LWasmConstantShiftSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister src = ToFloatRegister(ins->src());
+ FloatRegister dest = ToFloatRegister(ins->output());
+ int32_t shift = ins->shift();
+
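+  // A shift count of zero reduces to a (possibly elided) register move.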
+ if (shift == 0) {
+ if (src != dest) {
+ masm.moveSimd128(src, dest);
+ }
+ return;
+ }
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Shl:
+ masm.leftShiftInt8x16(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I8x16ShrS:
+ masm.rightShiftInt8x16(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I8x16ShrU:
+ masm.unsignedRightShiftInt8x16(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I16x8Shl:
+ masm.leftShiftInt16x8(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I16x8ShrS:
+ masm.rightShiftInt16x8(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I16x8ShrU:
+ masm.unsignedRightShiftInt16x8(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I32x4Shl:
+ masm.leftShiftInt32x4(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I32x4ShrS:
+ masm.rightShiftInt32x4(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I32x4ShrU:
+ masm.unsignedRightShiftInt32x4(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I64x2Shl:
+ masm.leftShiftInt64x2(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I64x2ShrS:
+ masm.rightShiftInt64x2(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I64x2ShrU:
+ masm.unsignedRightShiftInt64x2(Imm32(shift), src, dest);
+ break;
+ default:
+ MOZ_CRASH("Shift SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmSignReplicationSimd128(
+ LWasmSignReplicationSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmShuffleSimd128(LWasmShuffleSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister lhs = ToFloatRegister(ins->lhs());
+ FloatRegister rhs = ToFloatRegister(ins->rhs());
+ FloatRegister dest = ToFloatRegister(ins->output());
+ MOZ_ASSERT(ins->temp()->isBogusTemp());
+ SimdConstant control = ins->control();
+ switch (ins->op()) {
+ case SimdShuffleOp::BLEND_8x16: {
+ masm.blendInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()),
+ lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::BLEND_16x8: {
+ masm.blendInt16x8(reinterpret_cast<const uint16_t*>(control.asInt16x8()),
+ lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16: {
+ int8_t count = 16 - control.asInt8x16()[0];
+ MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
+ masm.concatAndRightShiftSimd128(lhs, rhs, dest, count);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_HIGH_8x16: {
+ masm.interleaveHighInt8x16(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_HIGH_16x8: {
+ masm.interleaveHighInt16x8(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_HIGH_32x4: {
+ masm.interleaveHighInt32x4(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_HIGH_64x2: {
+ masm.interleaveHighInt64x2(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_LOW_8x16: {
+ masm.interleaveLowInt8x16(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_LOW_16x8: {
+ masm.interleaveLowInt16x8(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_LOW_32x4: {
+ masm.interleaveLowInt32x4(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_LOW_64x2: {
+ masm.interleaveLowInt64x2(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::SHUFFLE_BLEND_8x16: {
+ masm.shuffleInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()),
+ lhs, rhs, dest);
+ break;
+ }
+ default: {
+ MOZ_CRASH("Unsupported SIMD shuffle operation");
+ }
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister src = ToFloatRegister(ins->src());
+ FloatRegister dest = ToFloatRegister(ins->output());
+ SimdConstant control = ins->control();
+ switch (ins->op()) {
+ case SimdPermuteOp::BROADCAST_8x16: {
+ const SimdConstant::I8x16& mask = control.asInt8x16();
+ int8_t source = mask[0];
+ masm.splatX16(source, src, dest);
+ break;
+ }
+ case SimdPermuteOp::BROADCAST_16x8: {
+ const SimdConstant::I16x8& mask = control.asInt16x8();
+ int16_t source = mask[0];
+ masm.splatX8(source, src, dest);
+ break;
+ }
+ case SimdPermuteOp::MOVE: {
+ masm.moveSimd128(src, dest);
+ break;
+ }
+ case SimdPermuteOp::PERMUTE_8x16: {
+ const SimdConstant::I8x16& mask = control.asInt8x16();
+# ifdef DEBUG
+ mozilla::DebugOnly<int> i;
+ for (i = 0; i < 16 && mask[i] == i; i++) {
+ }
+ MOZ_ASSERT(i < 16, "Should have been a MOVE operation");
+# endif
+ masm.permuteInt8x16(reinterpret_cast<const uint8_t*>(mask), src, dest);
+ break;
+ }
+ case SimdPermuteOp::PERMUTE_16x8: {
+ const SimdConstant::I16x8& mask = control.asInt16x8();
+# ifdef DEBUG
+ mozilla::DebugOnly<int> i;
+ for (i = 0; i < 8 && mask[i] == i; i++) {
+ }
+ MOZ_ASSERT(i < 8, "Should have been a MOVE operation");
+# endif
+ masm.permuteInt16x8(reinterpret_cast<const uint16_t*>(mask), src, dest);
+ break;
+ }
+ case SimdPermuteOp::PERMUTE_32x4: {
+ const SimdConstant::I32x4& mask = control.asInt32x4();
+# ifdef DEBUG
+ mozilla::DebugOnly<int> i;
+ for (i = 0; i < 4 && mask[i] == i; i++) {
+ }
+ MOZ_ASSERT(i < 4, "Should have been a MOVE operation");
+# endif
+ masm.permuteInt32x4(reinterpret_cast<const uint32_t*>(mask), src, dest);
+ break;
+ }
+ case SimdPermuteOp::ROTATE_RIGHT_8x16: {
+ int8_t count = control.asInt8x16()[0];
+ MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
+ masm.rotateRightSimd128(src, dest, count);
+ break;
+ }
+ case SimdPermuteOp::SHIFT_LEFT_8x16: {
+ int8_t count = control.asInt8x16()[0];
+ MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
+ masm.leftShiftSimd128(Imm32(count), src, dest);
+ break;
+ }
+ case SimdPermuteOp::SHIFT_RIGHT_8x16: {
+ int8_t count = control.asInt8x16()[0];
+ MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
+ masm.rightShiftSimd128(Imm32(count), src, dest);
+ break;
+ }
+ case SimdPermuteOp::REVERSE_16x8:
+ masm.reverseInt16x8(src, dest);
+ break;
+ case SimdPermuteOp::REVERSE_32x4:
+ masm.reverseInt32x4(src, dest);
+ break;
+ case SimdPermuteOp::REVERSE_64x2:
+ masm.reverseInt64x2(src, dest);
+ break;
+ default: {
+ MOZ_CRASH("Unsupported SIMD permutation operation");
+ }
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmReplaceLaneSimd128(LWasmReplaceLaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ToFloatRegister(ins->lhs()) == ToFloatRegister(ins->output()));
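+  // replaceLane writes the lane into the lhs register in place, so the lhs
+  // is reused as the output.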
+ FloatRegister lhsDest = ToFloatRegister(ins->lhs());
+ const LAllocation* rhs = ins->rhs();
+ uint32_t laneIndex = ins->laneIndex();
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16ReplaceLane:
+ masm.replaceLaneInt8x16(laneIndex, ToRegister(rhs), lhsDest);
+ break;
+ case wasm::SimdOp::I16x8ReplaceLane:
+ masm.replaceLaneInt16x8(laneIndex, ToRegister(rhs), lhsDest);
+ break;
+ case wasm::SimdOp::I32x4ReplaceLane:
+ masm.replaceLaneInt32x4(laneIndex, ToRegister(rhs), lhsDest);
+ break;
+ case wasm::SimdOp::F32x4ReplaceLane:
+ masm.replaceLaneFloat32x4(laneIndex, ToFloatRegister(rhs), lhsDest);
+ break;
+ case wasm::SimdOp::F64x2ReplaceLane:
+ masm.replaceLaneFloat64x2(laneIndex, ToFloatRegister(rhs), lhsDest);
+ break;
+ default:
+ MOZ_CRASH("ReplaceLane SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmReplaceInt64LaneSimd128(
+ LWasmReplaceInt64LaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_RELEASE_ASSERT(ins->simdOp() == wasm::SimdOp::I64x2ReplaceLane);
+ MOZ_ASSERT(ToFloatRegister(ins->lhs()) == ToFloatRegister(ins->output()));
+ masm.replaceLaneInt64x2(ins->laneIndex(), ToRegister64(ins->rhs()),
+ ToFloatRegister(ins->lhs()));
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmScalarToSimd128(LWasmScalarToSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister dest = ToFloatRegister(ins->output());
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Splat:
+ masm.splatX16(ToRegister(ins->src()), dest);
+ break;
+ case wasm::SimdOp::I16x8Splat:
+ masm.splatX8(ToRegister(ins->src()), dest);
+ break;
+ case wasm::SimdOp::I32x4Splat:
+ masm.splatX4(ToRegister(ins->src()), dest);
+ break;
+ case wasm::SimdOp::F32x4Splat:
+ masm.splatX4(ToFloatRegister(ins->src()), dest);
+ break;
+ case wasm::SimdOp::F64x2Splat:
+ masm.splatX2(ToFloatRegister(ins->src()), dest);
+ break;
+ default:
+ MOZ_CRASH("ScalarToSimd128 SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmInt64ToSimd128(LWasmInt64ToSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ Register64 src = ToRegister64(ins->src());
+ FloatRegister dest = ToFloatRegister(ins->output());
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I64x2Splat:
+ masm.splatX2(src, dest);
+ break;
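+    // The extending loads below place the 64-bit value in the low half of
+    // the vector, then sign- or zero-extend each lane of that half.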
+ case wasm::SimdOp::V128Load8x8S:
+ masm.moveGPR64ToDouble(src, dest);
+ masm.widenLowInt8x16(dest, dest);
+ break;
+ case wasm::SimdOp::V128Load8x8U:
+ masm.moveGPR64ToDouble(src, dest);
+ masm.unsignedWidenLowInt8x16(dest, dest);
+ break;
+ case wasm::SimdOp::V128Load16x4S:
+ masm.moveGPR64ToDouble(src, dest);
+ masm.widenLowInt16x8(dest, dest);
+ break;
+ case wasm::SimdOp::V128Load16x4U:
+ masm.moveGPR64ToDouble(src, dest);
+ masm.unsignedWidenLowInt16x8(dest, dest);
+ break;
+ case wasm::SimdOp::V128Load32x2S:
+ masm.moveGPR64ToDouble(src, dest);
+ masm.widenLowInt32x4(dest, dest);
+ break;
+ case wasm::SimdOp::V128Load32x2U:
+ masm.moveGPR64ToDouble(src, dest);
+ masm.unsignedWidenLowInt32x4(dest, dest);
+ break;
+ default:
+ MOZ_CRASH("Int64ToSimd128 SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmUnarySimd128(LWasmUnarySimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister src = ToFloatRegister(ins->src());
+ FloatRegister dest = ToFloatRegister(ins->output());
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Neg:
+ masm.negInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I16x8Neg:
+ masm.negInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtendLowI8x16S:
+ masm.widenLowInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtendHighI8x16S:
+ masm.widenHighInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtendLowI8x16U:
+ masm.unsignedWidenLowInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtendHighI8x16U:
+ masm.unsignedWidenHighInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I32x4Neg:
+ masm.negInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtendLowI16x8S:
+ masm.widenLowInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtendHighI16x8S:
+ masm.widenHighInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtendLowI16x8U:
+ masm.unsignedWidenLowInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtendHighI16x8U:
+ masm.unsignedWidenHighInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I32x4TruncSatF32x4S:
+ masm.truncSatFloat32x4ToInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::I32x4TruncSatF32x4U:
+ masm.unsignedTruncSatFloat32x4ToInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::I64x2Neg:
+ masm.negInt64x2(src, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtendLowI32x4S:
+ masm.widenLowInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtendHighI32x4S:
+ masm.widenHighInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtendLowI32x4U:
+ masm.unsignedWidenLowInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtendHighI32x4U:
+ masm.unsignedWidenHighInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4Abs:
+ masm.absFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4Neg:
+ masm.negFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4Sqrt:
+ masm.sqrtFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4ConvertI32x4S:
+ masm.convertInt32x4ToFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4ConvertI32x4U:
+ masm.unsignedConvertInt32x4ToFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F64x2Abs:
+ masm.absFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F64x2Neg:
+ masm.negFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F64x2Sqrt:
+ masm.sqrtFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::V128Not:
+ masm.bitwiseNotSimd128(src, dest);
+ break;
+ case wasm::SimdOp::I8x16Abs:
+ masm.absInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I16x8Abs:
+ masm.absInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I32x4Abs:
+ masm.absInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::I64x2Abs:
+ masm.absInt64x2(src, dest);
+ break;
+ case wasm::SimdOp::F32x4Ceil:
+ masm.ceilFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4Floor:
+ masm.floorFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4Trunc:
+ masm.truncFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4Nearest:
+ masm.nearestFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F64x2Ceil:
+ masm.ceilFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F64x2Floor:
+ masm.floorFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F64x2Trunc:
+ masm.truncFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F64x2Nearest:
+ masm.nearestFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F32x4DemoteF64x2Zero:
+ masm.convertFloat64x2ToFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F64x2PromoteLowF32x4:
+ masm.convertFloat32x4ToFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F64x2ConvertLowI32x4S:
+ masm.convertInt32x4ToFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F64x2ConvertLowI32x4U:
+ masm.unsignedConvertInt32x4ToFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::I32x4TruncSatF64x2SZero:
+ masm.truncSatFloat64x2ToInt32x4(src, dest, ToFloatRegister(ins->temp()));
+ break;
+ case wasm::SimdOp::I32x4TruncSatF64x2UZero:
+ masm.unsignedTruncSatFloat64x2ToInt32x4(src, dest,
+ ToFloatRegister(ins->temp()));
+ break;
+ case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S:
+ masm.extAddPairwiseInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U:
+ masm.unsignedExtAddPairwiseInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S:
+ masm.extAddPairwiseInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U:
+ masm.unsignedExtAddPairwiseInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I8x16Popcnt:
+ masm.popcntInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I32x4RelaxedTruncF32x4S:
+ masm.truncFloat32x4ToInt32x4Relaxed(src, dest);
+ break;
+ case wasm::SimdOp::I32x4RelaxedTruncF32x4U:
+ masm.unsignedTruncFloat32x4ToInt32x4Relaxed(src, dest);
+ break;
+ case wasm::SimdOp::I32x4RelaxedTruncF64x2SZero:
+ masm.truncFloat64x2ToInt32x4Relaxed(src, dest);
+ break;
+ case wasm::SimdOp::I32x4RelaxedTruncF64x2UZero:
+ masm.unsignedTruncFloat64x2ToInt32x4Relaxed(src, dest);
+ break;
+ default:
+ MOZ_CRASH("Unary SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmReduceSimd128(LWasmReduceSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister src = ToFloatRegister(ins->src());
+ const LDefinition* dest = ins->output();
+ uint32_t imm = ins->imm();
+ FloatRegister temp = ToTempFloatRegisterOrInvalid(ins->getTemp(0));
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::V128AnyTrue:
+ masm.anyTrueSimd128(src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I8x16AllTrue:
+ masm.allTrueInt8x16(src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I16x8AllTrue:
+ masm.allTrueInt16x8(src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I32x4AllTrue:
+ masm.allTrueInt32x4(src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I64x2AllTrue:
+ masm.allTrueInt64x2(src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I8x16Bitmask:
+ masm.bitmaskInt8x16(src, ToRegister(dest), temp);
+ break;
+ case wasm::SimdOp::I16x8Bitmask:
+ masm.bitmaskInt16x8(src, ToRegister(dest), temp);
+ break;
+ case wasm::SimdOp::I32x4Bitmask:
+ masm.bitmaskInt32x4(src, ToRegister(dest), temp);
+ break;
+ case wasm::SimdOp::I64x2Bitmask:
+ masm.bitmaskInt64x2(src, ToRegister(dest), temp);
+ break;
+ case wasm::SimdOp::I8x16ExtractLaneS:
+ masm.extractLaneInt8x16(imm, src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I8x16ExtractLaneU:
+ masm.unsignedExtractLaneInt8x16(imm, src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I16x8ExtractLaneS:
+ masm.extractLaneInt16x8(imm, src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I16x8ExtractLaneU:
+ masm.unsignedExtractLaneInt16x8(imm, src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I32x4ExtractLane:
+ masm.extractLaneInt32x4(imm, src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::F32x4ExtractLane:
+ masm.extractLaneFloat32x4(imm, src, ToFloatRegister(dest));
+ break;
+ case wasm::SimdOp::F64x2ExtractLane:
+ masm.extractLaneFloat64x2(imm, src, ToFloatRegister(dest));
+ break;
+ default:
+ MOZ_CRASH("Reduce SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmReduceAndBranchSimd128(
+ LWasmReduceAndBranchSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister src = ToFloatRegister(ins->src());
+
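+  // Fold the vector to a single 64-bit value in a SIMD scratch register,
+  // then move it to a scratch GPR to drive the branch.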
+ ScratchSimd128Scope scratch(masm);
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const Register test = temps.AcquireX().asUnsized();
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::V128AnyTrue:
+ masm.Addp(Simd1D(scratch), Simd2D(src));
+ masm.Umov(ARMRegister(test, 64), Simd1D(scratch), 0);
+ masm.branch64(Assembler::Equal, Register64(test), Imm64(0),
+ getJumpLabelForBranch(ins->ifFalse()));
+ jumpToBlock(ins->ifTrue());
+ break;
+ case wasm::SimdOp::I8x16AllTrue:
+ case wasm::SimdOp::I16x8AllTrue:
+ case wasm::SimdOp::I32x4AllTrue:
+ case wasm::SimdOp::I64x2AllTrue: {
+      // Set each lane to all-ones where the source lane is zero; a nonzero
+      // result below then means some lane was not true.
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16AllTrue:
+ masm.Cmeq(Simd16B(scratch), Simd16B(src), 0);
+ break;
+ case wasm::SimdOp::I16x8AllTrue:
+ masm.Cmeq(Simd8H(scratch), Simd8H(src), 0);
+ break;
+ case wasm::SimdOp::I32x4AllTrue:
+ masm.Cmeq(Simd4S(scratch), Simd4S(src), 0);
+ break;
+ case wasm::SimdOp::I64x2AllTrue:
+ masm.Cmeq(Simd2D(scratch), Simd2D(src), 0);
+ break;
+ default:
+ MOZ_CRASH();
+ }
+ masm.Addp(Simd1D(scratch), Simd2D(scratch));
+ masm.Umov(ARMRegister(test, 64), Simd1D(scratch), 0);
+ masm.branch64(Assembler::NotEqual, Register64(test), Imm64(0),
+ getJumpLabelForBranch(ins->ifFalse()));
+ jumpToBlock(ins->ifTrue());
+ break;
+ }
+ default:
+ MOZ_CRASH("Reduce-and-branch SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmReduceSimd128ToInt64(
+ LWasmReduceSimd128ToInt64* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister src = ToFloatRegister(ins->src());
+ Register64 dest = ToOutRegister64(ins);
+ uint32_t imm = ins->imm();
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I64x2ExtractLane:
+ masm.extractLaneInt64x2(imm, src, dest);
+ break;
+ default:
+ MOZ_CRASH("Reduce SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
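+// Re-derive a memory access descriptor with a narrower scalar type, keeping
+// the original access's alignment, offset, and trap offset.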
+static inline wasm::MemoryAccessDesc DeriveMemoryAccessDesc(
+ const wasm::MemoryAccessDesc& access, Scalar::Type type) {
+ return wasm::MemoryAccessDesc(type, access.align(), access.offset(),
+ access.trapOffset());
+}
+
+void CodeGenerator::visitWasmLoadLaneSimd128(LWasmLoadLaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+  // Load the scalar value with wasmLoad, then insert it into the vector with
+  // replaceLane.
+ const MWasmLoadLaneSimd128* mir = ins->mir();
+ Register temp = ToRegister(ins->temp());
+ FloatRegister src = ToFloatRegister(ins->src());
+ FloatRegister dest = ToFloatRegister(ins->output());
+ // replaceLane takes an lhsDest argument.
+ masm.moveSimd128(src, dest);
+ switch (ins->laneSize()) {
+ case 1: {
+ masm.wasmLoad(DeriveMemoryAccessDesc(mir->access(), Scalar::Int8),
+ HeapReg, ToRegister(ins->ptr()), AnyRegister(temp));
+ masm.replaceLaneInt8x16(ins->laneIndex(), temp, dest);
+ break;
+ }
+ case 2: {
+ masm.wasmLoad(DeriveMemoryAccessDesc(mir->access(), Scalar::Int16),
+ HeapReg, ToRegister(ins->ptr()), AnyRegister(temp));
+ masm.replaceLaneInt16x8(ins->laneIndex(), temp, dest);
+ break;
+ }
+ case 4: {
+ masm.wasmLoad(DeriveMemoryAccessDesc(mir->access(), Scalar::Int32),
+ HeapReg, ToRegister(ins->ptr()), AnyRegister(temp));
+ masm.replaceLaneInt32x4(ins->laneIndex(), temp, dest);
+ break;
+ }
+ case 8: {
+ masm.wasmLoadI64(DeriveMemoryAccessDesc(mir->access(), Scalar::Int64),
+ HeapReg, ToRegister(ins->ptr()), Register64(temp));
+ masm.replaceLaneInt64x2(ins->laneIndex(), Register64(temp), dest);
+ break;
+ }
+ default:
+ MOZ_CRASH("Unsupported load lane size");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmStoreLaneSimd128(LWasmStoreLaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+  // Extract the lane to a scalar register, then store it with wasmStore.
+ const MWasmStoreLaneSimd128* mir = ins->mir();
+ Register temp = ToRegister(ins->temp());
+ FloatRegister src = ToFloatRegister(ins->src());
+ switch (ins->laneSize()) {
+ case 1: {
+ masm.extractLaneInt8x16(ins->laneIndex(), src, temp);
+ masm.wasmStore(DeriveMemoryAccessDesc(mir->access(), Scalar::Int8),
+ AnyRegister(temp), HeapReg, ToRegister(ins->ptr()));
+ break;
+ }
+ case 2: {
+ masm.extractLaneInt16x8(ins->laneIndex(), src, temp);
+ masm.wasmStore(DeriveMemoryAccessDesc(mir->access(), Scalar::Int16),
+ AnyRegister(temp), HeapReg, ToRegister(ins->ptr()));
+ break;
+ }
+ case 4: {
+ masm.extractLaneInt32x4(ins->laneIndex(), src, temp);
+ masm.wasmStore(DeriveMemoryAccessDesc(mir->access(), Scalar::Int32),
+ AnyRegister(temp), HeapReg, ToRegister(ins->ptr()));
+ break;
+ }
+ case 8: {
+ masm.extractLaneInt64x2(ins->laneIndex(), src, Register64(temp));
+ masm.wasmStoreI64(DeriveMemoryAccessDesc(mir->access(), Scalar::Int64),
+ Register64(temp), HeapReg, ToRegister(ins->ptr()));
+ break;
+ }
+ default:
+ MOZ_CRASH("Unsupported store lane size");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
diff --git a/js/src/jit/arm64/CodeGenerator-arm64.h b/js/src/jit/arm64/CodeGenerator-arm64.h
new file mode 100644
index 0000000000..43cd24fddf
--- /dev/null
+++ b/js/src/jit/arm64/CodeGenerator-arm64.h
@@ -0,0 +1,135 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_CodeGenerator_arm64_h
+#define jit_arm64_CodeGenerator_arm64_h
+
+#include "jit/arm64/Assembler-arm64.h"
+#include "jit/shared/CodeGenerator-shared.h"
+
+namespace js {
+namespace jit {
+
+class CodeGeneratorARM64;
+class OutOfLineBailout;
+class OutOfLineTableSwitch;
+
+using OutOfLineWasmTruncateCheck =
+ OutOfLineWasmTruncateCheckBase<CodeGeneratorARM64>;
+
+class CodeGeneratorARM64 : public CodeGeneratorShared {
+ friend class MoveResolverARM64;
+
+ protected:
+ CodeGeneratorARM64(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm);
+
+ NonAssertingLabel deoptLabel_;
+
+ MoveOperand toMoveOperand(const LAllocation a) const;
+
+ void bailoutIf(Assembler::Condition condition, LSnapshot* snapshot);
+ void bailoutFrom(Label* label, LSnapshot* snapshot);
+ void bailout(LSnapshot* snapshot);
+
+ template <typename T1, typename T2>
+ void bailoutCmpPtr(Assembler::Condition c, T1 lhs, T2 rhs,
+ LSnapshot* snapshot) {
+ masm.cmpPtr(lhs, rhs);
+ return bailoutIf(c, snapshot);
+ }
+ void bailoutTestPtr(Assembler::Condition c, Register lhs, Register rhs,
+ LSnapshot* snapshot) {
+ masm.testPtr(lhs, rhs);
+ return bailoutIf(c, snapshot);
+ }
+ template <typename T1, typename T2>
+ void bailoutCmp32(Assembler::Condition c, T1 lhs, T2 rhs,
+ LSnapshot* snapshot) {
+ masm.cmp32(lhs, rhs);
+ return bailoutIf(c, snapshot);
+ }
+ template <typename T1, typename T2>
+ void bailoutTest32(Assembler::Condition c, T1 lhs, T2 rhs,
+ LSnapshot* snapshot) {
+ masm.test32(lhs, rhs);
+ return bailoutIf(c, snapshot);
+ }
+ void bailoutIfFalseBool(Register reg, LSnapshot* snapshot) {
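+    // The boolean is stored in the low byte of |reg|; bail out when it is 0.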
+ masm.test32(reg, Imm32(0xFF));
+ return bailoutIf(Assembler::Zero, snapshot);
+ }
+
+ bool generateOutOfLineCode();
+
+ // Emits a branch that directs control flow to the true block if |cond| is
+ // true, and the false block if |cond| is false.
+ void emitBranch(Assembler::Condition cond, MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse);
+
+ void testNullEmitBranch(Assembler::Condition cond, const ValueOperand& value,
+ MBasicBlock* ifTrue, MBasicBlock* ifFalse) {
+ cond = masm.testNull(cond, value);
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+ void testUndefinedEmitBranch(Assembler::Condition cond,
+ const ValueOperand& value, MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse) {
+ cond = masm.testUndefined(cond, value);
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+ void testObjectEmitBranch(Assembler::Condition cond,
+ const ValueOperand& value, MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse) {
+ cond = masm.testObject(cond, value);
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+ void testZeroEmitBranch(Assembler::Condition cond, Register reg,
+ MBasicBlock* ifTrue, MBasicBlock* ifFalse) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ masm.cmpPtr(reg, ImmWord(0));
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+
+ void emitTableSwitchDispatch(MTableSwitch* mir, Register index,
+ Register base);
+
+ void emitBigIntDiv(LBigIntDiv* ins, Register dividend, Register divisor,
+ Register output, Label* fail);
+ void emitBigIntMod(LBigIntMod* ins, Register dividend, Register divisor,
+ Register output, Label* fail);
+ void emitSimpleBinaryI64(
+ LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* lir, JSOp op);
+
+ ValueOperand ToValue(LInstruction* ins, size_t pos);
+ ValueOperand ToTempValue(LInstruction* ins, size_t pos);
+
+ void generateInvalidateEpilogue();
+
+ public:
+ void visitOutOfLineBailout(OutOfLineBailout* ool);
+ void visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool);
+ void visitOutOfLineWasmTruncateCheck(OutOfLineWasmTruncateCheck* ool);
+};
+
+typedef CodeGeneratorARM64 CodeGeneratorSpecific;
+
+// An out-of-line bailout thunk.
+class OutOfLineBailout : public OutOfLineCodeBase<CodeGeneratorARM64> {
+ protected: // Silence Clang warning.
+ LSnapshot* snapshot_;
+
+ public:
+ explicit OutOfLineBailout(LSnapshot* snapshot) : snapshot_(snapshot) {}
+
+ void accept(CodeGeneratorARM64* codegen) override;
+
+ LSnapshot* snapshot() const { return snapshot_; }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm64_CodeGenerator_arm64_h */
diff --git a/js/src/jit/arm64/LIR-arm64.h b/js/src/jit/arm64/LIR-arm64.h
new file mode 100644
index 0000000000..d825209b1e
--- /dev/null
+++ b/js/src/jit/arm64/LIR-arm64.h
@@ -0,0 +1,373 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_LIR_arm64_h
+#define jit_arm64_LIR_arm64_h
+
+namespace js {
+namespace jit {
+
+class LUnboxBase : public LInstructionHelper<1, 1, 0> {
+ public:
+ LUnboxBase(LNode::Opcode opcode, const LAllocation& input)
+ : LInstructionHelper(opcode) {
+ setOperand(0, input);
+ }
+
+ static const size_t Input = 0;
+
+ MUnbox* mir() const { return mir_->toUnbox(); }
+};
+
+class LUnbox : public LUnboxBase {
+ public:
+ LIR_HEADER(Unbox);
+
+ explicit LUnbox(const LAllocation& input) : LUnboxBase(classOpcode, input) {}
+
+ const char* extraName() const { return StringFromMIRType(mir()->type()); }
+};
+
+class LUnboxFloatingPoint : public LUnboxBase {
+ MIRType type_;
+
+ public:
+ LIR_HEADER(UnboxFloatingPoint);
+
+ LUnboxFloatingPoint(const LAllocation& input, MIRType type)
+ : LUnboxBase(classOpcode, input), type_(type) {}
+
+ MIRType type() const { return type_; }
+ const char* extraName() const { return StringFromMIRType(type_); }
+};
+
+// Convert a 32-bit unsigned integer to a double.
+class LWasmUint32ToDouble : public LInstructionHelper<1, 1, 0> {
+ public:
+ LIR_HEADER(WasmUint32ToDouble)
+
+ explicit LWasmUint32ToDouble(const LAllocation& input)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, input);
+ }
+};
+
+// Convert a 32-bit unsigned integer to a float32.
+class LWasmUint32ToFloat32 : public LInstructionHelper<1, 1, 0> {
+ public:
+ LIR_HEADER(WasmUint32ToFloat32)
+
+ explicit LWasmUint32ToFloat32(const LAllocation& input)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, input);
+ }
+};
+
+class LDivI : public LBinaryMath<1> {
+ public:
+ LIR_HEADER(DivI);
+
+ LDivI(const LAllocation& lhs, const LAllocation& rhs, const LDefinition& temp)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ setTemp(0, temp);
+ }
+
+ MDiv* mir() const { return mir_->toDiv(); }
+};
+
+class LDivPowTwoI : public LInstructionHelper<1, 1, 0> {
+ const int32_t shift_;
+ const bool negativeDivisor_;
+
+ public:
+ LIR_HEADER(DivPowTwoI)
+
+ LDivPowTwoI(const LAllocation& lhs, int32_t shift, bool negativeDivisor)
+ : LInstructionHelper(classOpcode),
+ shift_(shift),
+ negativeDivisor_(negativeDivisor) {
+ setOperand(0, lhs);
+ }
+
+ const LAllocation* numerator() { return getOperand(0); }
+
+ int32_t shift() { return shift_; }
+ bool negativeDivisor() { return negativeDivisor_; }
+
+ MDiv* mir() const { return mir_->toDiv(); }
+};
+
+class LDivConstantI : public LInstructionHelper<1, 1, 1> {
+ const int32_t denominator_;
+
+ public:
+ LIR_HEADER(DivConstantI)
+
+ LDivConstantI(const LAllocation& lhs, int32_t denominator,
+ const LDefinition& temp)
+ : LInstructionHelper(classOpcode), denominator_(denominator) {
+ setOperand(0, lhs);
+ setTemp(0, temp);
+ }
+
+ const LAllocation* numerator() { return getOperand(0); }
+ const LDefinition* temp() { return getTemp(0); }
+ int32_t denominator() const { return denominator_; }
+ MDiv* mir() const { return mir_->toDiv(); }
+ bool canBeNegativeDividend() const { return mir()->canBeNegativeDividend(); }
+};
+
+class LUDivConstantI : public LInstructionHelper<1, 1, 1> {
+ const int32_t denominator_;
+
+ public:
+ LIR_HEADER(UDivConstantI)
+
+ LUDivConstantI(const LAllocation& lhs, int32_t denominator,
+ const LDefinition& temp)
+ : LInstructionHelper(classOpcode), denominator_(denominator) {
+ setOperand(0, lhs);
+ setTemp(0, temp);
+ }
+
+ const LAllocation* numerator() { return getOperand(0); }
+ const LDefinition* temp() { return getTemp(0); }
+ int32_t denominator() const { return denominator_; }
+ MDiv* mir() const { return mir_->toDiv(); }
+};
+
+class LModI : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(ModI);
+
+ LModI(const LAllocation& lhs, const LAllocation& rhs)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ }
+
+ MMod* mir() const { return mir_->toMod(); }
+};
+
+class LModPowTwoI : public LInstructionHelper<1, 1, 0> {
+ const int32_t shift_;
+
+ public:
+ LIR_HEADER(ModPowTwoI);
+ int32_t shift() { return shift_; }
+
+ LModPowTwoI(const LAllocation& lhs, int32_t shift)
+ : LInstructionHelper(classOpcode), shift_(shift) {
+ setOperand(0, lhs);
+ }
+
+ MMod* mir() const { return mir_->toMod(); }
+};
+
+class LModMaskI : public LInstructionHelper<1, 1, 2> {
+ const int32_t shift_;
+
+ public:
+ LIR_HEADER(ModMaskI);
+
+ LModMaskI(const LAllocation& lhs, const LDefinition& temp1,
+ const LDefinition& temp2, int32_t shift)
+ : LInstructionHelper(classOpcode), shift_(shift) {
+ setOperand(0, lhs);
+ setTemp(0, temp1);
+ setTemp(1, temp2);
+ }
+
+ int32_t shift() const { return shift_; }
+
+ MMod* mir() const { return mir_->toMod(); }
+};
+
+// Takes an integer input and branches to the matching tableswitch case.
+class LTableSwitch : public LInstructionHelper<0, 1, 2> {
+ public:
+ LIR_HEADER(TableSwitch);
+
+ LTableSwitch(const LAllocation& in, const LDefinition& inputCopy,
+ const LDefinition& jumpTablePointer, MTableSwitch* ins)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, in);
+ setTemp(0, inputCopy);
+ setTemp(1, jumpTablePointer);
+ setMir(ins);
+ }
+
+ MTableSwitch* mir() const { return mir_->toTableSwitch(); }
+
+ const LAllocation* index() { return getOperand(0); }
+ const LDefinition* tempInt() { return getTemp(0); }
+  // Provided so shared CodeGenerator code can use the same accessor names.
+ const LDefinition* tempPointer() { return getTemp(1); }
+};
+
+// Takes a boxed Value input and branches to the matching tableswitch case.
+class LTableSwitchV : public LInstructionHelper<0, BOX_PIECES, 3> {
+ public:
+ LIR_HEADER(TableSwitchV);
+
+ LTableSwitchV(const LBoxAllocation& input, const LDefinition& inputCopy,
+ const LDefinition& floatCopy,
+ const LDefinition& jumpTablePointer, MTableSwitch* ins)
+ : LInstructionHelper(classOpcode) {
+ setBoxOperand(InputValue, input);
+ setTemp(0, inputCopy);
+ setTemp(1, floatCopy);
+ setTemp(2, jumpTablePointer);
+ setMir(ins);
+ }
+
+ MTableSwitch* mir() const { return mir_->toTableSwitch(); }
+
+ static const size_t InputValue = 0;
+
+ const LDefinition* tempInt() { return getTemp(0); }
+ const LDefinition* tempFloat() { return getTemp(1); }
+ const LDefinition* tempPointer() { return getTemp(2); }
+};
+
+class LMulI : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(MulI);
+
+ LMulI() : LBinaryMath(classOpcode) {}
+
+ MMul* mir() { return mir_->toMul(); }
+};
+
+class LUDiv : public LBinaryMath<1> {
+ public:
+ LIR_HEADER(UDiv);
+
+ LUDiv(const LAllocation& lhs, const LAllocation& rhs,
+ const LDefinition& remainder)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ setTemp(0, remainder);
+ }
+
+ const LDefinition* remainder() { return getTemp(0); }
+
+ MDiv* mir() { return mir_->toDiv(); }
+};
+
+class LUMod : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(UMod);
+
+ LUMod(const LAllocation& lhs, const LAllocation& rhs)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ }
+
+ MMod* mir() { return mir_->toMod(); }
+};
+
+class LInt64ToFloatingPoint : public LInstructionHelper<1, 1, 0> {
+ public:
+ LIR_HEADER(Int64ToFloatingPoint);
+
+ explicit LInt64ToFloatingPoint(const LInt64Allocation& in)
+ : LInstructionHelper(classOpcode) {
+ setInt64Operand(0, in);
+ }
+
+ MInt64ToFloatingPoint* mir() const { return mir_->toInt64ToFloatingPoint(); }
+};
+
+class LWasmTruncateToInt64 : public LInstructionHelper<1, 1, 0> {
+ public:
+ LIR_HEADER(WasmTruncateToInt64);
+
+ explicit LWasmTruncateToInt64(const LAllocation& in)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, in);
+ }
+
+ MWasmTruncateToInt64* mir() const { return mir_->toWasmTruncateToInt64(); }
+};
+
+class LDivOrModI64 : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(DivOrModI64)
+
+ LDivOrModI64(const LAllocation& lhs, const LAllocation& rhs)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ }
+
+ MBinaryArithInstruction* mir() const {
+ MOZ_ASSERT(mir_->isDiv() || mir_->isMod());
+ return static_cast<MBinaryArithInstruction*>(mir_);
+ }
+
+ bool canBeDivideByZero() const {
+ if (mir_->isMod()) {
+ return mir_->toMod()->canBeDivideByZero();
+ }
+ return mir_->toDiv()->canBeDivideByZero();
+ }
+ bool canBeNegativeOverflow() const {
+ if (mir_->isMod()) {
+ return mir_->toMod()->canBeNegativeDividend();
+ }
+ return mir_->toDiv()->canBeNegativeOverflow();
+ }
+ wasm::BytecodeOffset bytecodeOffset() const {
+ MOZ_ASSERT(mir_->isDiv() || mir_->isMod());
+ if (mir_->isMod()) {
+ return mir_->toMod()->bytecodeOffset();
+ }
+ return mir_->toDiv()->bytecodeOffset();
+ }
+};
+
+class LUDivOrModI64 : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(UDivOrModI64);
+
+ LUDivOrModI64(const LAllocation& lhs, const LAllocation& rhs)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ }
+
+ const char* extraName() const {
+ return mir()->isTruncated() ? "Truncated" : nullptr;
+ }
+
+ MBinaryArithInstruction* mir() const {
+ MOZ_ASSERT(mir_->isDiv() || mir_->isMod());
+ return static_cast<MBinaryArithInstruction*>(mir_);
+ }
+ bool canBeDivideByZero() const {
+ if (mir_->isMod()) {
+ return mir_->toMod()->canBeDivideByZero();
+ }
+ return mir_->toDiv()->canBeDivideByZero();
+ }
+ wasm::BytecodeOffset bytecodeOffset() const {
+ MOZ_ASSERT(mir_->isDiv() || mir_->isMod());
+ if (mir_->isMod()) {
+ return mir_->toMod()->bytecodeOffset();
+ }
+ return mir_->toDiv()->bytecodeOffset();
+ }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm64_LIR_arm64_h */
diff --git a/js/src/jit/arm64/Lowering-arm64.cpp b/js/src/jit/arm64/Lowering-arm64.cpp
new file mode 100644
index 0000000000..d71f22089d
--- /dev/null
+++ b/js/src/jit/arm64/Lowering-arm64.cpp
@@ -0,0 +1,1438 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm64/Lowering-arm64.h"
+
+#include "mozilla/MathAlgorithms.h"
+
+#include "jit/arm64/Assembler-arm64.h"
+#include "jit/Lowering.h"
+#include "jit/MIR.h"
+#include "jit/shared/Lowering-shared-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+using mozilla::FloorLog2;
+
+LBoxAllocation LIRGeneratorARM64::useBoxFixed(MDefinition* mir, Register reg1,
+ Register, bool useAtStart) {
+ MOZ_ASSERT(mir->type() == MIRType::Value);
+
+ ensureDefined(mir);
+ return LBoxAllocation(LUse(reg1, mir->virtualRegister(), useAtStart));
+}
+
+LAllocation LIRGeneratorARM64::useByteOpRegister(MDefinition* mir) {
+ return useRegister(mir);
+}
+
+LAllocation LIRGeneratorARM64::useByteOpRegisterAtStart(MDefinition* mir) {
+ return useRegisterAtStart(mir);
+}
+
+LAllocation LIRGeneratorARM64::useByteOpRegisterOrNonDoubleConstant(
+ MDefinition* mir) {
+ return useRegisterOrNonDoubleConstant(mir);
+}
+
+LDefinition LIRGeneratorARM64::tempByteOpRegister() { return temp(); }
+
+LDefinition LIRGeneratorARM64::tempToUnbox() { return temp(); }
+
+void LIRGenerator::visitBox(MBox* box) {
+ MDefinition* opd = box->getOperand(0);
+
+ // If the operand is a constant, emit near its uses.
+ if (opd->isConstant() && box->canEmitAtUses()) {
+ emitAtUses(box);
+ return;
+ }
+
+ if (opd->isConstant()) {
+ define(new (alloc()) LValue(opd->toConstant()->toJSValue()), box,
+ LDefinition(LDefinition::BOX));
+ } else {
+ LBox* ins = new (alloc()) LBox(useRegister(opd), opd->type());
+ define(ins, box, LDefinition(LDefinition::BOX));
+ }
+}
+
+void LIRGenerator::visitUnbox(MUnbox* unbox) {
+ MDefinition* box = unbox->getOperand(0);
+ MOZ_ASSERT(box->type() == MIRType::Value);
+
+ LUnboxBase* lir;
+ if (IsFloatingPointType(unbox->type())) {
+ lir = new (alloc())
+ LUnboxFloatingPoint(useRegisterAtStart(box), unbox->type());
+ } else if (unbox->fallible()) {
+ // If the unbox is fallible, load the Value in a register first to
+ // avoid multiple loads.
+ lir = new (alloc()) LUnbox(useRegisterAtStart(box));
+ } else {
+ // FIXME: It should be possible to useAtStart() here, but the DEBUG
+ // code in CodeGenerator::visitUnbox() needs to handle non-Register
+ // cases. ARM64 doesn't have an Operand type.
+ lir = new (alloc()) LUnbox(useRegisterAtStart(box));
+ }
+
+ if (unbox->fallible()) {
+ assignSnapshot(lir, unbox->bailoutKind());
+ }
+
+ define(lir, unbox);
+}
+
+void LIRGenerator::visitReturnImpl(MDefinition* opd, bool isGenerator) {
+ MOZ_ASSERT(opd->type() == MIRType::Value);
+
+ LReturn* ins = new (alloc()) LReturn(isGenerator);
+ ins->setOperand(0, useFixed(opd, JSReturnReg));
+ add(ins);
+}
+
+// x = !y
+void LIRGeneratorARM64::lowerForALU(LInstructionHelper<1, 1, 0>* ins,
+ MDefinition* mir, MDefinition* input) {
+ ins->setOperand(
+ 0, ins->snapshot() ? useRegister(input) : useRegisterAtStart(input));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+// z = x+y
+void LIRGeneratorARM64::lowerForALU(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ ins->setOperand(0,
+ ins->snapshot() ? useRegister(lhs) : useRegisterAtStart(lhs));
+ ins->setOperand(1, ins->snapshot() ? useRegisterOrConstant(rhs)
+ : useRegisterOrConstantAtStart(rhs));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+void LIRGeneratorARM64::lowerForFPU(LInstructionHelper<1, 1, 0>* ins,
+ MDefinition* mir, MDefinition* input) {
+ ins->setOperand(0, useRegisterAtStart(input));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+template <size_t Temps>
+void LIRGeneratorARM64::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ ins->setOperand(0, useRegisterAtStart(lhs));
+ ins->setOperand(1, useRegisterAtStart(rhs));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+template void LIRGeneratorARM64::lowerForFPU(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs);
+template void LIRGeneratorARM64::lowerForFPU(LInstructionHelper<1, 2, 1>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs);
+
+void LIRGeneratorARM64::lowerForALUInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES, 0>* ins, MDefinition* mir,
+ MDefinition* input) {
+ ins->setInt64Operand(0, useInt64RegisterAtStart(input));
+ defineInt64(ins, mir);
+}
+
+// These all currently have codegen that depends on reuse but only because the
+// masm API depends on that. We need new three-address masm APIs, for both
+// constant and variable rhs.
+//
+// MAdd => LAddI64
+// MSub => LSubI64
+// MBitAnd, MBitOr, MBitXor => LBitOpI64
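+//
+// A minimal sketch of the difference, assuming the generic two-address
+// masm.add64() and the vixl-style three-operand Add(); register names are
+// illustrative only:
+//
+//   masm.add64(rhsReg64, lhsAndDestReg64);       // dest must alias lhs
+//   masm.Add(ARMRegister(destReg, 64),           // dest, lhs, rhs independent
+//            ARMRegister(lhsReg, 64),
+//            Operand(ARMRegister(rhsReg, 64)));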
+void LIRGeneratorARM64::lowerForALUInt64(
+ LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs) {
+ ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+ ins->setInt64Operand(INT64_PIECES, useInt64RegisterOrConstantAtStart(rhs));
+ defineInt64(ins, mir);
+}
+
+void LIRGeneratorARM64::lowerForMulInt64(LMulI64* ins, MMul* mir,
+ MDefinition* lhs, MDefinition* rhs) {
+ ins->setInt64Operand(LMulI64::Lhs, useInt64RegisterAtStart(lhs));
+ ins->setInt64Operand(LMulI64::Rhs, useInt64RegisterOrConstantAtStart(rhs));
+ defineInt64(ins, mir);
+}
+
+template <size_t Temps>
+void LIRGeneratorARM64::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs) {
+ ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+
+ static_assert(LShiftI64::Rhs == INT64_PIECES,
+ "Assume Rhs is located at INT64_PIECES.");
+ static_assert(LRotateI64::Count == INT64_PIECES,
+ "Assume Count is located at INT64_PIECES.");
+
+ ins->setOperand(INT64_PIECES, useRegisterOrConstantAtStart(rhs));
+ defineInt64(ins, mir);
+}
+
+template void LIRGeneratorARM64::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+template void LIRGeneratorARM64::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 1>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+
+void LIRGeneratorARM64::lowerForCompareI64AndBranch(MTest* mir, MCompare* comp,
+ JSOp op, MDefinition* left,
+ MDefinition* right,
+ MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse) {
+ auto* lir = new (alloc())
+ LCompareI64AndBranch(comp, op, useInt64Register(left),
+ useInt64RegisterOrConstant(right), ifTrue, ifFalse);
+ add(lir, mir);
+}
+
+void LIRGeneratorARM64::lowerForBitAndAndBranch(LBitAndAndBranch* baab,
+ MInstruction* mir,
+ MDefinition* lhs,
+ MDefinition* rhs) {
+ baab->setOperand(0, useRegisterAtStart(lhs));
+ baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
+ add(baab, mir);
+}
+
+void LIRGeneratorARM64::lowerWasmBuiltinTruncateToInt32(
+ MWasmBuiltinTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);
+
+ if (opd->type() == MIRType::Double) {
+ define(new (alloc()) LWasmBuiltinTruncateDToInt32(
+ useRegister(opd), useFixed(ins->instance(), InstanceReg),
+ LDefinition::BogusTemp()),
+ ins);
+ return;
+ }
+
+ define(new (alloc()) LWasmBuiltinTruncateFToInt32(
+ useRegister(opd), useFixed(ins->instance(), InstanceReg),
+ LDefinition::BogusTemp()),
+ ins);
+}
+
+void LIRGeneratorARM64::lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition,
+ LBlock* block, size_t lirIndex) {
+ lowerTypedPhiInput(phi, inputPosition, block, lirIndex);
+}
+
+void LIRGeneratorARM64::lowerForShift(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ ins->setOperand(0, useRegister(lhs));
+ ins->setOperand(1, useRegisterOrConstant(rhs));
+ define(ins, mir);
+}
+
+void LIRGeneratorARM64::lowerDivI(MDiv* div) {
+ if (div->isUnsigned()) {
+ lowerUDiv(div);
+ return;
+ }
+
+ if (div->rhs()->isConstant()) {
+ LAllocation lhs = useRegister(div->lhs());
+ int32_t rhs = div->rhs()->toConstant()->toInt32();
+ int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs));
+
+ if (rhs != 0 && uint32_t(1) << shift == mozilla::Abs(rhs)) {
+ LDivPowTwoI* lir = new (alloc()) LDivPowTwoI(lhs, shift, rhs < 0);
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+ return;
+ }
+ if (rhs != 0) {
+ LDivConstantI* lir = new (alloc()) LDivConstantI(lhs, rhs, temp());
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+ return;
+ }
+ }
+
+ LDivI* lir = new (alloc())
+ LDivI(useRegister(div->lhs()), useRegister(div->rhs()), temp());
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+}
+
+void LIRGeneratorARM64::lowerNegI(MInstruction* ins, MDefinition* input) {
+ define(new (alloc()) LNegI(useRegisterAtStart(input)), ins);
+}
+
+void LIRGeneratorARM64::lowerNegI64(MInstruction* ins, MDefinition* input) {
+ defineInt64(new (alloc()) LNegI64(useInt64RegisterAtStart(input)), ins);
+}
+
+void LIRGeneratorARM64::lowerMulI(MMul* mul, MDefinition* lhs,
+ MDefinition* rhs) {
+ LMulI* lir = new (alloc()) LMulI;
+ if (mul->fallible()) {
+ assignSnapshot(lir, mul->bailoutKind());
+ }
+ lowerForALU(lir, mul, lhs, rhs);
+}
+
+void LIRGeneratorARM64::lowerModI(MMod* mod) {
+ if (mod->isUnsigned()) {
+ lowerUMod(mod);
+ return;
+ }
+
+ if (mod->rhs()->isConstant()) {
+ int32_t rhs = mod->rhs()->toConstant()->toInt32();
+ int32_t shift = FloorLog2(rhs);
+ if (rhs > 0 && 1 << shift == rhs) {
+ LModPowTwoI* lir =
+ new (alloc()) LModPowTwoI(useRegister(mod->lhs()), shift);
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+ return;
+ } else if (shift < 31 && (1 << (shift + 1)) - 1 == rhs) {
+ LModMaskI* lir = new (alloc())
+ LModMaskI(useRegister(mod->lhs()), temp(), temp(), shift + 1);
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+      define(lir, mod);
+      return;
+ }
+ }
+
+ LModI* lir =
+ new (alloc()) LModI(useRegister(mod->lhs()), useRegister(mod->rhs()));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+}
+
+void LIRGeneratorARM64::lowerDivI64(MDiv* div) {
+ if (div->isUnsigned()) {
+ lowerUDivI64(div);
+ return;
+ }
+
+ LDivOrModI64* lir = new (alloc())
+ LDivOrModI64(useRegister(div->lhs()), useRegister(div->rhs()));
+ defineInt64(lir, div);
+}
+
+void LIRGeneratorARM64::lowerUDivI64(MDiv* div) {
+ LUDivOrModI64* lir = new (alloc())
+ LUDivOrModI64(useRegister(div->lhs()), useRegister(div->rhs()));
+ defineInt64(lir, div);
+}
+
+void LIRGeneratorARM64::lowerUModI64(MMod* mod) {
+ LUDivOrModI64* lir = new (alloc())
+ LUDivOrModI64(useRegister(mod->lhs()), useRegister(mod->rhs()));
+ defineInt64(lir, mod);
+}
+
+void LIRGeneratorARM64::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) {
+ MOZ_CRASH("We don't use runtime div for this architecture");
+}
+
+void LIRGeneratorARM64::lowerModI64(MMod* mod) {
+ if (mod->isUnsigned()) {
+ lowerUModI64(mod);
+ return;
+ }
+
+ LDivOrModI64* lir = new (alloc())
+ LDivOrModI64(useRegister(mod->lhs()), useRegister(mod->rhs()));
+ defineInt64(lir, mod);
+}
+
+void LIRGeneratorARM64::lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod) {
+ MOZ_CRASH("We don't use runtime mod for this architecture");
+}
+
+void LIRGenerator::visitPowHalf(MPowHalf* ins) {
+ MDefinition* input = ins->input();
+ MOZ_ASSERT(input->type() == MIRType::Double);
+ LPowHalfD* lir = new (alloc()) LPowHalfD(useRegister(input));
+ define(lir, ins);
+}
+
+void LIRGeneratorARM64::lowerWasmSelectI(MWasmSelect* select) {
+ if (select->type() == MIRType::Simd128) {
+ LAllocation t = useRegisterAtStart(select->trueExpr());
+ LAllocation f = useRegister(select->falseExpr());
+ LAllocation c = useRegister(select->condExpr());
+ auto* lir = new (alloc()) LWasmSelect(t, f, c);
+ defineReuseInput(lir, select, LWasmSelect::TrueExprIndex);
+ } else {
+ LAllocation t = useRegisterAtStart(select->trueExpr());
+ LAllocation f = useRegisterAtStart(select->falseExpr());
+ LAllocation c = useRegisterAtStart(select->condExpr());
+ define(new (alloc()) LWasmSelect(t, f, c), select);
+ }
+}
+
+void LIRGeneratorARM64::lowerWasmSelectI64(MWasmSelect* select) {
+ LInt64Allocation t = useInt64RegisterAtStart(select->trueExpr());
+ LInt64Allocation f = useInt64RegisterAtStart(select->falseExpr());
+ LAllocation c = useRegisterAtStart(select->condExpr());
+ defineInt64(new (alloc()) LWasmSelectI64(t, f, c), select);
+}
+
+// On arm64 we specialize the cases where the compare type is {U,}Int32,
+// {U,}Int64, Float32, or Double, and the select type is {U,}Int32, {U,}Int64,
+// Float32, or Double, independently.
+bool LIRGeneratorARM64::canSpecializeWasmCompareAndSelect(
+ MCompare::CompareType compTy, MIRType insTy) {
+ return (insTy == MIRType::Int32 || insTy == MIRType::Int64 ||
+ insTy == MIRType::Float32 || insTy == MIRType::Double) &&
+ (compTy == MCompare::Compare_Int32 ||
+ compTy == MCompare::Compare_UInt32 ||
+ compTy == MCompare::Compare_Int64 ||
+ compTy == MCompare::Compare_UInt64 ||
+ compTy == MCompare::Compare_Float32 ||
+ compTy == MCompare::Compare_Double);
+}
+
+void LIRGeneratorARM64::lowerWasmCompareAndSelect(MWasmSelect* ins,
+ MDefinition* lhs,
+ MDefinition* rhs,
+ MCompare::CompareType compTy,
+ JSOp jsop) {
+ MOZ_ASSERT(canSpecializeWasmCompareAndSelect(compTy, ins->type()));
+ LAllocation rhsAlloc;
+ if (compTy == MCompare::Compare_Float32 ||
+ compTy == MCompare::Compare_Double) {
+ rhsAlloc = useRegisterAtStart(rhs);
+ } else if (compTy == MCompare::Compare_Int32 ||
+ compTy == MCompare::Compare_UInt32 ||
+ compTy == MCompare::Compare_Int64 ||
+ compTy == MCompare::Compare_UInt64) {
+ rhsAlloc = useRegisterOrConstantAtStart(rhs);
+ } else {
+ MOZ_CRASH("Unexpected type");
+ }
+ auto* lir = new (alloc())
+ LWasmCompareAndSelect(useRegisterAtStart(lhs), rhsAlloc, compTy, jsop,
+ useRegisterAtStart(ins->trueExpr()),
+ useRegisterAtStart(ins->falseExpr()));
+ define(lir, ins);
+}
+
+void LIRGenerator::visitAbs(MAbs* ins) {
+ define(allocateAbs(ins, useRegisterAtStart(ins->input())), ins);
+}
+
+LTableSwitch* LIRGeneratorARM64::newLTableSwitch(const LAllocation& in,
+ const LDefinition& inputCopy,
+ MTableSwitch* tableswitch) {
+ return new (alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch);
+}
+
+LTableSwitchV* LIRGeneratorARM64::newLTableSwitchV(MTableSwitch* tableswitch) {
+ return new (alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)), temp(),
+ tempDouble(), temp(), tableswitch);
+}
+
+void LIRGeneratorARM64::lowerUrshD(MUrsh* mir) {
+ MDefinition* lhs = mir->lhs();
+ MDefinition* rhs = mir->rhs();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Int32);
+ MOZ_ASSERT(rhs->type() == MIRType::Int32);
+
+ LUrshD* lir = new (alloc())
+ LUrshD(useRegister(lhs), useRegisterOrConstant(rhs), temp());
+ define(lir, mir);
+}
+
+void LIRGeneratorARM64::lowerPowOfTwoI(MPow* mir) {
+ int32_t base = mir->input()->toConstant()->toInt32();
+ MDefinition* power = mir->power();
+
+ auto* lir = new (alloc()) LPowOfTwoI(useRegister(power), base);
+ assignSnapshot(lir, mir->bailoutKind());
+ define(lir, mir);
+}
+
+void LIRGeneratorARM64::lowerBigIntLsh(MBigIntLsh* ins) {
+ auto* lir = new (alloc()) LBigIntLsh(
+ useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), temp(), temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM64::lowerBigIntRsh(MBigIntRsh* ins) {
+ auto* lir = new (alloc()) LBigIntRsh(
+ useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), temp(), temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM64::lowerBigIntDiv(MBigIntDiv* ins) {
+ auto* lir = new (alloc()) LBigIntDiv(useRegister(ins->lhs()),
+ useRegister(ins->rhs()), temp(), temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM64::lowerBigIntMod(MBigIntMod* ins) {
+ auto* lir = new (alloc()) LBigIntMod(useRegister(ins->lhs()),
+ useRegister(ins->rhs()), temp(), temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+#ifdef ENABLE_WASM_SIMD
+
+bool LIRGeneratorARM64::canFoldReduceSimd128AndBranch(wasm::SimdOp op) {
+ switch (op) {
+ case wasm::SimdOp::V128AnyTrue:
+ case wasm::SimdOp::I8x16AllTrue:
+ case wasm::SimdOp::I16x8AllTrue:
+ case wasm::SimdOp::I32x4AllTrue:
+ case wasm::SimdOp::I64x2AllTrue:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool LIRGeneratorARM64::canEmitWasmReduceSimd128AtUses(
+ MWasmReduceSimd128* ins) {
+ if (!ins->canEmitAtUses()) {
+ return false;
+ }
+ // Only specific ops generating int32.
+ if (ins->type() != MIRType::Int32) {
+ return false;
+ }
+ if (!canFoldReduceSimd128AndBranch(ins->simdOp())) {
+ return false;
+ }
+ // If never used then defer (it will be removed).
+ MUseIterator iter(ins->usesBegin());
+ if (iter == ins->usesEnd()) {
+ return true;
+ }
+ // We require an MTest consumer.
+ MNode* node = iter->consumer();
+ if (!node->isDefinition() || !node->toDefinition()->isTest()) {
+ return false;
+ }
+ // Defer only if there's only one use.
+ iter++;
+ return iter == ins->usesEnd();
+}
+
+#endif
+
+void LIRGenerator::visitWasmNeg(MWasmNeg* ins) {
+ switch (ins->type()) {
+ case MIRType::Int32:
+ define(new (alloc()) LNegI(useRegisterAtStart(ins->input())), ins);
+ break;
+ case MIRType::Float32:
+ define(new (alloc()) LNegF(useRegisterAtStart(ins->input())), ins);
+ break;
+ case MIRType::Double:
+ define(new (alloc()) LNegD(useRegisterAtStart(ins->input())), ins);
+ break;
+ default:
+ MOZ_CRASH("unexpected type");
+ }
+}
+
+void LIRGeneratorARM64::lowerUDiv(MDiv* div) {
+ LAllocation lhs = useRegister(div->lhs());
+ if (div->rhs()->isConstant()) {
+ // NOTE: the result of toInt32 is coerced to uint32_t.
+ uint32_t rhs = div->rhs()->toConstant()->toInt32();
+ int32_t shift = mozilla::FloorLog2(rhs);
+
+ if (rhs != 0 && uint32_t(1) << shift == rhs) {
+ LDivPowTwoI* lir = new (alloc()) LDivPowTwoI(lhs, shift, false);
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+ return;
+ }
+
+ LUDivConstantI* lir = new (alloc()) LUDivConstantI(lhs, rhs, temp());
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+ return;
+ }
+
+ // Generate UDiv
+ LAllocation rhs = useRegister(div->rhs());
+ LDefinition remainder = LDefinition::BogusTemp();
+ if (!div->canTruncateRemainder()) {
+ remainder = temp();
+ }
+
+ LUDiv* lir = new (alloc()) LUDiv(lhs, rhs, remainder);
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+}
+
+void LIRGeneratorARM64::lowerUMod(MMod* mod) {
+ LUMod* lir = new (alloc())
+ LUMod(useRegister(mod->getOperand(0)), useRegister(mod->getOperand(1)));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+}
+
+void LIRGenerator::visitWasmUnsignedToDouble(MWasmUnsignedToDouble* ins) {
+ MOZ_ASSERT(ins->input()->type() == MIRType::Int32);
+ LWasmUint32ToDouble* lir =
+ new (alloc()) LWasmUint32ToDouble(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmUnsignedToFloat32(MWasmUnsignedToFloat32* ins) {
+ MOZ_ASSERT(ins->input()->type() == MIRType::Int32);
+ LWasmUint32ToFloat32* lir =
+ new (alloc()) LWasmUint32ToFloat32(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+}
+
+void LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
+ MOZ_ASSERT_IF(ins->needsBoundsCheck(),
+ boundsCheckLimit->type() == MIRType::Int32);
+
+ LAllocation baseAlloc = useRegisterAtStart(base);
+
+ LAllocation limitAlloc = ins->needsBoundsCheck()
+ ? useRegisterAtStart(boundsCheckLimit)
+ : LAllocation();
+
+ // We have no memory-base value, meaning that HeapReg is to be used as the
+ // memory base. This follows from the definition of
+ // FunctionCompiler::maybeLoadMemoryBase() in WasmIonCompile.cpp.
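+  // Accordingly, the memory-base operand of the LAsmJSLoadHeap created below
+  // is left as a bogus (empty) LAllocation().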
+ MOZ_ASSERT(!ins->hasMemoryBase());
+ auto* lir =
+ new (alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, LAllocation());
+ define(lir, ins);
+}
+
+void LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
+ MOZ_ASSERT_IF(ins->needsBoundsCheck(),
+ boundsCheckLimit->type() == MIRType::Int32);
+
+ LAllocation baseAlloc = useRegisterAtStart(base);
+
+ LAllocation limitAlloc = ins->needsBoundsCheck()
+ ? useRegisterAtStart(boundsCheckLimit)
+ : LAllocation();
+
+  // See the comment in LIRGenerator::visitAsmJSLoadHeap just above.
+ MOZ_ASSERT(!ins->hasMemoryBase());
+ add(new (alloc()) LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()),
+ limitAlloc, LAllocation()),
+ ins);
+}
+
+void LIRGenerator::visitWasmCompareExchangeHeap(MWasmCompareExchangeHeap* ins) {
+ MDefinition* base = ins->base();
+ // See comment in visitWasmLoad re the type of 'base'.
+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64);
+
+ // Note, the access type may be Int64 here.
+
+ LWasmCompareExchangeHeap* lir = new (alloc())
+ LWasmCompareExchangeHeap(useRegister(base), useRegister(ins->oldValue()),
+ useRegister(ins->newValue()));
+
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmAtomicExchangeHeap(MWasmAtomicExchangeHeap* ins) {
+ MDefinition* base = ins->base();
+ // See comment in visitWasmLoad re the type of 'base'.
+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64);
+
+ // Note, the access type may be Int64 here.
+
+ LWasmAtomicExchangeHeap* lir = new (alloc())
+ LWasmAtomicExchangeHeap(useRegister(base), useRegister(ins->value()));
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmAtomicBinopHeap(MWasmAtomicBinopHeap* ins) {
+ MDefinition* base = ins->base();
+ // See comment in visitWasmLoad re the type of 'base'.
+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64);
+
+ // Note, the access type may be Int64 here.
+
+ if (!ins->hasUses()) {
+ LWasmAtomicBinopHeapForEffect* lir =
+ new (alloc()) LWasmAtomicBinopHeapForEffect(useRegister(base),
+ useRegister(ins->value()),
+ /* flagTemp= */ temp());
+ add(lir, ins);
+ return;
+ }
+
+ LWasmAtomicBinopHeap* lir = new (alloc())
+ LWasmAtomicBinopHeap(useRegister(base), useRegister(ins->value()),
+ /* temp= */ LDefinition::BogusTemp(),
+ /* flagTemp= */ temp());
+ define(lir, ins);
+}
+
+void LIRGeneratorARM64::lowerTruncateDToInt32(MTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Double);
+ define(new (alloc())
+ LTruncateDToInt32(useRegister(opd), LDefinition::BogusTemp()),
+ ins);
+}
+
+void LIRGeneratorARM64::lowerTruncateFToInt32(MTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Float32);
+ define(new (alloc())
+ LTruncateFToInt32(useRegister(opd), LDefinition::BogusTemp()),
+ ins);
+}
+
+void LIRGenerator::visitAtomicTypedArrayElementBinop(
+ MAtomicTypedArrayElementBinop* ins) {
+ MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
+
+ LAllocation value = useRegister(ins->value());
+
+ if (Scalar::isBigIntType(ins->arrayType())) {
+ LInt64Definition temp1 = tempInt64();
+ LInt64Definition temp2 = tempInt64();
+
+ // Case 1: the result of the operation is not used.
+ //
+ // We can omit allocating the result BigInt.
+
+ if (ins->isForEffect()) {
+ auto* lir = new (alloc()) LAtomicTypedArrayElementBinopForEffect64(
+ elements, index, value, temp1, temp2);
+ add(lir, ins);
+ return;
+ }
+
+ // Case 2: the result of the operation is used.
+
+ auto* lir = new (alloc())
+ LAtomicTypedArrayElementBinop64(elements, index, value, temp1, temp2);
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+ return;
+ }
+
+ if (ins->isForEffect()) {
+ auto* lir = new (alloc())
+ LAtomicTypedArrayElementBinopForEffect(elements, index, value, temp());
+ add(lir, ins);
+ return;
+ }
+
+ LDefinition tempDef1 = temp();
+ LDefinition tempDef2 = LDefinition::BogusTemp();
+ if (ins->arrayType() == Scalar::Uint32) {
+ tempDef2 = temp();
+ }
+
+ LAtomicTypedArrayElementBinop* lir = new (alloc())
+ LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);
+
+ define(lir, ins);
+}
+
+void LIRGenerator::visitCompareExchangeTypedArrayElement(
+ MCompareExchangeTypedArrayElement* ins) {
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
+
+ const LAllocation newval = useRegister(ins->newval());
+ const LAllocation oldval = useRegister(ins->oldval());
+
+ if (Scalar::isBigIntType(ins->arrayType())) {
+ LInt64Definition temp1 = tempInt64();
+ LInt64Definition temp2 = tempInt64();
+
+ auto* lir = new (alloc()) LCompareExchangeTypedArrayElement64(
+ elements, index, oldval, newval, temp1, temp2);
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+ return;
+ }
+
+ // If the target is an FPReg then we need a temporary at the CodeGenerator
+ // level for creating the result.
+
+ LDefinition outTemp = LDefinition::BogusTemp();
+ if (ins->arrayType() == Scalar::Uint32) {
+ outTemp = temp();
+ }
+
+ LCompareExchangeTypedArrayElement* lir =
+ new (alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval,
+ newval, outTemp);
+
+ define(lir, ins);
+}
+
+void LIRGenerator::visitAtomicExchangeTypedArrayElement(
+ MAtomicExchangeTypedArrayElement* ins) {
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
+
+ const LAllocation value = useRegister(ins->value());
+
+ if (Scalar::isBigIntType(ins->arrayType())) {
+ LInt64Definition temp1 = tempInt64();
+ LDefinition temp2 = temp();
+
+ auto* lir = new (alloc()) LAtomicExchangeTypedArrayElement64(
+ elements, index, value, temp1, temp2);
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+ return;
+ }
+
+ MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);
+
+ LDefinition tempDef = LDefinition::BogusTemp();
+ if (ins->arrayType() == Scalar::Uint32) {
+ tempDef = temp();
+ }
+
+ LAtomicExchangeTypedArrayElement* lir = new (alloc())
+ LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);
+
+ define(lir, ins);
+}
+
+void LIRGeneratorARM64::lowerAtomicLoad64(MLoadUnboxedScalar* ins) {
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->storageType());
+
+ auto* lir = new (alloc()) LAtomicLoad64(elements, index, temp(), tempInt64());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM64::lowerAtomicStore64(MStoreUnboxedScalar* ins) {
+ LUse elements = useRegister(ins->elements());
+ LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->writeType());
+ LAllocation value = useRegister(ins->value());
+
+ add(new (alloc()) LAtomicStore64(elements, index, value, tempInt64()), ins);
+}
+
+void LIRGenerator::visitSubstr(MSubstr* ins) {
+ LSubstr* lir = new (alloc())
+ LSubstr(useRegister(ins->string()), useRegister(ins->begin()),
+ useRegister(ins->length()), temp(), temp(), temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGenerator::visitWasmTruncateToInt64(MWasmTruncateToInt64* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);
+
+ defineInt64(new (alloc()) LWasmTruncateToInt64(useRegister(opd)), ins);
+}
+
+void LIRGeneratorARM64::lowerWasmBuiltinTruncateToInt64(
+ MWasmBuiltinTruncateToInt64* ins) {
+ MOZ_CRASH("We don't use WasmBuiltinTruncateToInt64 for arm64");
+}
+
+void LIRGeneratorARM64::lowerBuiltinInt64ToFloatingPoint(
+ MBuiltinInt64ToFloatingPoint* ins) {
+ MOZ_CRASH("We don't use it for this architecture");
+}
+
+void LIRGenerator::visitWasmHeapBase(MWasmHeapBase* ins) {
+ auto* lir = new (alloc()) LWasmHeapBase(LAllocation());
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmLoad(MWasmLoad* ins) {
+ MDefinition* base = ins->base();
+ // 'base' is a GPR but may be of either type. If it is 32-bit it is
+ // zero-extended and can act as 64-bit.
+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64);
+
+ LAllocation ptr = useRegisterOrConstantAtStart(base);
+
+ if (ins->type() == MIRType::Int64) {
+ auto* lir = new (alloc()) LWasmLoadI64(ptr);
+ defineInt64(lir, ins);
+ } else {
+ auto* lir = new (alloc()) LWasmLoad(ptr);
+ define(lir, ins);
+ }
+}
+
+void LIRGenerator::visitWasmStore(MWasmStore* ins) {
+ MDefinition* base = ins->base();
+ // See comment in visitWasmLoad re the type of 'base'.
+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64);
+
+ MDefinition* value = ins->value();
+
+ if (ins->access().type() == Scalar::Int64) {
+ LAllocation baseAlloc = useRegisterOrConstantAtStart(base);
+ LInt64Allocation valueAlloc = useInt64RegisterAtStart(value);
+ auto* lir = new (alloc()) LWasmStoreI64(baseAlloc, valueAlloc);
+ add(lir, ins);
+ return;
+ }
+
+ LAllocation baseAlloc = useRegisterOrConstantAtStart(base);
+ LAllocation valueAlloc = useRegisterAtStart(value);
+ auto* lir = new (alloc()) LWasmStore(baseAlloc, valueAlloc);
+ add(lir, ins);
+}
+
+void LIRGenerator::visitInt64ToFloatingPoint(MInt64ToFloatingPoint* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Int64);
+ MOZ_ASSERT(IsFloatingPointType(ins->type()));
+
+ define(new (alloc()) LInt64ToFloatingPoint(useInt64Register(opd)), ins);
+}
+
+void LIRGenerator::visitCopySign(MCopySign* ins) {
+ MDefinition* lhs = ins->lhs();
+ MDefinition* rhs = ins->rhs();
+
+ MOZ_ASSERT(IsFloatingPointType(lhs->type()));
+ MOZ_ASSERT(lhs->type() == rhs->type());
+ MOZ_ASSERT(lhs->type() == ins->type());
+
+ LInstructionHelper<1, 2, 2>* lir;
+ if (lhs->type() == MIRType::Double) {
+ lir = new (alloc()) LCopySignD();
+ } else {
+ lir = new (alloc()) LCopySignF();
+ }
+
+ lir->setOperand(0, useRegisterAtStart(lhs));
+ lir->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
+ ? useRegister(rhs)
+ : useRegisterAtStart(rhs));
+  // The copySignDouble and copySignFloat32 implementations are optimized for
+  // lhs == output. Reusing the input also rules out rhs == output when
+  // lhs != output, which avoids clobbering rhs.
+ defineReuseInput(lir, ins, 0);
+}
+
+void LIRGenerator::visitExtendInt32ToInt64(MExtendInt32ToInt64* ins) {
+ defineInt64(
+ new (alloc()) LExtendInt32ToInt64(useRegisterAtStart(ins->input())), ins);
+}
+
+void LIRGenerator::visitSignExtendInt64(MSignExtendInt64* ins) {
+ defineInt64(new (alloc())
+ LSignExtendInt64(useInt64RegisterAtStart(ins->input())),
+ ins);
+}
+
+void LIRGenerator::visitWasmTernarySimd128(MWasmTernarySimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ins->v0()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->v1()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->v2()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::V128Bitselect: {
+ auto* lir = new (alloc()) LWasmTernarySimd128(
+ ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()),
+ useRegisterAtStart(ins->v2()));
+      // On ARM64 the machine instruction (BSL) uses the control register as
+      // both an input and the output, so reuse v2 as the output.
+ defineReuseInput(lir, ins, LWasmTernarySimd128::V2);
+ break;
+ }
+ case wasm::SimdOp::F32x4RelaxedFma:
+ case wasm::SimdOp::F32x4RelaxedFnma:
+ case wasm::SimdOp::F64x2RelaxedFma:
+ case wasm::SimdOp::F64x2RelaxedFnma: {
+ auto* lir = new (alloc()) LWasmTernarySimd128(
+ ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()),
+ useRegisterAtStart(ins->v2()));
+ defineReuseInput(lir, ins, LWasmTernarySimd128::V2);
+ break;
+ }
+ case wasm::SimdOp::I32x4DotI8x16I7x16AddS: {
+ auto* lir = new (alloc()) LWasmTernarySimd128(
+ ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()),
+ useRegisterAtStart(ins->v2()), tempSimd128());
+ defineReuseInput(lir, ins, LWasmTernarySimd128::V2);
+ break;
+ }
+ case wasm::SimdOp::I8x16RelaxedLaneSelect:
+ case wasm::SimdOp::I16x8RelaxedLaneSelect:
+ case wasm::SimdOp::I32x4RelaxedLaneSelect:
+ case wasm::SimdOp::I64x2RelaxedLaneSelect: {
+ auto* lir = new (alloc()) LWasmTernarySimd128(
+ ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()),
+ useRegisterAtStart(ins->v2()));
+ defineReuseInput(lir, ins, LWasmTernarySimd128::V2);
+ break;
+ }
+ default:
+ MOZ_CRASH("NYI");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MDefinition* lhs = ins->lhs();
+ MDefinition* rhs = ins->rhs();
+ wasm::SimdOp op = ins->simdOp();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Simd128);
+ MOZ_ASSERT(rhs->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ LAllocation lhsAlloc = useRegisterAtStart(lhs);
+ LAllocation rhsAlloc = useRegisterAtStart(rhs);
+ LDefinition tempReg0 = LDefinition::BogusTemp();
+ LDefinition tempReg1 = LDefinition::BogusTemp();
+ if (op == wasm::SimdOp::I64x2Mul) {
+ tempReg0 = tempSimd128();
+ tempReg1 = tempSimd128();
+ }
+ auto* lir = new (alloc())
+ LWasmBinarySimd128(op, lhsAlloc, rhsAlloc, tempReg0, tempReg1);
+ define(lir, ins);
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+#ifdef ENABLE_WASM_SIMD
+bool MWasmTernarySimd128::specializeBitselectConstantMaskAsShuffle(
+ int8_t shuffle[16]) {
+ return false;
+}
+bool MWasmTernarySimd128::canRelaxBitselect() { return false; }
+
+bool MWasmBinarySimd128::canPmaddubsw() { return false; }
+#endif
+
+bool MWasmBinarySimd128::specializeForConstantRhs() {
+  // There are probably many cases we could specialize here.
+ return false;
+}
+
+void LIRGenerator::visitWasmBinarySimd128WithConstant(
+ MWasmBinarySimd128WithConstant* ins) {
+ MOZ_CRASH("binary SIMD with constant NYI");
+}
+
+void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MDefinition* lhs = ins->lhs();
+ MDefinition* rhs = ins->rhs();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Simd128);
+ MOZ_ASSERT(rhs->type() == MIRType::Int32);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ if (rhs->isConstant()) {
+ int32_t shiftCount = rhs->toConstant()->toInt32();
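+    // Wasm SIMD shifts interpret the shift count modulo the lane width in
+    // bits, so mask the constant accordingly.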
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Shl:
+ case wasm::SimdOp::I8x16ShrU:
+ case wasm::SimdOp::I8x16ShrS:
+ shiftCount &= 7;
+ break;
+ case wasm::SimdOp::I16x8Shl:
+ case wasm::SimdOp::I16x8ShrU:
+ case wasm::SimdOp::I16x8ShrS:
+ shiftCount &= 15;
+ break;
+ case wasm::SimdOp::I32x4Shl:
+ case wasm::SimdOp::I32x4ShrU:
+ case wasm::SimdOp::I32x4ShrS:
+ shiftCount &= 31;
+ break;
+ case wasm::SimdOp::I64x2Shl:
+ case wasm::SimdOp::I64x2ShrU:
+ case wasm::SimdOp::I64x2ShrS:
+ shiftCount &= 63;
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift operation");
+ }
+# ifdef DEBUG
+ js::wasm::ReportSimdAnalysis("shift -> constant shift");
+# endif
+ auto* lir = new (alloc())
+ LWasmConstantShiftSimd128(useRegisterAtStart(lhs), shiftCount);
+ define(lir, ins);
+ return;
+ }
+
+# ifdef DEBUG
+ js::wasm::ReportSimdAnalysis("shift -> variable shift");
+# endif
+
+ LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
+ LAllocation rhsAlloc = useRegisterAtStart(rhs);
+ auto* lir = new (alloc()) LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc,
+ LDefinition::BogusTemp());
+ define(lir, ins);
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ SimdShuffle s = ins->shuffle();
+ switch (s.opd) {
+ case SimdShuffle::Operand::LEFT:
+ case SimdShuffle::Operand::RIGHT: {
+ LAllocation src;
+ switch (*s.permuteOp) {
+ case SimdPermuteOp::MOVE:
+ case SimdPermuteOp::BROADCAST_8x16:
+ case SimdPermuteOp::BROADCAST_16x8:
+ case SimdPermuteOp::PERMUTE_8x16:
+ case SimdPermuteOp::PERMUTE_16x8:
+ case SimdPermuteOp::PERMUTE_32x4:
+ case SimdPermuteOp::ROTATE_RIGHT_8x16:
+ case SimdPermuteOp::SHIFT_LEFT_8x16:
+ case SimdPermuteOp::SHIFT_RIGHT_8x16:
+ case SimdPermuteOp::REVERSE_16x8:
+ case SimdPermuteOp::REVERSE_32x4:
+ case SimdPermuteOp::REVERSE_64x2:
+ break;
+ default:
+ MOZ_CRASH("Unexpected operator");
+ }
+ if (s.opd == SimdShuffle::Operand::LEFT) {
+ src = useRegisterAtStart(ins->lhs());
+ } else {
+ src = useRegisterAtStart(ins->rhs());
+ }
+ auto* lir =
+ new (alloc()) LWasmPermuteSimd128(src, *s.permuteOp, s.control);
+ define(lir, ins);
+ break;
+ }
+ case SimdShuffle::Operand::BOTH:
+ case SimdShuffle::Operand::BOTH_SWAPPED: {
+ LDefinition temp = LDefinition::BogusTemp();
+ LAllocation lhs;
+ LAllocation rhs;
+ if (s.opd == SimdShuffle::Operand::BOTH) {
+ lhs = useRegisterAtStart(ins->lhs());
+ rhs = useRegisterAtStart(ins->rhs());
+ } else {
+ lhs = useRegisterAtStart(ins->rhs());
+ rhs = useRegisterAtStart(ins->lhs());
+ }
+ auto* lir = new (alloc())
+ LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control);
+ define(lir, ins);
+ break;
+ }
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ // Optimal code generation reuses the lhs register because the rhs scalar is
+ // merged into a vector lhs.
+ LAllocation lhs = useRegisterAtStart(ins->lhs());
+ if (ins->rhs()->type() == MIRType::Int64) {
+ auto* lir = new (alloc())
+ LWasmReplaceInt64LaneSimd128(lhs, useInt64Register(ins->rhs()));
+ defineReuseInput(lir, ins, 0);
+ } else {
+ auto* lir =
+ new (alloc()) LWasmReplaceLaneSimd128(lhs, useRegister(ins->rhs()));
+ defineReuseInput(lir, ins, 0);
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ switch (ins->input()->type()) {
+ case MIRType::Int64: {
+ // 64-bit integer splats.
+ // Load-and-(sign|zero)extend.
+ auto* lir = new (alloc())
+ LWasmInt64ToSimd128(useInt64RegisterAtStart(ins->input()));
+ define(lir, ins);
+ break;
+ }
+ case MIRType::Float32:
+ case MIRType::Double: {
+ // Floating-point splats.
+ auto* lir =
+ new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+ break;
+ }
+ default: {
+ // 32-bit integer splats.
+ auto* lir =
+ new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+ break;
+ }
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ins->input()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ LDefinition tempReg = LDefinition::BogusTemp();
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Neg:
+ case wasm::SimdOp::I16x8Neg:
+ case wasm::SimdOp::I32x4Neg:
+ case wasm::SimdOp::I64x2Neg:
+ case wasm::SimdOp::F32x4Neg:
+ case wasm::SimdOp::F64x2Neg:
+ case wasm::SimdOp::F32x4Abs:
+ case wasm::SimdOp::F64x2Abs:
+ case wasm::SimdOp::V128Not:
+ case wasm::SimdOp::F32x4Sqrt:
+ case wasm::SimdOp::F64x2Sqrt:
+ case wasm::SimdOp::I8x16Abs:
+ case wasm::SimdOp::I16x8Abs:
+ case wasm::SimdOp::I32x4Abs:
+ case wasm::SimdOp::I64x2Abs:
+ case wasm::SimdOp::I32x4TruncSatF32x4S:
+ case wasm::SimdOp::F32x4ConvertI32x4U:
+ case wasm::SimdOp::I32x4TruncSatF32x4U:
+ case wasm::SimdOp::I16x8ExtendLowI8x16S:
+ case wasm::SimdOp::I16x8ExtendHighI8x16S:
+ case wasm::SimdOp::I16x8ExtendLowI8x16U:
+ case wasm::SimdOp::I16x8ExtendHighI8x16U:
+ case wasm::SimdOp::I32x4ExtendLowI16x8S:
+ case wasm::SimdOp::I32x4ExtendHighI16x8S:
+ case wasm::SimdOp::I32x4ExtendLowI16x8U:
+ case wasm::SimdOp::I32x4ExtendHighI16x8U:
+ case wasm::SimdOp::I64x2ExtendLowI32x4S:
+ case wasm::SimdOp::I64x2ExtendHighI32x4S:
+ case wasm::SimdOp::I64x2ExtendLowI32x4U:
+ case wasm::SimdOp::I64x2ExtendHighI32x4U:
+ case wasm::SimdOp::F32x4ConvertI32x4S:
+ case wasm::SimdOp::F32x4Ceil:
+ case wasm::SimdOp::F32x4Floor:
+ case wasm::SimdOp::F32x4Trunc:
+ case wasm::SimdOp::F32x4Nearest:
+ case wasm::SimdOp::F64x2Ceil:
+ case wasm::SimdOp::F64x2Floor:
+ case wasm::SimdOp::F64x2Trunc:
+ case wasm::SimdOp::F64x2Nearest:
+ case wasm::SimdOp::F32x4DemoteF64x2Zero:
+ case wasm::SimdOp::F64x2PromoteLowF32x4:
+ case wasm::SimdOp::F64x2ConvertLowI32x4S:
+ case wasm::SimdOp::F64x2ConvertLowI32x4U:
+ case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S:
+ case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U:
+ case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S:
+ case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U:
+ case wasm::SimdOp::I8x16Popcnt:
+ case wasm::SimdOp::I32x4RelaxedTruncF32x4S:
+ case wasm::SimdOp::I32x4RelaxedTruncF32x4U:
+ case wasm::SimdOp::I32x4RelaxedTruncF64x2SZero:
+ case wasm::SimdOp::I32x4RelaxedTruncF64x2UZero:
+ break;
+ case wasm::SimdOp::I32x4TruncSatF64x2SZero:
+ case wasm::SimdOp::I32x4TruncSatF64x2UZero:
+ tempReg = tempSimd128();
+ break;
+ default:
+ MOZ_CRASH("Unary SimdOp not implemented");
+ }
+
+ LUse input = useRegisterAtStart(ins->input());
+ LWasmUnarySimd128* lir = new (alloc()) LWasmUnarySimd128(input, tempReg);
+ define(lir, ins);
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ if (canEmitWasmReduceSimd128AtUses(ins)) {
+ emitAtUses(ins);
+ return;
+ }
+
+ // Reductions (any_true, all_true, bitmask, extract_lane) uniformly prefer
+ // useRegisterAtStart:
+ //
+ // - In most cases, the input type differs from the output type, so there's no
+ // conflict and it doesn't really matter.
+ //
+ // - For extract_lane(0) on F32x4 and F64x2, input == output results in zero
+ // code being generated.
+ //
+ // - For extract_lane(k > 0) on F32x4 and F64x2, allowing the input register
+ // to be targeted lowers register pressure if it's the last use of the
+ // input.
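+  //
+  //   (The zero-code case falls out of register aliasing: on ARM64 the scalar
+  //   float or double already occupies the low lanes of the same vector
+  //   register.)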
+
+ if (ins->type() == MIRType::Int64) {
+ auto* lir = new (alloc())
+ LWasmReduceSimd128ToInt64(useRegisterAtStart(ins->input()));
+ defineInt64(lir, ins);
+ } else {
+ LDefinition tempReg = LDefinition::BogusTemp();
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Bitmask:
+ case wasm::SimdOp::I16x8Bitmask:
+ case wasm::SimdOp::I32x4Bitmask:
+ case wasm::SimdOp::I64x2Bitmask:
+ tempReg = tempSimd128();
+ break;
+ default:
+ break;
+ }
+
+ // Ideally we would reuse the input register for floating extract_lane if
+ // the lane is zero, but constraints in the register allocator require the
+ // input and output register types to be the same.
+ auto* lir = new (alloc())
+ LWasmReduceSimd128(useRegisterAtStart(ins->input()), tempReg);
+ define(lir, ins);
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmLoadLaneSimd128(MWasmLoadLaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ // On 64-bit systems, the base pointer can be 32 bits or 64 bits. Either way,
+ // it fits in a GPR so we can ignore the Register/Register64 distinction here.
+
+ // Optimal allocation here reuses the value input for the output register
+ // because codegen otherwise has to copy the input to the output; this is
+ // because load-lane is implemented as load + replace-lane. Bug 1706106 may
+ // change all of that, so leave it alone for now.
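+  //
+  // A minimal sketch of the emitted sequence for a 64-bit lane, with
+  // illustrative register names only:
+  //
+  //   mov  vDest.16b, vValue.16b   // copy input to output (if not reused)
+  //   ldr  x16, [xBase]            // scalar load
+  //   mov  vDest.d[lane], x16      // replace-lane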
+ LUse base = useRegisterAtStart(ins->base());
+ LUse inputUse = useRegisterAtStart(ins->value());
+ MOZ_ASSERT(!ins->hasMemoryBase());
+ LWasmLoadLaneSimd128* lir =
+ new (alloc()) LWasmLoadLaneSimd128(base, inputUse, temp(), LAllocation());
+ define(lir, ins);
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmStoreLaneSimd128(MWasmStoreLaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ // See comment above about the base pointer.
+
+ LUse base = useRegisterAtStart(ins->base());
+ LUse input = useRegisterAtStart(ins->value());
+ MOZ_ASSERT(!ins->hasMemoryBase());
+ LWasmStoreLaneSimd128* lir =
+ new (alloc()) LWasmStoreLaneSimd128(base, input, temp(), LAllocation());
+ add(lir, ins);
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
diff --git a/js/src/jit/arm64/Lowering-arm64.h b/js/src/jit/arm64/Lowering-arm64.h
new file mode 100644
index 0000000000..4ab52dd464
--- /dev/null
+++ b/js/src/jit/arm64/Lowering-arm64.h
@@ -0,0 +1,135 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_Lowering_arm64_h
+#define jit_arm64_Lowering_arm64_h
+
+#include "jit/shared/Lowering-shared.h"
+
+namespace js {
+namespace jit {
+
+class LIRGeneratorARM64 : public LIRGeneratorShared {
+ protected:
+ LIRGeneratorARM64(MIRGenerator* gen, MIRGraph& graph, LIRGraph& lirGraph)
+ : LIRGeneratorShared(gen, graph, lirGraph) {}
+
+ // Returns a box allocation. reg2 is ignored on 64-bit platforms.
+ LBoxAllocation useBoxFixed(MDefinition* mir, Register reg1, Register reg2,
+ bool useAtStart = false);
+
+ LAllocation useByteOpRegister(MDefinition* mir);
+ LAllocation useByteOpRegisterAtStart(MDefinition* mir);
+ LAllocation useByteOpRegisterOrNonDoubleConstant(MDefinition* mir);
+ LDefinition tempByteOpRegister();
+
+ LDefinition tempToUnbox();
+
+ bool needTempForPostBarrier() { return true; }
+
+ // ARM64 has a scratch register, so no need for another temp for dispatch ICs.
+ LDefinition tempForDispatchCache(MIRType outputType = MIRType::None) {
+ return LDefinition::BogusTemp();
+ }
+
+ void lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block,
+ size_t lirIndex);
+ void lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block,
+ size_t lirIndex) {
+ lowerTypedPhiInput(phi, inputPosition, block, lirIndex);
+ }
+ void defineInt64Phi(MPhi* phi, size_t lirIndex) {
+ defineTypedPhi(phi, lirIndex);
+ }
+ void lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
+ MDefinition* lhs, MDefinition* rhs);
+ void lowerUrshD(MUrsh* mir);
+
+ void lowerPowOfTwoI(MPow* mir);
+
+ void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
+ MDefinition* input);
+ void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
+ MDefinition* lhs, MDefinition* rhs);
+
+ void lowerForALUInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES, 0>* ins,
+ MDefinition* mir, MDefinition* input);
+ void lowerForALUInt64(
+ LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+ void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs,
+ MDefinition* rhs);
+ template <size_t Temps>
+ void lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+
+ void lowerForCompareI64AndBranch(MTest* mir, MCompare* comp, JSOp op,
+ MDefinition* left, MDefinition* right,
+ MBasicBlock* ifTrue, MBasicBlock* ifFalse);
+
+ void lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
+ MDefinition* input);
+
+ template <size_t Temps>
+ void lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
+ MDefinition* lhs, MDefinition* rhs);
+
+ void lowerBuiltinInt64ToFloatingPoint(MBuiltinInt64ToFloatingPoint* ins);
+ void lowerWasmBuiltinTruncateToInt64(MWasmBuiltinTruncateToInt64* ins);
+ void lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
+ MDefinition* lhs, MDefinition* rhs);
+ void lowerWasmBuiltinTruncateToInt32(MWasmBuiltinTruncateToInt32* ins);
+ void lowerTruncateDToInt32(MTruncateToInt32* ins);
+ void lowerTruncateFToInt32(MTruncateToInt32* ins);
+ void lowerDivI(MDiv* div);
+ void lowerModI(MMod* mod);
+ void lowerDivI64(MDiv* div);
+ void lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div);
+ void lowerModI64(MMod* mod);
+ void lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod);
+ void lowerUDivI64(MDiv* div);
+ void lowerUModI64(MMod* mod);
+ void lowerNegI(MInstruction* ins, MDefinition* input);
+ void lowerNegI64(MInstruction* ins, MDefinition* input);
+ void lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs);
+ void lowerUDiv(MDiv* div);
+ void lowerUMod(MMod* mod);
+ void lowerWasmSelectI(MWasmSelect* select);
+ void lowerWasmSelectI64(MWasmSelect* select);
+ bool canSpecializeWasmCompareAndSelect(MCompare::CompareType compTy,
+ MIRType insTy);
+ void lowerWasmCompareAndSelect(MWasmSelect* ins, MDefinition* lhs,
+ MDefinition* rhs, MCompare::CompareType compTy,
+ JSOp jsop);
+
+ void lowerBigIntLsh(MBigIntLsh* ins);
+ void lowerBigIntRsh(MBigIntRsh* ins);
+ void lowerBigIntDiv(MBigIntDiv* ins);
+ void lowerBigIntMod(MBigIntMod* ins);
+
+ void lowerAtomicLoad64(MLoadUnboxedScalar* ins);
+ void lowerAtomicStore64(MStoreUnboxedScalar* ins);
+
+#ifdef ENABLE_WASM_SIMD
+ bool canFoldReduceSimd128AndBranch(wasm::SimdOp op);
+ bool canEmitWasmReduceSimd128AtUses(MWasmReduceSimd128* ins);
+#endif
+
+ LTableSwitchV* newLTableSwitchV(MTableSwitch* ins);
+ LTableSwitch* newLTableSwitch(const LAllocation& in,
+ const LDefinition& inputCopy,
+ MTableSwitch* ins);
+
+ void lowerPhi(MPhi* phi);
+};
+
+typedef LIRGeneratorARM64 LIRGeneratorSpecific;
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm64_Lowering_arm64_h */
diff --git a/js/src/jit/arm64/MacroAssembler-arm64-inl.h b/js/src/jit/arm64/MacroAssembler-arm64-inl.h
new file mode 100644
index 0000000000..283867a29a
--- /dev/null
+++ b/js/src/jit/arm64/MacroAssembler-arm64-inl.h
@@ -0,0 +1,4079 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_MacroAssembler_arm64_inl_h
+#define jit_arm64_MacroAssembler_arm64_inl_h
+
+#include "jit/arm64/MacroAssembler-arm64.h"
+
+namespace js {
+namespace jit {
+
+//{{{ check_macroassembler_style
+
+void MacroAssembler::move64(Register64 src, Register64 dest) {
+ Mov(ARMRegister(dest.reg, 64), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::move64(Imm64 imm, Register64 dest) {
+ Mov(ARMRegister(dest.reg, 64), imm.value);
+}
+
+void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) {
+ Fmov(ARMRegister(dest, 32), ARMFPRegister(src, 32));
+}
+
+void MacroAssembler::moveGPRToFloat32(Register src, FloatRegister dest) {
+ Fmov(ARMFPRegister(dest, 32), ARMRegister(src, 32));
+}
+
+void MacroAssembler::move8SignExtend(Register src, Register dest) {
+ Sxtb(ARMRegister(dest, 32), ARMRegister(src, 32));
+}
+
+void MacroAssembler::move16SignExtend(Register src, Register dest) {
+ Sxth(ARMRegister(dest, 32), ARMRegister(src, 32));
+}
+
+void MacroAssembler::moveDoubleToGPR64(FloatRegister src, Register64 dest) {
+ Fmov(ARMRegister(dest.reg, 64), ARMFPRegister(src, 64));
+}
+
+void MacroAssembler::moveGPR64ToDouble(Register64 src, FloatRegister dest) {
+ Fmov(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::move64To32(Register64 src, Register dest) {
+ Mov(ARMRegister(dest, 32), ARMRegister(src.reg, 32));
+}
+
+void MacroAssembler::move32To64ZeroExtend(Register src, Register64 dest) {
+ Uxtw(ARMRegister(dest.reg, 64), ARMRegister(src, 64));
+}
+
+void MacroAssembler::move8To64SignExtend(Register src, Register64 dest) {
+ Sxtb(ARMRegister(dest.reg, 64), ARMRegister(src, 32));
+}
+
+void MacroAssembler::move16To64SignExtend(Register src, Register64 dest) {
+ Sxth(ARMRegister(dest.reg, 64), ARMRegister(src, 32));
+}
+
+void MacroAssembler::move32To64SignExtend(Register src, Register64 dest) {
+ Sxtw(ARMRegister(dest.reg, 64), ARMRegister(src, 32));
+}
+
+void MacroAssembler::move32SignExtendToPtr(Register src, Register dest) {
+ Sxtw(ARMRegister(dest, 64), ARMRegister(src, 32));
+}
+
+void MacroAssembler::move32ZeroExtendToPtr(Register src, Register dest) {
+ Uxtw(ARMRegister(dest, 64), ARMRegister(src, 64));
+}
+
+// ===============================================================
+// Load instructions
+
+void MacroAssembler::load32SignExtendToPtr(const Address& src, Register dest) {
+ load32(src, dest);
+ move32To64SignExtend(dest, Register64(dest));
+}
+
+void MacroAssembler::loadAbiReturnAddress(Register dest) { movePtr(lr, dest); }
+
+// ===============================================================
+// Logical instructions
+
+void MacroAssembler::not32(Register reg) {
+ Orn(ARMRegister(reg, 32), vixl::wzr, ARMRegister(reg, 32));
+}
+
+void MacroAssembler::notPtr(Register reg) {
+ Orn(ARMRegister(reg, 64), vixl::xzr, ARMRegister(reg, 64));
+}
+
+void MacroAssembler::and32(Register src, Register dest) {
+ And(ARMRegister(dest, 32), ARMRegister(dest, 32),
+ Operand(ARMRegister(src, 32)));
+}
+
+void MacroAssembler::and32(Imm32 imm, Register dest) {
+ And(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value));
+}
+
+void MacroAssembler::and32(Imm32 imm, Register src, Register dest) {
+ And(ARMRegister(dest, 32), ARMRegister(src, 32), Operand(imm.value));
+}
+
+void MacroAssembler::and32(Imm32 imm, const Address& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != dest.base);
+ load32(dest, scratch32.asUnsized());
+ And(scratch32, scratch32, Operand(imm.value));
+ store32(scratch32.asUnsized(), dest);
+}
+
+void MacroAssembler::and32(const Address& src, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != src.base);
+ load32(src, scratch32.asUnsized());
+ And(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(scratch32));
+}
+
+void MacroAssembler::andPtr(Register src, Register dest) {
+ And(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(ARMRegister(src, 64)));
+}
+
+void MacroAssembler::andPtr(Imm32 imm, Register dest) {
+ And(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+}
+
+void MacroAssembler::and64(Imm64 imm, Register64 dest) {
+ And(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value));
+}
+
+void MacroAssembler::and64(Register64 src, Register64 dest) {
+ And(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64),
+ ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::or64(Imm64 imm, Register64 dest) {
+ Orr(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value));
+}
+
+void MacroAssembler::or32(Imm32 imm, Register dest) {
+ Orr(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value));
+}
+
+void MacroAssembler::or32(Register src, Register dest) {
+ Orr(ARMRegister(dest, 32), ARMRegister(dest, 32),
+ Operand(ARMRegister(src, 32)));
+}
+
+void MacroAssembler::or32(Imm32 imm, const Address& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != dest.base);
+ load32(dest, scratch32.asUnsized());
+ Orr(scratch32, scratch32, Operand(imm.value));
+ store32(scratch32.asUnsized(), dest);
+}
+
+void MacroAssembler::orPtr(Register src, Register dest) {
+ Orr(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(ARMRegister(src, 64)));
+}
+
+void MacroAssembler::orPtr(Imm32 imm, Register dest) {
+ Orr(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+}
+
+void MacroAssembler::or64(Register64 src, Register64 dest) {
+ orPtr(src.reg, dest.reg);
+}
+
+void MacroAssembler::xor64(Register64 src, Register64 dest) {
+ xorPtr(src.reg, dest.reg);
+}
+
+void MacroAssembler::xor32(Register src, Register dest) {
+ Eor(ARMRegister(dest, 32), ARMRegister(dest, 32),
+ Operand(ARMRegister(src, 32)));
+}
+
+void MacroAssembler::xor32(Imm32 imm, Register dest) {
+ Eor(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value));
+}
+
+void MacroAssembler::xor32(Imm32 imm, const Address& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != dest.base);
+ load32(dest, scratch32.asUnsized());
+ Eor(scratch32, scratch32, Operand(imm.value));
+ store32(scratch32.asUnsized(), dest);
+}
+
+void MacroAssembler::xor32(const Address& src, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != src.base);
+ load32(src, scratch32.asUnsized());
+ Eor(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(scratch32));
+}
+
+void MacroAssembler::xorPtr(Register src, Register dest) {
+ Eor(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(ARMRegister(src, 64)));
+}
+
+void MacroAssembler::xorPtr(Imm32 imm, Register dest) {
+ Eor(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+}
+
+void MacroAssembler::xor64(Imm64 imm, Register64 dest) {
+ Eor(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value));
+}
+
+// ===============================================================
+// Swap instructions
+
+void MacroAssembler::byteSwap16SignExtend(Register reg) {
+ rev16(ARMRegister(reg, 32), ARMRegister(reg, 32));
+ sxth(ARMRegister(reg, 32), ARMRegister(reg, 32));
+}
+
+void MacroAssembler::byteSwap16ZeroExtend(Register reg) {
+ rev16(ARMRegister(reg, 32), ARMRegister(reg, 32));
+ uxth(ARMRegister(reg, 32), ARMRegister(reg, 32));
+}
+
+void MacroAssembler::byteSwap32(Register reg) {
+ rev(ARMRegister(reg, 32), ARMRegister(reg, 32));
+}
+
+void MacroAssembler::byteSwap64(Register64 reg) {
+ rev(ARMRegister(reg.reg, 64), ARMRegister(reg.reg, 64));
+}
+
+// ===============================================================
+// Arithmetic functions
+
+void MacroAssembler::add32(Register src, Register dest) {
+ Add(ARMRegister(dest, 32), ARMRegister(dest, 32),
+ Operand(ARMRegister(src, 32)));
+}
+
+void MacroAssembler::add32(Imm32 imm, Register dest) {
+ Add(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value));
+}
+
+void MacroAssembler::add32(Imm32 imm, const Address& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != dest.base);
+
+ Ldr(scratch32, toMemOperand(dest));
+ Add(scratch32, scratch32, Operand(imm.value));
+ Str(scratch32, toMemOperand(dest));
+}
+
+void MacroAssembler::addPtr(Register src, Register dest) {
+ addPtr(src, dest, dest);
+}
+
+void MacroAssembler::addPtr(Register src1, Register src2, Register dest) {
+ Add(ARMRegister(dest, 64), ARMRegister(src1, 64),
+ Operand(ARMRegister(src2, 64)));
+}
+
+void MacroAssembler::addPtr(Imm32 imm, Register dest) {
+ addPtr(imm, dest, dest);
+}
+
+void MacroAssembler::addPtr(Imm32 imm, Register src, Register dest) {
+ Add(ARMRegister(dest, 64), ARMRegister(src, 64), Operand(imm.value));
+}
+
+void MacroAssembler::addPtr(ImmWord imm, Register dest) {
+ Add(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+}
+
+void MacroAssembler::addPtr(Imm32 imm, const Address& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != dest.base);
+
+ Ldr(scratch64, toMemOperand(dest));
+ Add(scratch64, scratch64, Operand(imm.value));
+ Str(scratch64, toMemOperand(dest));
+}
+
+void MacroAssembler::addPtr(const Address& src, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != src.base);
+
+ Ldr(scratch64, toMemOperand(src));
+ Add(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(scratch64));
+}
+
+void MacroAssembler::add64(Register64 src, Register64 dest) {
+ addPtr(src.reg, dest.reg);
+}
+
+void MacroAssembler::add64(Imm32 imm, Register64 dest) {
+ Add(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value));
+}
+
+void MacroAssembler::add64(Imm64 imm, Register64 dest) {
+ Add(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value));
+}
+
+CodeOffset MacroAssembler::sub32FromStackPtrWithPatch(Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ AutoForbidPoolsAndNops afp(this,
+ /* max number of instructions in scope = */ 3);
+ CodeOffset offs = CodeOffset(currentOffset());
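+  // Emit a movz/movk pair as a 32-bit immediate placeholder;
+  // patchSub32FromStackPtr later writes the real value into their
+  // 16-bit immediate fields.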
+ movz(scratch, 0, 0);
+ movk(scratch, 0, 16);
+ Sub(ARMRegister(dest, 64), sp, scratch);
+ return offs;
+}
+
+void MacroAssembler::patchSub32FromStackPtr(CodeOffset offset, Imm32 imm) {
+ Instruction* i1 = getInstructionAt(BufferOffset(offset.offset()));
+ MOZ_ASSERT(i1->IsMovz());
+ i1->SetInstructionBits(i1->InstructionBits() |
+ ImmMoveWide(uint16_t(imm.value)));
+
+ Instruction* i2 = getInstructionAt(BufferOffset(offset.offset() + 4));
+ MOZ_ASSERT(i2->IsMovk());
+ i2->SetInstructionBits(i2->InstructionBits() |
+ ImmMoveWide(uint16_t(imm.value >> 16)));
+}
+
+void MacroAssembler::addDouble(FloatRegister src, FloatRegister dest) {
+ fadd(ARMFPRegister(dest, 64), ARMFPRegister(dest, 64),
+ ARMFPRegister(src, 64));
+}
+
+void MacroAssembler::addFloat32(FloatRegister src, FloatRegister dest) {
+ fadd(ARMFPRegister(dest, 32), ARMFPRegister(dest, 32),
+ ARMFPRegister(src, 32));
+}
+
+void MacroAssembler::sub32(Imm32 imm, Register dest) {
+ Sub(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value));
+}
+
+void MacroAssembler::sub32(Register src, Register dest) {
+ Sub(ARMRegister(dest, 32), ARMRegister(dest, 32),
+ Operand(ARMRegister(src, 32)));
+}
+
+void MacroAssembler::sub32(const Address& src, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != src.base);
+ load32(src, scratch32.asUnsized());
+ Sub(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(scratch32));
+}
+
+void MacroAssembler::subPtr(Register src, Register dest) {
+ Sub(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(ARMRegister(src, 64)));
+}
+
+void MacroAssembler::subPtr(Register src, const Address& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != dest.base);
+
+ Ldr(scratch64, toMemOperand(dest));
+ Sub(scratch64, scratch64, Operand(ARMRegister(src, 64)));
+ Str(scratch64, toMemOperand(dest));
+}
+
+void MacroAssembler::subPtr(Imm32 imm, Register dest) {
+ Sub(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+}
+
+void MacroAssembler::subPtr(const Address& addr, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != addr.base);
+
+ Ldr(scratch64, toMemOperand(addr));
+ Sub(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(scratch64));
+}
+
+void MacroAssembler::sub64(Register64 src, Register64 dest) {
+ Sub(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64),
+ ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::sub64(Imm64 imm, Register64 dest) {
+ Sub(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value));
+}
+
+void MacroAssembler::subDouble(FloatRegister src, FloatRegister dest) {
+ fsub(ARMFPRegister(dest, 64), ARMFPRegister(dest, 64),
+ ARMFPRegister(src, 64));
+}
+
+void MacroAssembler::subFloat32(FloatRegister src, FloatRegister dest) {
+ fsub(ARMFPRegister(dest, 32), ARMFPRegister(dest, 32),
+ ARMFPRegister(src, 32));
+}
+
+void MacroAssembler::mul32(Register rhs, Register srcDest) {
+ mul32(srcDest, rhs, srcDest, nullptr);
+}
+
+void MacroAssembler::mul32(Imm32 imm, Register srcDest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+
+ move32(imm, scratch32.asUnsized());
+ mul32(scratch32.asUnsized(), srcDest);
+}
+
+void MacroAssembler::mul32(Register src1, Register src2, Register dest,
+ Label* onOver) {
+ if (onOver) {
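+    // Smull gives the exact 64-bit product; if it differs from its own low
+    // 32 bits sign-extended, the 32-bit multiply overflowed.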
+ Smull(ARMRegister(dest, 64), ARMRegister(src1, 32), ARMRegister(src2, 32));
+ Cmp(ARMRegister(dest, 64), Operand(ARMRegister(dest, 32), vixl::SXTW));
+ B(onOver, NotEqual);
+
+ // Clear upper 32 bits.
+ Uxtw(ARMRegister(dest, 64), ARMRegister(dest, 64));
+ } else {
+ Mul(ARMRegister(dest, 32), ARMRegister(src1, 32), ARMRegister(src2, 32));
+ }
+}
+
+void MacroAssembler::mulHighUnsigned32(Imm32 imm, Register src, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+
+ Mov(scratch32, int32_t(imm.value));
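+  // Unsigned 32x32->64 multiply; the result is the high 32 bits of the
+  // product, extracted by the shift below.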
+ Umull(ARMRegister(dest, 64), scratch32, ARMRegister(src, 32));
+
+ Lsr(ARMRegister(dest, 64), ARMRegister(dest, 64), 32);
+}
+
+void MacroAssembler::mulPtr(Register rhs, Register srcDest) {
+ Mul(ARMRegister(srcDest, 64), ARMRegister(srcDest, 64), ARMRegister(rhs, 64));
+}
+
+void MacroAssembler::mul64(Imm64 imm, const Register64& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(dest.reg != scratch64.asUnsized());
+ mov(ImmWord(imm.value), scratch64.asUnsized());
+ Mul(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), scratch64);
+}
+
+void MacroAssembler::mul64(const Register64& src, const Register64& dest,
+ const Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+ Mul(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64),
+ ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::mul64(const Register64& src1, const Register64& src2,
+ const Register64& dest) {
+ Mul(ARMRegister(dest.reg, 64), ARMRegister(src1.reg, 64),
+ ARMRegister(src2.reg, 64));
+}
+
+void MacroAssembler::mul64(Imm64 src1, const Register64& src2,
+ const Register64& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(dest.reg != scratch64.asUnsized());
+ mov(ImmWord(src1.value), scratch64.asUnsized());
+ Mul(ARMRegister(dest.reg, 64), ARMRegister(src2.reg, 64), scratch64);
+}
+
+void MacroAssembler::mulBy3(Register src, Register dest) {
+ ARMRegister xdest(dest, 64);
+ ARMRegister xsrc(src, 64);
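+  // dest = src + (src << 1), i.e. src * 3.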
+ Add(xdest, xsrc, Operand(xsrc, vixl::LSL, 1));
+}
+
+void MacroAssembler::mulFloat32(FloatRegister src, FloatRegister dest) {
+ fmul(ARMFPRegister(dest, 32), ARMFPRegister(dest, 32),
+ ARMFPRegister(src, 32));
+}
+
+void MacroAssembler::mulDouble(FloatRegister src, FloatRegister dest) {
+ fmul(ARMFPRegister(dest, 64), ARMFPRegister(dest, 64),
+ ARMFPRegister(src, 64));
+}
+
+void MacroAssembler::mulDoublePtr(ImmPtr imm, Register temp,
+ FloatRegister dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(temp != scratch);
+ movePtr(imm, scratch);
+ const ARMFPRegister scratchDouble = temps.AcquireD();
+ Ldr(scratchDouble, MemOperand(Address(scratch, 0)));
+ fmul(ARMFPRegister(dest, 64), ARMFPRegister(dest, 64), scratchDouble);
+}
+
+void MacroAssembler::quotient32(Register rhs, Register srcDest,
+ bool isUnsigned) {
+ if (isUnsigned) {
+ Udiv(ARMRegister(srcDest, 32), ARMRegister(srcDest, 32),
+ ARMRegister(rhs, 32));
+ } else {
+ Sdiv(ARMRegister(srcDest, 32), ARMRegister(srcDest, 32),
+ ARMRegister(rhs, 32));
+ }
+}
+
+// This does not deal with x % 0 or INT_MIN % -1; the caller needs to filter
+// out those cases when they may occur.
+
+void MacroAssembler::remainder32(Register rhs, Register srcDest,
+ bool isUnsigned) {
+ vixl::UseScratchRegisterScope temps(this);
+ ARMRegister scratch = temps.AcquireW();
+ if (isUnsigned) {
+ Udiv(scratch, ARMRegister(srcDest, 32), ARMRegister(rhs, 32));
+ } else {
+ Sdiv(scratch, ARMRegister(srcDest, 32), ARMRegister(rhs, 32));
+ }
+ Mul(scratch, scratch, ARMRegister(rhs, 32));
+ Sub(ARMRegister(srcDest, 32), ARMRegister(srcDest, 32), scratch);
+}
+
+void MacroAssembler::divFloat32(FloatRegister src, FloatRegister dest) {
+ fdiv(ARMFPRegister(dest, 32), ARMFPRegister(dest, 32),
+ ARMFPRegister(src, 32));
+}
+
+void MacroAssembler::divDouble(FloatRegister src, FloatRegister dest) {
+ fdiv(ARMFPRegister(dest, 64), ARMFPRegister(dest, 64),
+ ARMFPRegister(src, 64));
+}
+
+void MacroAssembler::inc64(AbsoluteAddress dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratchAddr64 = temps.AcquireX();
+ const ARMRegister scratch64 = temps.AcquireX();
+
+ Mov(scratchAddr64, uint64_t(dest.addr));
+ Ldr(scratch64, MemOperand(scratchAddr64, 0));
+ Add(scratch64, scratch64, Operand(1));
+ Str(scratch64, MemOperand(scratchAddr64, 0));
+}
+
+void MacroAssembler::neg32(Register reg) {
+ Neg(ARMRegister(reg, 32), Operand(ARMRegister(reg, 32)));
+}
+
+void MacroAssembler::neg64(Register64 reg) { negPtr(reg.reg); }
+
+void MacroAssembler::negPtr(Register reg) {
+ Neg(ARMRegister(reg, 64), Operand(ARMRegister(reg, 64)));
+}
+
+void MacroAssembler::negateFloat(FloatRegister reg) {
+ fneg(ARMFPRegister(reg, 32), ARMFPRegister(reg, 32));
+}
+
+void MacroAssembler::negateDouble(FloatRegister reg) {
+ fneg(ARMFPRegister(reg, 64), ARMFPRegister(reg, 64));
+}
+
+void MacroAssembler::abs32(Register src, Register dest) {
+ Cmp(ARMRegister(src, 32), wzr);
+ Cneg(ARMRegister(dest, 32), ARMRegister(src, 32), Assembler::LessThan);
+}
+
+void MacroAssembler::absFloat32(FloatRegister src, FloatRegister dest) {
+ fabs(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
+}
+
+void MacroAssembler::absDouble(FloatRegister src, FloatRegister dest) {
+ fabs(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
+}
+
+void MacroAssembler::sqrtFloat32(FloatRegister src, FloatRegister dest) {
+ fsqrt(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
+}
+
+void MacroAssembler::sqrtDouble(FloatRegister src, FloatRegister dest) {
+ fsqrt(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
+}
+
+void MacroAssembler::minFloat32(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ MOZ_ASSERT(handleNaN); // Always true for wasm
+ fmin(ARMFPRegister(srcDest, 32), ARMFPRegister(srcDest, 32),
+ ARMFPRegister(other, 32));
+}
+
+void MacroAssembler::minDouble(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ MOZ_ASSERT(handleNaN); // Always true for wasm
+ fmin(ARMFPRegister(srcDest, 64), ARMFPRegister(srcDest, 64),
+ ARMFPRegister(other, 64));
+}
+
+void MacroAssembler::maxFloat32(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ MOZ_ASSERT(handleNaN); // Always true for wasm
+ fmax(ARMFPRegister(srcDest, 32), ARMFPRegister(srcDest, 32),
+ ARMFPRegister(other, 32));
+}
+
+void MacroAssembler::maxDouble(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ MOZ_ASSERT(handleNaN); // Always true for wasm
+ fmax(ARMFPRegister(srcDest, 64), ARMFPRegister(srcDest, 64),
+ ARMFPRegister(other, 64));
+}
+
+// ===============================================================
+// Shift functions
+
+void MacroAssembler::lshiftPtr(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ Lsl(ARMRegister(dest, 64), ARMRegister(dest, 64), imm.value);
+}
+
+void MacroAssembler::lshiftPtr(Register shift, Register dest) {
+ Lsl(ARMRegister(dest, 64), ARMRegister(dest, 64), ARMRegister(shift, 64));
+}
+
+void MacroAssembler::lshift64(Imm32 imm, Register64 dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ lshiftPtr(imm, dest.reg);
+}
+
+void MacroAssembler::lshift64(Register shift, Register64 srcDest) {
+ Lsl(ARMRegister(srcDest.reg, 64), ARMRegister(srcDest.reg, 64),
+ ARMRegister(shift, 64));
+}
+
+void MacroAssembler::lshift32(Register shift, Register dest) {
+ Lsl(ARMRegister(dest, 32), ARMRegister(dest, 32), ARMRegister(shift, 32));
+}
+
+void MacroAssembler::flexibleLshift32(Register src, Register dest) {
+ lshift32(src, dest);
+}
+
+void MacroAssembler::lshift32(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ Lsl(ARMRegister(dest, 32), ARMRegister(dest, 32), imm.value);
+}
+
+void MacroAssembler::rshiftPtr(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ Lsr(ARMRegister(dest, 64), ARMRegister(dest, 64), imm.value);
+}
+
+void MacroAssembler::rshiftPtr(Imm32 imm, Register src, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ Lsr(ARMRegister(dest, 64), ARMRegister(src, 64), imm.value);
+}
+
+void MacroAssembler::rshiftPtr(Register shift, Register dest) {
+ Lsr(ARMRegister(dest, 64), ARMRegister(dest, 64), ARMRegister(shift, 64));
+}
+
+void MacroAssembler::rshift32(Register shift, Register dest) {
+ Lsr(ARMRegister(dest, 32), ARMRegister(dest, 32), ARMRegister(shift, 32));
+}
+
+void MacroAssembler::flexibleRshift32(Register src, Register dest) {
+ rshift32(src, dest);
+}
+
+void MacroAssembler::rshift32(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ Lsr(ARMRegister(dest, 32), ARMRegister(dest, 32), imm.value);
+}
+
+void MacroAssembler::rshiftPtrArithmetic(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ Asr(ARMRegister(dest, 64), ARMRegister(dest, 64), imm.value);
+}
+
+void MacroAssembler::rshift32Arithmetic(Register shift, Register dest) {
+ Asr(ARMRegister(dest, 32), ARMRegister(dest, 32), ARMRegister(shift, 32));
+}
+
+void MacroAssembler::rshift32Arithmetic(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ Asr(ARMRegister(dest, 32), ARMRegister(dest, 32), imm.value);
+}
+
+void MacroAssembler::flexibleRshift32Arithmetic(Register src, Register dest) {
+ rshift32Arithmetic(src, dest);
+}
+
+void MacroAssembler::rshift64(Imm32 imm, Register64 dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ rshiftPtr(imm, dest.reg);
+}
+
+void MacroAssembler::rshift64(Register shift, Register64 srcDest) {
+ Lsr(ARMRegister(srcDest.reg, 64), ARMRegister(srcDest.reg, 64),
+ ARMRegister(shift, 64));
+}
+
+void MacroAssembler::rshift64Arithmetic(Imm32 imm, Register64 dest) {
+ Asr(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), imm.value);
+}
+
+void MacroAssembler::rshift64Arithmetic(Register shift, Register64 srcDest) {
+ Asr(ARMRegister(srcDest.reg, 64), ARMRegister(srcDest.reg, 64),
+ ARMRegister(shift, 64));
+}
+
+// ===============================================================
+// Condition functions
+
+void MacroAssembler::cmp8Set(Condition cond, Address lhs, Imm32 rhs,
+ Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+
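+  // Widen the byte first: equality and unsigned conditions use a
+  // zero-extended operand, signed comparisons a sign-extended one, so the
+  // 32-bit compare below preserves the intended ordering.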
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ load8ZeroExtend(lhs, scratch);
+ cmp32Set(cond, scratch, Imm32(uint8_t(rhs.value)), dest);
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ load8SignExtend(lhs, scratch);
+ cmp32Set(cond, scratch, Imm32(int8_t(rhs.value)), dest);
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+}
+
+void MacroAssembler::cmp16Set(Condition cond, Address lhs, Imm32 rhs,
+ Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ load16ZeroExtend(lhs, scratch);
+ cmp32Set(cond, scratch, Imm32(uint16_t(rhs.value)), dest);
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ load16SignExtend(lhs, scratch);
+ cmp32Set(cond, scratch, Imm32(int16_t(rhs.value)), dest);
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+}
+
+template <typename T1, typename T2>
+void MacroAssembler::cmp32Set(Condition cond, T1 lhs, T2 rhs, Register dest) {
+ cmp32(lhs, rhs);
+ emitSet(cond, dest);
+}
+
+void MacroAssembler::cmp64Set(Condition cond, Address lhs, Imm64 rhs,
+ Register dest) {
+ cmpPtrSet(cond, lhs, ImmWord(static_cast<uintptr_t>(rhs.value)), dest);
+}
+
+template <typename T1, typename T2>
+void MacroAssembler::cmpPtrSet(Condition cond, T1 lhs, T2 rhs, Register dest) {
+ cmpPtr(lhs, rhs);
+ emitSet(cond, dest);
+}
+
+// ===============================================================
+// Rotation functions
+
+void MacroAssembler::rotateLeft(Imm32 count, Register input, Register dest) {
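+  // AArch64 has no rotate-left; rotating left by n is rotating right by
+  // (32 - n) & 31.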
+ Ror(ARMRegister(dest, 32), ARMRegister(input, 32), (32 - count.value) & 31);
+}
+
+void MacroAssembler::rotateLeft(Register count, Register input, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireW();
+ // Really 32 - count, but the upper bits of the result are ignored.
+ Neg(scratch, ARMRegister(count, 32));
+ Ror(ARMRegister(dest, 32), ARMRegister(input, 32), scratch);
+}
+
+void MacroAssembler::rotateRight(Imm32 count, Register input, Register dest) {
+ Ror(ARMRegister(dest, 32), ARMRegister(input, 32), count.value & 31);
+}
+
+void MacroAssembler::rotateRight(Register count, Register input,
+ Register dest) {
+ Ror(ARMRegister(dest, 32), ARMRegister(input, 32), ARMRegister(count, 32));
+}
+
+void MacroAssembler::rotateLeft64(Register count, Register64 input,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ // Really 64 - count, but the upper bits of the result are ignored.
+ Neg(scratch, ARMRegister(count, 64));
+ Ror(ARMRegister(dest.reg, 64), ARMRegister(input.reg, 64), scratch);
+}
+
+void MacroAssembler::rotateLeft64(Imm32 count, Register64 input,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+
+ Ror(ARMRegister(dest.reg, 64), ARMRegister(input.reg, 64),
+ (64 - count.value) & 63);
+}
+
+void MacroAssembler::rotateRight64(Register count, Register64 input,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+
+ Ror(ARMRegister(dest.reg, 64), ARMRegister(input.reg, 64),
+ ARMRegister(count, 64));
+}
+
+void MacroAssembler::rotateRight64(Imm32 count, Register64 input,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+
+ Ror(ARMRegister(dest.reg, 64), ARMRegister(input.reg, 64), count.value & 63);
+}
+
+// ===============================================================
+// Bit counting functions
+
+void MacroAssembler::clz32(Register src, Register dest, bool knownNotZero) {
+ Clz(ARMRegister(dest, 32), ARMRegister(src, 32));
+}
+
+void MacroAssembler::ctz32(Register src, Register dest, bool knownNotZero) {
+ Rbit(ARMRegister(dest, 32), ARMRegister(src, 32));
+ Clz(ARMRegister(dest, 32), ARMRegister(dest, 32));
+}
+
+void MacroAssembler::clz64(Register64 src, Register dest) {
+ Clz(ARMRegister(dest, 64), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::ctz64(Register64 src, Register dest) {
+ Rbit(ARMRegister(dest, 64), ARMRegister(src.reg, 64));
+ Clz(ARMRegister(dest, 64), ARMRegister(dest, 64));
+}
+
+void MacroAssembler::popcnt32(Register src_, Register dest_, Register tmp_) {
+ MOZ_ASSERT(tmp_ != Register::Invalid());
+
+ // Equivalent to mozilla::CountPopulation32().
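+  // Parallel bit count: sum adjacent bits into 2-bit fields, then 4- and
+  // 8-bit fields, and finally accumulate the byte sums into the top byte.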
+
+ ARMRegister src(src_, 32);
+ ARMRegister dest(dest_, 32);
+ ARMRegister tmp(tmp_, 32);
+
+ Mov(tmp, src);
+ if (src_ != dest_) {
+ Mov(dest, src);
+ }
+ Lsr(dest, dest, 1);
+ And(dest, dest, 0x55555555);
+ Sub(dest, tmp, dest);
+ Lsr(tmp, dest, 2);
+ And(tmp, tmp, 0x33333333);
+ And(dest, dest, 0x33333333);
+ Add(dest, tmp, dest);
+ Add(dest, dest, Operand(dest, vixl::LSR, 4));
+ And(dest, dest, 0x0F0F0F0F);
+ Add(dest, dest, Operand(dest, vixl::LSL, 8));
+ Add(dest, dest, Operand(dest, vixl::LSL, 16));
+ Lsr(dest, dest, 24);
+}
+
+void MacroAssembler::popcnt64(Register64 src_, Register64 dest_,
+ Register tmp_) {
+ MOZ_ASSERT(tmp_ != Register::Invalid());
+
+ // Equivalent to mozilla::CountPopulation64(), though likely more efficient.
+
+ ARMRegister src(src_.reg, 64);
+ ARMRegister dest(dest_.reg, 64);
+ ARMRegister tmp(tmp_, 64);
+
+ Mov(tmp, src);
+ if (src_ != dest_) {
+ Mov(dest, src);
+ }
+ Lsr(dest, dest, 1);
+ And(dest, dest, 0x5555555555555555);
+ Sub(dest, tmp, dest);
+ Lsr(tmp, dest, 2);
+ And(tmp, tmp, 0x3333333333333333);
+ And(dest, dest, 0x3333333333333333);
+ Add(dest, tmp, dest);
+ Add(dest, dest, Operand(dest, vixl::LSR, 4));
+ And(dest, dest, 0x0F0F0F0F0F0F0F0F);
+ Add(dest, dest, Operand(dest, vixl::LSL, 8));
+ Add(dest, dest, Operand(dest, vixl::LSL, 16));
+ Add(dest, dest, Operand(dest, vixl::LSL, 32));
+ Lsr(dest, dest, 56);
+}
+
+// ===============================================================
+// Branch functions
+
+void MacroAssembler::branch8(Condition cond, const Address& lhs, Imm32 rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ load8ZeroExtend(lhs, scratch);
+ branch32(cond, scratch, Imm32(uint8_t(rhs.value)), label);
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ load8SignExtend(lhs, scratch);
+ branch32(cond, scratch, Imm32(int8_t(rhs.value)), label);
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+}
+
+void MacroAssembler::branch8(Condition cond, const BaseIndex& lhs, Register rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ load8ZeroExtend(lhs, scratch);
+ branch32(cond, scratch, rhs, label);
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ load8SignExtend(lhs, scratch);
+ branch32(cond, scratch, rhs, label);
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+}
+
+void MacroAssembler::branch16(Condition cond, const Address& lhs, Imm32 rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ load16ZeroExtend(lhs, scratch);
+ branch32(cond, scratch, Imm32(uint16_t(rhs.value)), label);
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ load16SignExtend(lhs, scratch);
+ branch32(cond, scratch, Imm32(int16_t(rhs.value)), label);
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+}
+
+template <class L>
+void MacroAssembler::branch32(Condition cond, Register lhs, Register rhs,
+ L label) {
+ cmp32(lhs, rhs);
+ B(label, cond);
+}
+
+template <class L>
+void MacroAssembler::branch32(Condition cond, Register lhs, Imm32 imm,
+ L label) {
+ if (imm.value == 0 && cond == Assembler::Equal) {
+ Cbz(ARMRegister(lhs, 32), label);
+ } else if (imm.value == 0 && cond == Assembler::NotEqual) {
+ Cbnz(ARMRegister(lhs, 32), label);
+ } else {
+ cmp32(lhs, imm);
+ B(label, cond);
+ }
+}
+
+void MacroAssembler::branch32(Condition cond, Register lhs, const Address& rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs);
+ MOZ_ASSERT(scratch != rhs.base);
+ load32(rhs, scratch);
+ branch32(cond, lhs, scratch, label);
+}
+
+void MacroAssembler::branch32(Condition cond, const Address& lhs, Register rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ MOZ_ASSERT(scratch != rhs);
+ load32(lhs, scratch);
+ branch32(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branch32(Condition cond, const Address& lhs, Imm32 imm,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ load32(lhs, scratch);
+ branch32(cond, scratch, imm, label);
+}
+
+void MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs,
+ Register rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ movePtr(ImmPtr(lhs.addr), scratch);
+ branch32(cond, Address(scratch, 0), rhs, label);
+}
+
+void MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs,
+ Imm32 rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ load32(lhs, scratch);
+ branch32(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branch32(Condition cond, const BaseIndex& lhs, Imm32 rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != lhs.base);
+ MOZ_ASSERT(scratch32.asUnsized() != lhs.index);
+ doBaseIndex(scratch32, lhs, vixl::LDR_w);
+ branch32(cond, scratch32.asUnsized(), rhs, label);
+}
+
+void MacroAssembler::branch32(Condition cond, wasm::SymbolicAddress lhs,
+ Imm32 rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ movePtr(lhs, scratch);
+ branch32(cond, Address(scratch, 0), rhs, label);
+}
+
+void MacroAssembler::branch64(Condition cond, Register64 lhs, Imm64 val,
+ Label* success, Label* fail) {
+ if (val.value == 0 && cond == Assembler::Equal) {
+ Cbz(ARMRegister(lhs.reg, 64), success);
+ } else if (val.value == 0 && cond == Assembler::NotEqual) {
+ Cbnz(ARMRegister(lhs.reg, 64), success);
+ } else {
+ Cmp(ARMRegister(lhs.reg, 64), val.value);
+ B(success, cond);
+ }
+ if (fail) {
+ B(fail);
+ }
+}
+
+void MacroAssembler::branch64(Condition cond, Register64 lhs, Register64 rhs,
+ Label* success, Label* fail) {
+ Cmp(ARMRegister(lhs.reg, 64), ARMRegister(rhs.reg, 64));
+ B(success, cond);
+ if (fail) {
+ B(fail);
+ }
+}
+
+void MacroAssembler::branch64(Condition cond, const Address& lhs, Imm64 val,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal,
+ "other condition codes not supported");
+
+ branchPtr(cond, lhs, ImmWord(val.value), label);
+}
+
+void MacroAssembler::branch64(Condition cond, const Address& lhs,
+ Register64 rhs, Label* label) {
+ MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal,
+ "other condition codes not supported");
+
+ branchPtr(cond, lhs, rhs.reg, label);
+}
+
+void MacroAssembler::branch64(Condition cond, const Address& lhs,
+ const Address& rhs, Register scratch,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal,
+ "other condition codes not supported");
+ MOZ_ASSERT(lhs.base != scratch);
+ MOZ_ASSERT(rhs.base != scratch);
+
+ loadPtr(rhs, scratch);
+ branchPtr(cond, lhs, scratch, label);
+}
+
+template <class L>
+void MacroAssembler::branchPtr(Condition cond, Register lhs, Register rhs,
+ L label) {
+ Cmp(ARMRegister(lhs, 64), ARMRegister(rhs, 64));
+ B(label, cond);
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, Imm32 rhs,
+ Label* label) {
+ if (rhs.value == 0 && cond == Assembler::Equal) {
+ Cbz(ARMRegister(lhs, 64), label);
+ } else if (rhs.value == 0 && cond == Assembler::NotEqual) {
+ Cbnz(ARMRegister(lhs, 64), label);
+ } else {
+ cmpPtr(lhs, rhs);
+ B(label, cond);
+ }
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmPtr rhs,
+ Label* label) {
+ if (rhs.value == 0 && cond == Assembler::Equal) {
+ Cbz(ARMRegister(lhs, 64), label);
+ } else if (rhs.value == 0 && cond == Assembler::NotEqual) {
+ Cbnz(ARMRegister(lhs, 64), label);
+ } else {
+ cmpPtr(lhs, rhs);
+ B(label, cond);
+ }
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmGCPtr rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs);
+ movePtr(rhs, scratch);
+ branchPtr(cond, lhs, scratch, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmWord rhs,
+ Label* label) {
+ if (rhs.value == 0 && cond == Assembler::Equal) {
+ Cbz(ARMRegister(lhs, 64), label);
+ } else if (rhs.value == 0 && cond == Assembler::NotEqual) {
+ Cbnz(ARMRegister(lhs, 64), label);
+ } else {
+ cmpPtr(lhs, rhs);
+ B(label, cond);
+ }
+}
+
+template <class L>
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, Register rhs,
+ L label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ MOZ_ASSERT(scratch != rhs);
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmPtr rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmGCPtr rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch1_64 = temps.AcquireX();
+ const ARMRegister scratch2_64 = temps.AcquireX();
+ MOZ_ASSERT(scratch1_64.asUnsized() != lhs.base);
+ MOZ_ASSERT(scratch2_64.asUnsized() != lhs.base);
+
+ movePtr(rhs, scratch1_64.asUnsized());
+ loadPtr(lhs, scratch2_64.asUnsized());
+ branchPtr(cond, scratch2_64.asUnsized(), scratch1_64.asUnsized(), label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmWord rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs,
+ Register rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != rhs);
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs,
+ ImmWord rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, wasm::SymbolicAddress lhs,
+ Register rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != rhs);
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs,
+ ImmWord rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ MOZ_ASSERT(scratch != lhs.index);
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs,
+ Register rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ MOZ_ASSERT(scratch != lhs.index);
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPrivatePtr(Condition cond, const Address& lhs,
+ Register rhs, Label* label) {
+ branchPtr(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchFloat(DoubleCondition cond, FloatRegister lhs,
+ FloatRegister rhs, Label* label) {
+ compareFloat(cond, lhs, rhs);
+ switch (cond) {
+ case DoubleNotEqual: {
+ Label unordered;
+ // not equal *and* ordered
+ branch(Overflow, &unordered);
+ branch(NotEqual, label);
+ bind(&unordered);
+ break;
+ }
+ case DoubleEqualOrUnordered:
+ branch(Overflow, label);
+ branch(Equal, label);
+ break;
+ default:
+ branch(Condition(cond), label);
+ }
+}
+
+void MacroAssembler::branchTruncateFloat32MaybeModUint32(FloatRegister src,
+ Register dest,
+ Label* fail) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+
+ ARMFPRegister src32(src, 32);
+ ARMRegister dest64(dest, 64);
+
+ MOZ_ASSERT(!scratch64.Is(dest64));
+
+ // Convert scalar to signed 64-bit fixed-point, rounding toward zero.
+ // In the case of overflow, the output is saturated.
+ // In the case of NaN and -0, the output is zero.
+ Fcvtzs(dest64, src32);
+
+ // Fail if the result is saturated, i.e. it's either INT64_MIN or INT64_MAX.
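+  // Adding INT64_MAX maps those two values (and only those) to -1 and -2,
+  // so an unsigned compare against -3 (via Cmn) is Above exactly for them.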
+ Add(scratch64, dest64, Operand(0x7fff'ffff'ffff'ffff));
+ Cmn(scratch64, 3);
+ B(fail, Assembler::Above);
+
+ // Clear upper 32 bits.
+ Uxtw(dest64, dest64);
+}
+
+void MacroAssembler::branchTruncateFloat32ToInt32(FloatRegister src,
+ Register dest, Label* fail) {
+ convertFloat32ToInt32(src, dest, fail, false);
+}
+
+void MacroAssembler::branchDouble(DoubleCondition cond, FloatRegister lhs,
+ FloatRegister rhs, Label* label) {
+ compareDouble(cond, lhs, rhs);
+ switch (cond) {
+ case DoubleNotEqual: {
+ Label unordered;
+ // not equal *and* ordered
+ branch(Overflow, &unordered);
+ branch(NotEqual, label);
+ bind(&unordered);
+ break;
+ }
+ case DoubleEqualOrUnordered:
+ branch(Overflow, label);
+ branch(Equal, label);
+ break;
+ default:
+ branch(Condition(cond), label);
+ }
+}
+
+void MacroAssembler::branchTruncateDoubleMaybeModUint32(FloatRegister src,
+ Register dest,
+ Label* fail) {
+ // ARMv8.3 chips support the FJCVTZS instruction, which handles exactly this
+ // logic. But the simulator does not implement it, and when the simulator runs
+ // on ARM64 hardware we want to override vixl's detection of it.
+#if defined(JS_SIMULATOR_ARM64) && (defined(__aarch64__) || defined(_M_ARM64))
+ const bool fjscvt = false;
+#else
+ const bool fjscvt = CPUHas(vixl::CPUFeatures::kFP, vixl::CPUFeatures::kJSCVT);
+#endif
+ if (fjscvt) {
+ Fjcvtzs(ARMRegister(dest, 32), ARMFPRegister(src, 64));
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+
+ // An out of range integer will be saturated to the destination size.
+ ARMFPRegister src64(src, 64);
+ ARMRegister dest64(dest, 64);
+
+ MOZ_ASSERT(!scratch64.Is(dest64));
+
+ // Convert scalar to signed 64-bit fixed-point, rounding toward zero.
+ // In the case of overflow, the output is saturated.
+ // In the case of NaN and -0, the output is zero.
+ Fcvtzs(dest64, src64);
+
+ // Fail if the result is saturated, i.e. it's either INT64_MIN or INT64_MAX.
+ Add(scratch64, dest64, Operand(0x7fff'ffff'ffff'ffff));
+ Cmn(scratch64, 3);
+ B(fail, Assembler::Above);
+
+ // Clear upper 32 bits.
+ Uxtw(dest64, dest64);
+}
+
+void MacroAssembler::branchTruncateDoubleToInt32(FloatRegister src,
+ Register dest, Label* fail) {
+ ARMFPRegister src64(src, 64);
+ ARMRegister dest64(dest, 64);
+ ARMRegister dest32(dest, 32);
+
+ // Convert scalar to signed 64-bit fixed-point, rounding toward zero.
+ // In the case of overflow, the output is saturated.
+ // In the case of NaN and -0, the output is zero.
+ Fcvtzs(dest64, src64);
+
+ // Fail on overflow cases.
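+  // The truncation fits in int32 iff sign-extending the low 32 bits
+  // reproduces the full 64-bit result.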
+ Cmp(dest64, Operand(dest32, vixl::SXTW));
+ B(fail, Assembler::NotEqual);
+
+ // Clear upper 32 bits.
+ Uxtw(dest64, dest64);
+}
+
+template <typename T>
+void MacroAssembler::branchAdd32(Condition cond, T src, Register dest,
+ Label* label) {
+ adds32(src, dest);
+ B(label, cond);
+}
+
+template <typename T>
+void MacroAssembler::branchSub32(Condition cond, T src, Register dest,
+ Label* label) {
+ subs32(src, dest);
+ branch(cond, label);
+}
+
+template <typename T>
+void MacroAssembler::branchMul32(Condition cond, T src, Register dest,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::Overflow);
+ vixl::UseScratchRegisterScope temps(this);
+ mul32(src, dest, dest, label);
+}
+
+template <typename T>
+void MacroAssembler::branchRshift32(Condition cond, T src, Register dest,
+ Label* label) {
+ MOZ_ASSERT(cond == Zero || cond == NonZero);
+ rshift32(src, dest);
+ branch32(cond == Zero ? Equal : NotEqual, dest, Imm32(0), label);
+}
+
+void MacroAssembler::branchNeg32(Condition cond, Register reg, Label* label) {
+ MOZ_ASSERT(cond == Overflow);
+ negs32(reg);
+ B(label, cond);
+}
+
+template <typename T>
+void MacroAssembler::branchAddPtr(Condition cond, T src, Register dest,
+ Label* label) {
+ adds64(src, dest);
+ B(label, cond);
+}
+
+template <typename T>
+void MacroAssembler::branchSubPtr(Condition cond, T src, Register dest,
+ Label* label) {
+ subs64(src, dest);
+ B(label, cond);
+}
+
+void MacroAssembler::branchMulPtr(Condition cond, Register src, Register dest,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::Overflow);
+
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ const ARMRegister src64(src, 64);
+ const ARMRegister dest64(dest, 64);
+
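+  // Smulh produces the high 64 bits of the 128-bit product; with no
+  // overflow it equals the sign extension of the low half (dest >> 63).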
+ Smulh(scratch64, dest64, src64);
+ Mul(dest64, dest64, src64);
+ Cmp(scratch64, Operand(dest64, vixl::ASR, 63));
+ B(label, NotEqual);
+}
+
+void MacroAssembler::decBranchPtr(Condition cond, Register lhs, Imm32 rhs,
+ Label* label) {
+ Subs(ARMRegister(lhs, 64), ARMRegister(lhs, 64), Operand(rhs.value));
+ B(cond, label);
+}
+
+template <class L>
+void MacroAssembler::branchTest32(Condition cond, Register lhs, Register rhs,
+ L label) {
+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
+ cond == NotSigned);
+ // The x86-biased front end prefers |test foo, foo| to |cmp foo, #0|. We look
+ // for the former pattern and expand as Cbz/Cbnz when possible.
+ if (lhs == rhs && cond == Zero) {
+ Cbz(ARMRegister(lhs, 32), label);
+ } else if (lhs == rhs && cond == NonZero) {
+ Cbnz(ARMRegister(lhs, 32), label);
+ } else {
+ test32(lhs, rhs);
+ B(label, cond);
+ }
+}
+
+template <class L>
+void MacroAssembler::branchTest32(Condition cond, Register lhs, Imm32 rhs,
+ L label) {
+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
+ cond == NotSigned);
+ test32(lhs, rhs);
+ B(label, cond);
+}
+
+void MacroAssembler::branchTest32(Condition cond, const Address& lhs, Imm32 rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ load32(lhs, scratch);
+ branchTest32(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchTest32(Condition cond, const AbsoluteAddress& lhs,
+ Imm32 rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ load32(lhs, scratch);
+ branchTest32(cond, scratch, rhs, label);
+}
+
+template <class L>
+void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Register rhs,
+ L label) {
+ // See branchTest32.
+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
+ cond == NotSigned);
+ if (lhs == rhs && cond == Zero) {
+ Cbz(ARMRegister(lhs, 64), label);
+ } else if (lhs == rhs && cond == NonZero) {
+ Cbnz(ARMRegister(lhs, 64), label);
+ } else {
+ Tst(ARMRegister(lhs, 64), Operand(ARMRegister(rhs, 64)));
+ B(label, cond);
+ }
+}
+
+void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Imm32 rhs,
+ Label* label) {
+ Tst(ARMRegister(lhs, 64), Operand(rhs.value));
+ B(label, cond);
+}
+
+void MacroAssembler::branchTestPtr(Condition cond, const Address& lhs,
+ Imm32 rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ loadPtr(lhs, scratch);
+ branchTestPtr(cond, scratch, rhs, label);
+}
+
+template <class L>
+void MacroAssembler::branchTest64(Condition cond, Register64 lhs,
+ Register64 rhs, Register temp, L label) {
+ branchTestPtr(cond, lhs.reg, rhs.reg, label);
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond, Register tag,
+ Label* label) {
+ branchTestUndefinedImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond, const Address& address,
+ Label* label) {
+ branchTestUndefinedImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond,
+ const BaseIndex& address,
+ Label* label) {
+ branchTestUndefinedImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestUndefinedImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestUndefinedImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testUndefined(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, Register tag,
+ Label* label) {
+ branchTestInt32Impl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, const Address& address,
+ Label* label) {
+ branchTestInt32Impl(cond, address, label);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestInt32Impl(cond, address, label);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestInt32Impl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestInt32Impl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testInt32(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestInt32Truthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testInt32Truthy(truthy, value);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, Register tag,
+ Label* label) {
+ branchTestDoubleImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, const Address& address,
+ Label* label) {
+ branchTestDoubleImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestDoubleImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestDoubleImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestDoubleImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testDouble(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestDoubleTruthy(bool truthy, FloatRegister reg,
+ Label* label) {
+ Fcmp(ARMFPRegister(reg, 64), 0.0);
+ if (!truthy) {
+    // falsy values are zero and NaN.
+ branch(Zero, label);
+ branch(Overflow, label);
+ } else {
+    // truthy values are non-zero and not NaN. An unordered compare (NaN)
+    // sets the Overflow flag, so treat both Zero and Overflow as falsy.
+ Label onFalse;
+ branch(Zero, &onFalse);
+ branch(Overflow, &onFalse);
+ B(label);
+ bind(&onFalse);
+ }
+}
+
+void MacroAssembler::branchTestNumber(Condition cond, Register tag,
+ Label* label) {
+ branchTestNumberImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestNumber(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestNumberImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestNumberImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testNumber(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond, Register tag,
+ Label* label) {
+ branchTestBooleanImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond, const Address& address,
+ Label* label) {
+ branchTestBooleanImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestBooleanImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestBooleanImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestBooleanImpl(Condition cond, const T& tag,
+ Label* label) {
+ Condition c = testBoolean(cond, tag);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestBooleanTruthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testBooleanTruthy(truthy, value);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestString(Condition cond, Register tag,
+ Label* label) {
+ branchTestStringImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestString(Condition cond, const Address& address,
+ Label* label) {
+ branchTestStringImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestString(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestStringImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestString(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestStringImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestStringImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testString(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestStringTruthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testStringTruthy(truthy, value);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, Register tag,
+ Label* label) {
+ branchTestSymbolImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, const Address& address,
+ Label* label) {
+ branchTestSymbolImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestSymbolImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestSymbolImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestSymbolImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testSymbol(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, Register tag,
+ Label* label) {
+ branchTestBigIntImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, const Address& address,
+ Label* label) {
+ branchTestBigIntImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestBigIntImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestBigIntImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestBigIntImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testBigInt(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestBigIntTruthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testBigIntTruthy(truthy, value);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, Register tag,
+ Label* label) {
+ branchTestNullImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, const Address& address,
+ Label* label) {
+ branchTestNullImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestNullImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestNullImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestNullImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testNull(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, Register tag,
+ Label* label) {
+ branchTestObjectImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, const Address& address,
+ Label* label) {
+ branchTestObjectImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestObjectImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestObjectImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestObjectImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testObject(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestGCThing(Condition cond, const Address& address,
+ Label* label) {
+ branchTestGCThingImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestGCThing(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestGCThingImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestGCThing(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestGCThingImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestGCThingImpl(Condition cond, const T& src,
+ Label* label) {
+ Condition c = testGCThing(cond, src);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestPrimitive(Condition cond, Register tag,
+ Label* label) {
+ branchTestPrimitiveImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestPrimitive(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestPrimitiveImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestPrimitiveImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testPrimitive(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, Register tag,
+ Label* label) {
+ branchTestMagicImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, const Address& address,
+ Label* label) {
+ branchTestMagicImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestMagicImpl(cond, address, label);
+}
+
+template <class L>
+void MacroAssembler::branchTestMagic(Condition cond, const ValueOperand& value,
+ L label) {
+ branchTestMagicImpl(cond, value, label);
+}
+
+template <typename T, class L>
+void MacroAssembler::branchTestMagicImpl(Condition cond, const T& t, L label) {
+ Condition c = testMagic(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, const Address& valaddr,
+ JSWhyMagic why, Label* label) {
+ uint64_t magic = MagicValue(why).asRawBits();
+ cmpPtr(valaddr, ImmWord(magic));
+ B(label, cond);
+}
+
+void MacroAssembler::branchTestValue(Condition cond, const BaseIndex& lhs,
+ const ValueOperand& rhs, Label* label) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ branchPtr(cond, lhs, rhs.valueReg(), label);
+}
+
+template <typename T>
+void MacroAssembler::testNumberSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testNumber(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testBooleanSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testBoolean(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testStringSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testString(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testSymbolSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testSymbol(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testBigIntSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testBigInt(cond, src);
+ emitSet(cond, dest);
+}
+
+void MacroAssembler::branchToComputedAddress(const BaseIndex& addr) {
+ vixl::UseScratchRegisterScope temps(&this->asVIXL());
+ const ARMRegister scratch64 = temps.AcquireX();
+ loadPtr(addr, scratch64.asUnsized());
+ Br(scratch64);
+}
+
+void MacroAssembler::cmp32Move32(Condition cond, Register lhs, Register rhs,
+ Register src, Register dest) {
+ cmp32(lhs, rhs);
+ Csel(ARMRegister(dest, 32), ARMRegister(src, 32), ARMRegister(dest, 32),
+ cond);
+}
+
+void MacroAssembler::cmp32Move32(Condition cond, Register lhs,
+ const Address& rhs, Register src,
+ Register dest) {
+ MOZ_CRASH("NYI");
+}
+
+void MacroAssembler::cmpPtrMovePtr(Condition cond, Register lhs, Register rhs,
+ Register src, Register dest) {
+ cmpPtr(lhs, rhs);
+ Csel(ARMRegister(dest, 64), ARMRegister(src, 64), ARMRegister(dest, 64),
+ cond);
+}
+
+void MacroAssembler::cmpPtrMovePtr(Condition cond, Register lhs,
+ const Address& rhs, Register src,
+ Register dest) {
+ MOZ_CRASH("NYI");
+}
+
+void MacroAssembler::cmp32Load32(Condition cond, Register lhs,
+ const Address& rhs, const Address& src,
+ Register dest) {
+ MOZ_CRASH("NYI");
+}
+
+void MacroAssembler::cmp32Load32(Condition cond, Register lhs, Register rhs,
+ const Address& src, Register dest) {
+ MOZ_CRASH("NYI");
+}
+
+void MacroAssembler::cmp32MovePtr(Condition cond, Register lhs, Imm32 rhs,
+ Register src, Register dest) {
+ cmp32(lhs, rhs);
+ Csel(ARMRegister(dest, 64), ARMRegister(src, 64), ARMRegister(dest, 64),
+ cond);
+}
+
+void MacroAssembler::cmp32LoadPtr(Condition cond, const Address& lhs, Imm32 rhs,
+ const Address& src, Register dest) {
+ // ARM64 does not support conditional loads, so we use a branch with a CSel
+ // (to prevent Spectre attacks).
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+
+ // Can't use branch32() here, because it may select Cbz/Cbnz which don't
+ // affect condition flags.
+ Label done;
+ cmp32(lhs, rhs);
+ B(&done, Assembler::InvertCondition(cond));
+
+ loadPtr(src, scratch64.asUnsized());
+ Csel(ARMRegister(dest, 64), scratch64, ARMRegister(dest, 64), cond);
+ bind(&done);
+}
+
+void MacroAssembler::test32LoadPtr(Condition cond, const Address& addr,
+ Imm32 mask, const Address& src,
+ Register dest) {
+ MOZ_ASSERT(cond == Assembler::Zero || cond == Assembler::NonZero);
+
+ // ARM64 does not support conditional loads, so we use a branch with a CSel
+ // (to prevent Spectre attacks).
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ Label done;
+ branchTest32(Assembler::InvertCondition(cond), addr, mask, &done);
+ loadPtr(src, scratch64.asUnsized());
+ Csel(ARMRegister(dest, 64), scratch64, ARMRegister(dest, 64), cond);
+ bind(&done);
+}
+
+void MacroAssembler::test32MovePtr(Condition cond, const Address& addr,
+ Imm32 mask, Register src, Register dest) {
+ MOZ_ASSERT(cond == Assembler::Zero || cond == Assembler::NonZero);
+ test32(addr, mask);
+ Csel(ARMRegister(dest, 64), ARMRegister(src, 64), ARMRegister(dest, 64),
+ cond);
+}
+
+void MacroAssembler::spectreMovePtr(Condition cond, Register src,
+ Register dest) {
+ Csel(ARMRegister(dest, 64), ARMRegister(src, 64), ARMRegister(dest, 64),
+ cond);
+}
+
+void MacroAssembler::spectreZeroRegister(Condition cond, Register,
+ Register dest) {
+ Csel(ARMRegister(dest, 64), ARMRegister(dest, 64), vixl::xzr,
+ Assembler::InvertCondition(cond));
+}
+
+void MacroAssembler::spectreBoundsCheck32(Register index, Register length,
+ Register maybeScratch,
+ Label* failure) {
+ MOZ_ASSERT(length != maybeScratch);
+ MOZ_ASSERT(index != maybeScratch);
+
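+  // Descriptive note (not in the original): with spectreIndexMasking enabled,
+  // the Csel below keeps the index only when the length comparison actually
+  // succeeded and zeroes it otherwise, so a mispredicted bounds check cannot
+  // leave an out-of-range index in use.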
+ branch32(Assembler::BelowOrEqual, length, index, failure);
+
+ if (JitOptions.spectreIndexMasking) {
+ Csel(ARMRegister(index, 32), ARMRegister(index, 32), vixl::wzr,
+ Assembler::Above);
+ }
+}
+
+void MacroAssembler::spectreBoundsCheck32(Register index, const Address& length,
+ Register maybeScratch,
+ Label* failure) {
+ MOZ_ASSERT(index != length.base);
+ MOZ_ASSERT(length.base != maybeScratch);
+ MOZ_ASSERT(index != maybeScratch);
+
+ branch32(Assembler::BelowOrEqual, length, index, failure);
+
+ if (JitOptions.spectreIndexMasking) {
+ Csel(ARMRegister(index, 32), ARMRegister(index, 32), vixl::wzr,
+ Assembler::Above);
+ }
+}
+
+void MacroAssembler::spectreBoundsCheckPtr(Register index, Register length,
+ Register maybeScratch,
+ Label* failure) {
+ MOZ_ASSERT(length != maybeScratch);
+ MOZ_ASSERT(index != maybeScratch);
+
+ branchPtr(Assembler::BelowOrEqual, length, index, failure);
+
+ if (JitOptions.spectreIndexMasking) {
+ Csel(ARMRegister(index, 64), ARMRegister(index, 64), vixl::xzr,
+ Assembler::Above);
+ }
+}
+
+void MacroAssembler::spectreBoundsCheckPtr(Register index,
+ const Address& length,
+ Register maybeScratch,
+ Label* failure) {
+ MOZ_ASSERT(index != length.base);
+ MOZ_ASSERT(length.base != maybeScratch);
+ MOZ_ASSERT(index != maybeScratch);
+
+ branchPtr(Assembler::BelowOrEqual, length, index, failure);
+
+ if (JitOptions.spectreIndexMasking) {
+ Csel(ARMRegister(index, 64), ARMRegister(index, 64), vixl::xzr,
+ Assembler::Above);
+ }
+}
+
+// ========================================================================
+// Memory access primitives.
+void MacroAssembler::storeUncanonicalizedDouble(FloatRegister src,
+ const Address& dest) {
+ Str(ARMFPRegister(src, 64), toMemOperand(dest));
+}
+void MacroAssembler::storeUncanonicalizedDouble(FloatRegister src,
+ const BaseIndex& dest) {
+ doBaseIndex(ARMFPRegister(src, 64), dest, vixl::STR_d);
+}
+
+void MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src,
+ const Address& addr) {
+ Str(ARMFPRegister(src, 32), toMemOperand(addr));
+}
+void MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src,
+ const BaseIndex& addr) {
+ doBaseIndex(ARMFPRegister(src, 32), addr, vixl::STR_s);
+}
+
+void MacroAssembler::memoryBarrier(MemoryBarrierBits barrier) {
+ // Bug 1715494: Discriminating barriers such as StoreStore are hard to reason
+ // about. Execute the full barrier for everything that requires a barrier.
+ if (barrier) {
+ Dmb(vixl::InnerShareable, vixl::BarrierAll);
+ }
+}
+
+// ===============================================================
+// Clamping functions.
+
+void MacroAssembler::clampIntToUint8(Register reg) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ const ARMRegister reg32(reg, 32);
+ MOZ_ASSERT(!scratch32.Is(reg32));
+
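+  // Descriptive note (not in the original): compare the value with its own
+  // zero-extended low byte. A lower (signed) value is negative and clamps to
+  // 0, a higher value exceeds 255 and clamps to 0xff, an equal value is
+  // already in range.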
+ Cmp(reg32, Operand(reg32, vixl::UXTB));
+ Csel(reg32, reg32, vixl::wzr, Assembler::GreaterThanOrEqual);
+ Mov(scratch32, Operand(0xff));
+ Csel(reg32, reg32, scratch32, Assembler::LessThanOrEqual);
+}
+
+void MacroAssembler::fallibleUnboxPtr(const ValueOperand& src, Register dest,
+ JSValueType type, Label* fail) {
+ MOZ_ASSERT(type == JSVAL_TYPE_OBJECT || type == JSVAL_TYPE_STRING ||
+ type == JSVAL_TYPE_SYMBOL || type == JSVAL_TYPE_BIGINT);
+ // dest := src XOR mask
+ // fail if dest >> JSVAL_TAG_SHIFT != 0
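+  // Descriptive note (not in the original): XORing with the expected shifted
+  // tag clears the tag bits exactly when the tag matches, so any bits left
+  // above JSVAL_TAG_SHIFT indicate a different tag.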
+ const ARMRegister src64(src.valueReg(), 64);
+ const ARMRegister dest64(dest, 64);
+ Eor(dest64, src64, Operand(JSVAL_TYPE_TO_SHIFTED_TAG(type)));
+ Cmp(vixl::xzr, Operand(dest64, vixl::LSR, JSVAL_TAG_SHIFT));
+ j(Assembler::NotEqual, fail);
+}
+
+void MacroAssembler::fallibleUnboxPtr(const Address& src, Register dest,
+ JSValueType type, Label* fail) {
+ loadValue(src, ValueOperand(dest));
+ fallibleUnboxPtr(ValueOperand(dest), dest, type, fail);
+}
+
+void MacroAssembler::fallibleUnboxPtr(const BaseIndex& src, Register dest,
+ JSValueType type, Label* fail) {
+ loadValue(src, ValueOperand(dest));
+ fallibleUnboxPtr(ValueOperand(dest), dest, type, fail);
+}
+
+//}}} check_macroassembler_style
+
+// Wasm SIMD
+
+static inline ARMFPRegister SimdReg(FloatRegister r) {
+ MOZ_ASSERT(r.isSimd128());
+ return ARMFPRegister(r, 128);
+}
+
+static inline ARMFPRegister Simd16B(FloatRegister r) {
+ return SimdReg(r).V16B();
+}
+
+static inline ARMFPRegister Simd8B(FloatRegister r) { return SimdReg(r).V8B(); }
+
+static inline ARMFPRegister Simd8H(FloatRegister r) { return SimdReg(r).V8H(); }
+
+static inline ARMFPRegister Simd4H(FloatRegister r) { return SimdReg(r).V4H(); }
+
+static inline ARMFPRegister Simd4S(FloatRegister r) { return SimdReg(r).V4S(); }
+
+static inline ARMFPRegister Simd2S(FloatRegister r) { return SimdReg(r).V2S(); }
+
+static inline ARMFPRegister Simd2D(FloatRegister r) { return SimdReg(r).V2D(); }
+
+static inline ARMFPRegister Simd1D(FloatRegister r) { return SimdReg(r).V1D(); }
+
+static inline ARMFPRegister SimdQ(FloatRegister r) { return SimdReg(r).Q(); }
+
+//{{{ check_macroassembler_style
+
+// Moves
+
+void MacroAssembler::moveSimd128(FloatRegister src, FloatRegister dest) {
+ if (src != dest) {
+ Mov(SimdReg(dest), SimdReg(src));
+ }
+}
+
+void MacroAssembler::loadConstantSimd128(const SimdConstant& v,
+ FloatRegister dest) {
+  // Movi does not yet generate good code for many cases; see bug 1664397.
+ SimdConstant c = SimdConstant::CreateX2((const int64_t*)v.bytes());
+ Movi(SimdReg(dest), c.asInt64x2()[1], c.asInt64x2()[0]);
+}
+
+// Splat
+
+void MacroAssembler::splatX16(Register src, FloatRegister dest) {
+ Dup(Simd16B(dest), ARMRegister(src, 32));
+}
+
+void MacroAssembler::splatX16(uint32_t srcLane, FloatRegister src,
+ FloatRegister dest) {
+ Dup(Simd16B(dest), Simd16B(src), srcLane);
+}
+
+void MacroAssembler::splatX8(Register src, FloatRegister dest) {
+ Dup(Simd8H(dest), ARMRegister(src, 32));
+}
+
+void MacroAssembler::splatX8(uint32_t srcLane, FloatRegister src,
+ FloatRegister dest) {
+ Dup(Simd8H(dest), Simd8H(src), srcLane);
+}
+
+void MacroAssembler::splatX4(Register src, FloatRegister dest) {
+ Dup(Simd4S(dest), ARMRegister(src, 32));
+}
+
+void MacroAssembler::splatX4(FloatRegister src, FloatRegister dest) {
+ Dup(Simd4S(dest), ARMFPRegister(src), 0);
+}
+
+void MacroAssembler::splatX2(Register64 src, FloatRegister dest) {
+ Dup(Simd2D(dest), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::splatX2(FloatRegister src, FloatRegister dest) {
+ Dup(Simd2D(dest), ARMFPRegister(src), 0);
+}
+
+// Extract lane as scalar. Float extraction does not canonicalize the value.
+
+void MacroAssembler::extractLaneInt8x16(uint32_t lane, FloatRegister src,
+ Register dest_) {
+ MOZ_ASSERT(lane < 16);
+ ARMRegister dest(dest_, 32);
+ Umov(dest, Simd4S(src), lane / 4);
+ Sbfx(dest, dest, (lane % 4) * 8, 8);
+}
+
+void MacroAssembler::unsignedExtractLaneInt8x16(uint32_t lane,
+ FloatRegister src,
+ Register dest_) {
+ MOZ_ASSERT(lane < 16);
+ ARMRegister dest(dest_, 32);
+ Umov(dest, Simd4S(src), lane / 4);
+ Ubfx(dest, dest, (lane % 4) * 8, 8);
+}
+
+void MacroAssembler::extractLaneInt16x8(uint32_t lane, FloatRegister src,
+ Register dest_) {
+ MOZ_ASSERT(lane < 8);
+ ARMRegister dest(dest_, 32);
+ Umov(dest, Simd4S(src), lane / 2);
+ Sbfx(dest, dest, (lane % 2) * 16, 16);
+}
+
+void MacroAssembler::unsignedExtractLaneInt16x8(uint32_t lane,
+ FloatRegister src,
+ Register dest_) {
+ MOZ_ASSERT(lane < 8);
+ ARMRegister dest(dest_, 32);
+ Umov(dest, Simd4S(src), lane / 2);
+ Ubfx(dest, dest, (lane % 2) * 16, 16);
+}
+
+void MacroAssembler::extractLaneInt32x4(uint32_t lane, FloatRegister src,
+ Register dest_) {
+ MOZ_ASSERT(lane < 4);
+ ARMRegister dest(dest_, 32);
+ Umov(dest, Simd4S(src), lane);
+}
+
+void MacroAssembler::extractLaneInt64x2(uint32_t lane, FloatRegister src,
+ Register64 dest_) {
+ MOZ_ASSERT(lane < 2);
+ ARMRegister dest(dest_.reg, 64);
+ Umov(dest, Simd2D(src), lane);
+}
+
+void MacroAssembler::extractLaneFloat32x4(uint32_t lane, FloatRegister src,
+ FloatRegister dest) {
+ MOZ_ASSERT(lane < 4);
+ Mov(ARMFPRegister(dest).V4S(), 0, Simd4S(src), lane);
+}
+
+void MacroAssembler::extractLaneFloat64x2(uint32_t lane, FloatRegister src,
+ FloatRegister dest) {
+ MOZ_ASSERT(lane < 2);
+ Mov(ARMFPRegister(dest).V2D(), 0, Simd2D(src), lane);
+}
+
+// Replace lane value
+
+void MacroAssembler::replaceLaneInt8x16(unsigned lane, Register rhs,
+ FloatRegister lhsDest) {
+ MOZ_ASSERT(lane < 16);
+ Mov(Simd16B(lhsDest), lane, ARMRegister(rhs, 32));
+}
+
+void MacroAssembler::replaceLaneInt16x8(unsigned lane, Register rhs,
+ FloatRegister lhsDest) {
+ MOZ_ASSERT(lane < 8);
+ Mov(Simd8H(lhsDest), lane, ARMRegister(rhs, 32));
+}
+
+void MacroAssembler::replaceLaneInt32x4(unsigned lane, Register rhs,
+ FloatRegister lhsDest) {
+ MOZ_ASSERT(lane < 4);
+ Mov(Simd4S(lhsDest), lane, ARMRegister(rhs, 32));
+}
+
+void MacroAssembler::replaceLaneInt64x2(unsigned lane, Register64 rhs,
+ FloatRegister lhsDest) {
+ MOZ_ASSERT(lane < 2);
+ Mov(Simd2D(lhsDest), lane, ARMRegister(rhs.reg, 64));
+}
+
+void MacroAssembler::replaceLaneFloat32x4(unsigned lane, FloatRegister rhs,
+ FloatRegister lhsDest) {
+ MOZ_ASSERT(lane < 4);
+ Mov(Simd4S(lhsDest), lane, ARMFPRegister(rhs).V4S(), 0);
+}
+
+void MacroAssembler::replaceLaneFloat64x2(unsigned lane, FloatRegister rhs,
+ FloatRegister lhsDest) {
+ MOZ_ASSERT(lane < 2);
+ Mov(Simd2D(lhsDest), lane, ARMFPRegister(rhs).V2D(), 0);
+}
+
+// Shuffle - blend and permute with immediate indices, and its many
+// specializations. Lane values other than those mentioned are illegal.
+
+// lane values 0..31
+void MacroAssembler::shuffleInt8x16(const uint8_t lanes[16], FloatRegister lhs,
+ FloatRegister rhs, FloatRegister dest) {
+  // The general solution generates mediocre code. Realistic programs will use
+  // patterns that can be specialized for much better code; that work is
+  // tracked by bug 1656834, so don't worry about it here.
+
+  // Set scratch to the lane value when it selects from lhs, or to the
+  // complement of the lane value when it selects from rhs.
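+  // Descriptive note (not in the original): Tbl writes 0 for an out-of-range
+  // index while Tbx leaves the destination lane unchanged, so the first Tbl
+  // fills the lanes taken from one operand, and after inverting the index
+  // vector the Tbx fills the remaining lanes from the other operand without
+  // disturbing the ones already written.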
+ ScratchSimd128Scope scratch(*this);
+ int8_t idx[16];
+
+ if (lhs == rhs) {
+ for (unsigned i = 0; i < 16; i++) {
+ idx[i] = lanes[i] < 16 ? lanes[i] : (lanes[i] - 16);
+ }
+ loadConstantSimd128(SimdConstant::CreateX16(idx), scratch);
+ Tbl(Simd16B(dest), Simd16B(lhs), Simd16B(scratch));
+ return;
+ }
+
+ if (rhs != dest) {
+ for (unsigned i = 0; i < 16; i++) {
+ idx[i] = lanes[i] < 16 ? lanes[i] : ~(lanes[i] - 16);
+ }
+ } else {
+ MOZ_ASSERT(lhs != dest);
+ for (unsigned i = 0; i < 16; i++) {
+ idx[i] = lanes[i] < 16 ? ~lanes[i] : (lanes[i] - 16);
+ }
+ std::swap(lhs, rhs);
+ }
+ loadConstantSimd128(SimdConstant::CreateX16(idx), scratch);
+ Tbl(Simd16B(dest), Simd16B(lhs), Simd16B(scratch));
+ Not(Simd16B(scratch), Simd16B(scratch));
+ Tbx(Simd16B(dest), Simd16B(rhs), Simd16B(scratch));
+}
+
+void MacroAssembler::shuffleInt8x16(const uint8_t lanes[16], FloatRegister rhs,
+ FloatRegister lhsDest) {
+ shuffleInt8x16(lanes, lhsDest, rhs, lhsDest);
+}
+
+void MacroAssembler::blendInt8x16(const uint8_t lanes[16], FloatRegister lhs,
+ FloatRegister rhs, FloatRegister dest) {
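+  // Descriptive note (not in the original): Tbx with a single-register table
+  // leaves a lane untouched when its index is >= 16, so lanes that should keep
+  // the value already in dest get an out-of-range index (16 + i) and the
+  // others select lane i of the source operand.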
+ ScratchSimd128Scope scratch(*this);
+ int8_t lanes_[16];
+
+ if (rhs == dest) {
+ for (unsigned i = 0; i < 16; i++) {
+ lanes_[i] = lanes[i] == 0 ? i : 16 + i;
+ }
+ loadConstantSimd128(SimdConstant::CreateX16(lanes_), scratch);
+ Tbx(Simd16B(dest), Simd16B(lhs), Simd16B(scratch));
+ return;
+ }
+
+ moveSimd128(lhs, dest);
+ for (unsigned i = 0; i < 16; i++) {
+ lanes_[i] = lanes[i] != 0 ? i : 16 + i;
+ }
+ loadConstantSimd128(SimdConstant::CreateX16(lanes_), scratch);
+ Tbx(Simd16B(dest), Simd16B(rhs), Simd16B(scratch));
+}
+
+void MacroAssembler::blendInt16x8(const uint16_t lanes[8], FloatRegister lhs,
+ FloatRegister rhs, FloatRegister dest) {
+ static_assert(sizeof(const uint16_t /*lanes*/[8]) == sizeof(uint8_t[16]));
+ blendInt8x16(reinterpret_cast<const uint8_t*>(lanes), lhs, rhs, dest);
+}
+
+void MacroAssembler::laneSelectSimd128(FloatRegister mask, FloatRegister lhs,
+ FloatRegister rhs, FloatRegister dest) {
+ MOZ_ASSERT(mask == dest);
+ Bsl(Simd16B(mask), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::interleaveHighInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip2(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::interleaveHighInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip2(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::interleaveHighInt64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip2(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+void MacroAssembler::interleaveHighInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip2(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::interleaveLowInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip1(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::interleaveLowInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip1(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::interleaveLowInt64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip1(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+void MacroAssembler::interleaveLowInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip1(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::permuteInt8x16(const uint8_t lanes[16], FloatRegister src,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ loadConstantSimd128(SimdConstant::CreateX16((const int8_t*)lanes), scratch);
+ Tbl(Simd16B(dest), Simd16B(src), Simd16B(scratch));
+}
+
+void MacroAssembler::permuteInt16x8(const uint16_t lanes[8], FloatRegister src,
+ FloatRegister dest) {
+ MOZ_ASSERT(lanes[0] < 8 && lanes[1] < 8 && lanes[2] < 8 && lanes[3] < 8 &&
+ lanes[4] < 8 && lanes[5] < 8 && lanes[6] < 8 && lanes[7] < 8);
+ const int8_t lanes_[16] = {
+ (int8_t)(lanes[0] << 1), (int8_t)((lanes[0] << 1) + 1),
+ (int8_t)(lanes[1] << 1), (int8_t)((lanes[1] << 1) + 1),
+ (int8_t)(lanes[2] << 1), (int8_t)((lanes[2] << 1) + 1),
+ (int8_t)(lanes[3] << 1), (int8_t)((lanes[3] << 1) + 1),
+ (int8_t)(lanes[4] << 1), (int8_t)((lanes[4] << 1) + 1),
+ (int8_t)(lanes[5] << 1), (int8_t)((lanes[5] << 1) + 1),
+ (int8_t)(lanes[6] << 1), (int8_t)((lanes[6] << 1) + 1),
+ (int8_t)(lanes[7] << 1), (int8_t)((lanes[7] << 1) + 1),
+ };
+ ScratchSimd128Scope scratch(*this);
+ loadConstantSimd128(SimdConstant::CreateX16(lanes_), scratch);
+ Tbl(Simd16B(dest), Simd16B(src), Simd16B(scratch));
+}
+
+void MacroAssembler::permuteInt32x4(const uint32_t lanes[4], FloatRegister src,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ const int8_t lanes_[16] = {
+ (int8_t)(lanes[0] << 2), (int8_t)((lanes[0] << 2) + 1),
+ (int8_t)((lanes[0] << 2) + 2), (int8_t)((lanes[0] << 2) + 3),
+ (int8_t)(lanes[1] << 2), (int8_t)((lanes[1] << 2) + 1),
+ (int8_t)((lanes[1] << 2) + 2), (int8_t)((lanes[1] << 2) + 3),
+ (int8_t)(lanes[2] << 2), (int8_t)((lanes[2] << 2) + 1),
+ (int8_t)((lanes[2] << 2) + 2), (int8_t)((lanes[2] << 2) + 3),
+ (int8_t)(lanes[3] << 2), (int8_t)((lanes[3] << 2) + 1),
+ (int8_t)((lanes[3] << 2) + 2), (int8_t)((lanes[3] << 2) + 3),
+ };
+ loadConstantSimd128(SimdConstant::CreateX16(lanes_), scratch);
+ Tbl(Simd16B(dest), Simd16B(src), Simd16B(scratch));
+}
+
+void MacroAssembler::rotateRightSimd128(FloatRegister src, FloatRegister dest,
+ uint32_t shift) {
+ Ext(Simd16B(dest), Simd16B(src), Simd16B(src), shift);
+}
+
+void MacroAssembler::leftShiftSimd128(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ MOZ_ASSERT(count.value < 16);
+ ScratchSimd128Scope scratch(*this);
+ Movi(Simd16B(scratch), 0);
+ Ext(Simd16B(dest), Simd16B(scratch), Simd16B(src), 16 - count.value);
+}
+
+void MacroAssembler::rightShiftSimd128(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ MOZ_ASSERT(count.value < 16);
+ ScratchSimd128Scope scratch(*this);
+ Movi(Simd16B(scratch), 0);
+ Ext(Simd16B(dest), Simd16B(src), Simd16B(scratch), count.value);
+}
+
+void MacroAssembler::concatAndRightShiftSimd128(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest,
+ uint32_t shift) {
+ MOZ_ASSERT(shift < 16);
+ Ext(Simd16B(dest), Simd16B(rhs), Simd16B(lhs), shift);
+}
+
+// Reverse bytes in lanes.
+
+void MacroAssembler::reverseInt16x8(FloatRegister src, FloatRegister dest) {
+ Rev16(Simd16B(dest), Simd16B(src));
+}
+
+void MacroAssembler::reverseInt32x4(FloatRegister src, FloatRegister dest) {
+ Rev32(Simd16B(dest), Simd16B(src));
+}
+
+void MacroAssembler::reverseInt64x2(FloatRegister src, FloatRegister dest) {
+ Rev64(Simd16B(dest), Simd16B(src));
+}
+
+// Swizzle - permute with variable indices. `rhs` holds the lanes parameter.
+
+void MacroAssembler::swizzleInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Tbl(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::swizzleInt8x16Relaxed(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Tbl(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+// Integer Add
+
+void MacroAssembler::addInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Add(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::addInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Add(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::addInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Add(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::addInt64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Add(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// Integer Subtract
+
+void MacroAssembler::subInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sub(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::subInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sub(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::subInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sub(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::subInt64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sub(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// Integer Multiply
+
+void MacroAssembler::mulInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Mul(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::mulInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Mul(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::mulInt64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest, FloatRegister temp1,
+ FloatRegister temp2) {
+ // As documented at https://chromium-review.googlesource.com/c/v8/v8/+/1781696
+ // lhs = <D C> <B A>
+ // rhs = <H G> <F E>
+ // result = <(DG+CH)_low+CG_high CG_low> <(BE+AF)_low+AE_high AE_low>
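+  // For reference (not in the original): writing each 64-bit lane as
+  // 2^32*hi + lo, the low 64 bits of (2^32*D + C) * (2^32*H + G) are
+  // (2^32*(D*G + C*H) + C*G) mod 2^64, which is exactly the per-lane result
+  // listed above.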
+ ScratchSimd128Scope scratch(*this);
+ Rev64(Simd4S(temp2), Simd4S(lhs)); // temp2 = <C D> <A B>
+ Mul(Simd4S(temp2), Simd4S(temp2), Simd4S(rhs)); // temp2 = <CH DG> <AF BE>
+ Xtn(Simd2S(temp1), Simd2D(rhs)); // temp1 = <0 0> <G E>
+ Addp(Simd4S(temp2), Simd4S(temp2), Simd4S(temp2)); // temp2 = <CH+DG AF+BE>..
+ Xtn(Simd2S(scratch), Simd2D(lhs)); // scratch = <0 0> <C A>
+ Shll(Simd2D(dest), Simd2S(temp2), 32); // dest = <(DG+CH)_low 0>
+ // <(BE+AF)_low 0>
+ Umlal(Simd2D(dest), Simd2S(scratch), Simd2S(temp1));
+}
+
+void MacroAssembler::extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smull(Simd8H(dest), Simd8B(lhs), Simd8B(rhs));
+}
+
+void MacroAssembler::extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smull2(Simd8H(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Umull(Simd8H(dest), Simd8B(lhs), Simd8B(rhs));
+}
+
+void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Umull2(Simd8H(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smull(Simd4S(dest), Simd4H(lhs), Simd4H(rhs));
+}
+
+void MacroAssembler::extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smull2(Simd4S(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Umull(Simd4S(dest), Simd4H(lhs), Simd4H(rhs));
+}
+
+void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Umull2(Simd4S(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smull(Simd2D(dest), Simd2S(lhs), Simd2S(rhs));
+}
+
+void MacroAssembler::extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smull2(Simd2D(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Umull(Simd2D(dest), Simd2S(lhs), Simd2S(rhs));
+}
+
+void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Umull2(Simd2D(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sqrdmulh(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::q15MulrInt16x8Relaxed(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sqrdmulh(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+// Integer Negate
+
+void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) {
+ Neg(Simd16B(dest), Simd16B(src));
+}
+
+void MacroAssembler::negInt16x8(FloatRegister src, FloatRegister dest) {
+ Neg(Simd8H(dest), Simd8H(src));
+}
+
+void MacroAssembler::negInt32x4(FloatRegister src, FloatRegister dest) {
+ Neg(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::negInt64x2(FloatRegister src, FloatRegister dest) {
+ Neg(Simd2D(dest), Simd2D(src));
+}
+
+// Saturating integer add
+
+void MacroAssembler::addSatInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sqadd(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::unsignedAddSatInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Uqadd(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::addSatInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sqadd(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::unsignedAddSatInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Uqadd(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+// Saturating integer subtract
+
+void MacroAssembler::subSatInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sqsub(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::unsignedSubSatInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Uqsub(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::subSatInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sqsub(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::unsignedSubSatInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Uqsub(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+// Lane-wise integer minimum
+
+void MacroAssembler::minInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smin(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::unsignedMinInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Umin(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::minInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smin(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::unsignedMinInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Umin(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::minInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smin(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::unsignedMinInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Umin(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+// Lane-wise integer maximum
+
+void MacroAssembler::maxInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smax(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::unsignedMaxInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Umax(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::maxInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smax(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::unsignedMaxInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Umax(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::maxInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smax(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::unsignedMaxInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Umax(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+// Lane-wise integer rounding average
+
+void MacroAssembler::unsignedAverageInt8x16(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Urhadd(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::unsignedAverageInt16x8(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Urhadd(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+// Lane-wise integer absolute value
+
+void MacroAssembler::absInt8x16(FloatRegister src, FloatRegister dest) {
+ Abs(Simd16B(dest), Simd16B(src));
+}
+
+void MacroAssembler::absInt16x8(FloatRegister src, FloatRegister dest) {
+ Abs(Simd8H(dest), Simd8H(src));
+}
+
+void MacroAssembler::absInt32x4(FloatRegister src, FloatRegister dest) {
+ Abs(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::absInt64x2(FloatRegister src, FloatRegister dest) {
+ Abs(Simd2D(dest), Simd2D(src));
+}
+
+// Left shift by variable scalar
+
+void MacroAssembler::leftShiftInt8x16(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope vscratch(*this);
+ Dup(Simd16B(vscratch), ARMRegister(rhs, 32));
+ Sshl(Simd16B(dest), Simd16B(lhs), Simd16B(vscratch));
+}
+
+void MacroAssembler::leftShiftInt8x16(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Shl(Simd16B(dest), Simd16B(src), count.value);
+}
+
+void MacroAssembler::leftShiftInt16x8(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope vscratch(*this);
+ Dup(Simd8H(vscratch), ARMRegister(rhs, 32));
+ Sshl(Simd8H(dest), Simd8H(lhs), Simd8H(vscratch));
+}
+
+void MacroAssembler::leftShiftInt16x8(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Shl(Simd8H(dest), Simd8H(src), count.value);
+}
+
+void MacroAssembler::leftShiftInt32x4(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope vscratch(*this);
+ Dup(Simd4S(vscratch), ARMRegister(rhs, 32));
+ Sshl(Simd4S(dest), Simd4S(lhs), Simd4S(vscratch));
+}
+
+void MacroAssembler::leftShiftInt32x4(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Shl(Simd4S(dest), Simd4S(src), count.value);
+}
+
+void MacroAssembler::leftShiftInt64x2(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope vscratch(*this);
+ Dup(Simd2D(vscratch), ARMRegister(rhs, 64));
+ Sshl(Simd2D(dest), Simd2D(lhs), Simd2D(vscratch));
+}
+
+void MacroAssembler::leftShiftInt64x2(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Shl(Simd2D(dest), Simd2D(src), count.value);
+}
+
+// Right shift by variable scalar
+
+void MacroAssembler::rightShiftInt8x16(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ MacroAssemblerCompat::rightShiftInt8x16(lhs, rhs, dest,
+ /* isUnsigned */ false);
+}
+
+void MacroAssembler::rightShiftInt8x16(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Sshr(Simd16B(dest), Simd16B(src), count.value);
+}
+
+void MacroAssembler::unsignedRightShiftInt8x16(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ MacroAssemblerCompat::rightShiftInt8x16(lhs, rhs, dest,
+ /* isUnsigned */ true);
+}
+
+void MacroAssembler::unsignedRightShiftInt8x16(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Ushr(Simd16B(dest), Simd16B(src), count.value);
+}
+
+void MacroAssembler::rightShiftInt16x8(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ MacroAssemblerCompat::rightShiftInt16x8(lhs, rhs, dest,
+ /* isUnsigned */ false);
+}
+
+void MacroAssembler::rightShiftInt16x8(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Sshr(Simd8H(dest), Simd8H(src), count.value);
+}
+
+void MacroAssembler::unsignedRightShiftInt16x8(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ MacroAssemblerCompat::rightShiftInt16x8(lhs, rhs, dest,
+ /* isUnsigned */ true);
+}
+
+void MacroAssembler::unsignedRightShiftInt16x8(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Ushr(Simd8H(dest), Simd8H(src), count.value);
+}
+
+void MacroAssembler::rightShiftInt32x4(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ MacroAssemblerCompat::rightShiftInt32x4(lhs, rhs, dest,
+ /* isUnsigned */ false);
+}
+
+void MacroAssembler::rightShiftInt32x4(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Sshr(Simd4S(dest), Simd4S(src), count.value);
+}
+
+void MacroAssembler::unsignedRightShiftInt32x4(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ MacroAssemblerCompat::rightShiftInt32x4(lhs, rhs, dest,
+ /* isUnsigned */ true);
+}
+
+void MacroAssembler::unsignedRightShiftInt32x4(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
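+  // Descriptive note (not in the original): the conditional select, rather
+  // than the branch alone, decides whether the loaded value is merged into
+  // dest, so it only lands in dest when the condition actually holds.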
+ Ushr(Simd4S(dest), Simd4S(src), count.value);
+}
+
+  // Can't use branch32() here, because it may select Cbz/Cbnz, which don't
+  // set the condition flags needed by the Csel below.
+ MacroAssemblerCompat::rightShiftInt64x2(lhs, rhs, dest,
+ /* isUnsigned */ false);
+}
+
+void MacroAssembler::rightShiftInt64x2(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Sshr(Simd2D(dest), Simd2D(src), count.value);
+}
+
+void MacroAssembler::unsignedRightShiftInt64x2(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ MacroAssemblerCompat::rightShiftInt64x2(lhs, rhs, dest,
+ /* isUnsigned */ true);
+}
+
+void MacroAssembler::unsignedRightShiftInt64x2(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Ushr(Simd2D(dest), Simd2D(src), count.value);
+}
+
+// Bitwise and, or, xor, not
+
+void MacroAssembler::bitwiseAndSimd128(FloatRegister rhs,
+ FloatRegister lhsDest) {
+ And(Simd16B(lhsDest), Simd16B(lhsDest), Simd16B(rhs));
+}
+
+void MacroAssembler::bitwiseAndSimd128(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ And(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::bitwiseOrSimd128(FloatRegister rhs,
+ FloatRegister lhsDest) {
+ Orr(Simd16B(lhsDest), Simd16B(lhsDest), Simd16B(rhs));
+}
+
+void MacroAssembler::bitwiseOrSimd128(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Orr(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::bitwiseXorSimd128(FloatRegister rhs,
+ FloatRegister lhsDest) {
+ Eor(Simd16B(lhsDest), Simd16B(lhsDest), Simd16B(rhs));
+}
+
+void MacroAssembler::bitwiseXorSimd128(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Eor(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::bitwiseNotSimd128(FloatRegister src, FloatRegister dest) {
+ Not(Simd16B(dest), Simd16B(src));
+}
+
+void MacroAssembler::bitwiseAndNotSimd128(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Bic(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+// Bitwise AND with complement: dest = ~lhs & rhs. Note that this is not what
+// Wasm wants but what the x86 hardware offers, hence the name. Since ARM64's
+// Bic computes dest = lhs & ~rhs, we simply swap the operands.
+
+void MacroAssembler::bitwiseNotAndSimd128(FloatRegister rhs,
+ FloatRegister lhsDest) {
+ Bic(Simd16B(lhsDest), Simd16B(rhs), Simd16B(lhsDest));
+}
+
+// Bitwise select
+
+void MacroAssembler::bitwiseSelectSimd128(FloatRegister onTrue,
+ FloatRegister onFalse,
+ FloatRegister maskDest) {
+ Bsl(Simd16B(maskDest), Simd16B(onTrue), Simd16B(onFalse));
+}
+
+// Population count
+
+void MacroAssembler::popcntInt8x16(FloatRegister src, FloatRegister dest) {
+ Cnt(Simd16B(dest), Simd16B(src));
+}
+
+// Any lane true, i.e., any bit set
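+//
+// Descriptive note (not in the original): Addp folds the two 64-bit halves
+// into a single value; any set bit makes that value nonzero.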
+
+void MacroAssembler::anyTrueSimd128(FloatRegister src, Register dest_) {
+ ScratchSimd128Scope scratch_(*this);
+ ARMFPRegister scratch(Simd1D(scratch_));
+ ARMRegister dest(dest_, 64);
+ Addp(scratch, Simd2D(src));
+ Umov(dest, scratch, 0);
+ Cmp(dest, Operand(0));
+ Cset(dest, Assembler::NonZero);
+}
+
+// All lanes true
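+//
+// Descriptive note (not in the original): Cmeq against zero sets a lane to
+// all-ones exactly when that lane is zero, so after folding with Addp a
+// nonzero result means some lane was false.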
+
+void MacroAssembler::allTrueInt8x16(FloatRegister src, Register dest_) {
+ ScratchSimd128Scope scratch(*this);
+ ARMRegister dest(dest_, 64);
+ Cmeq(Simd16B(scratch), Simd16B(src), 0);
+ Addp(Simd1D(scratch), Simd2D(scratch));
+ Umov(dest, Simd1D(scratch), 0);
+ Cmp(dest, Operand(0));
+ Cset(dest, Assembler::Zero);
+}
+
+void MacroAssembler::allTrueInt16x8(FloatRegister src, Register dest_) {
+ ScratchSimd128Scope scratch(*this);
+ ARMRegister dest(dest_, 64);
+ Cmeq(Simd8H(scratch), Simd8H(src), 0);
+ Addp(Simd1D(scratch), Simd2D(scratch));
+ Umov(dest, Simd1D(scratch), 0);
+ Cmp(dest, Operand(0));
+ Cset(dest, Assembler::Zero);
+}
+
+void MacroAssembler::allTrueInt32x4(FloatRegister src, Register dest_) {
+ ScratchSimd128Scope scratch(*this);
+ ARMRegister dest(dest_, 64);
+ Cmeq(Simd4S(scratch), Simd4S(src), 0);
+ Addp(Simd1D(scratch), Simd2D(scratch));
+ Umov(dest, Simd1D(scratch), 0);
+ Cmp(dest, Operand(0));
+ Cset(dest, Assembler::Zero);
+}
+
+void MacroAssembler::allTrueInt64x2(FloatRegister src, Register dest_) {
+ ScratchSimd128Scope scratch(*this);
+ ARMRegister dest(dest_, 64);
+ Cmeq(Simd2D(scratch), Simd2D(src), 0);
+ Addp(Simd1D(scratch), Simd2D(scratch));
+ Umov(dest, Simd1D(scratch), 0);
+ Cmp(dest, Operand(0));
+ Cset(dest, Assembler::Zero);
+}
+
+// Bitmask, i.e., extract and compress the high bits of all lanes.
+//
+// There's no direct support for this on the chip. These implementations come
+// from the writeup that added the bitmask instructions to the SIMD instruction
+// set. Generally, shifting and masking are used to isolate the sign bit of
+// each element in the right position, then a horizontal add creates the
+// result. For 8-bit elements an intermediate step is needed to assemble the
+// bits of the upper and lower 8 bytes into 8 halfwords.
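+//
+// Descriptive note (not in the original): for the 8-bit case, Ext and Zip1
+// pair each byte of the low half with the corresponding byte of the high half
+// so that a single Addv over 8 halfwords produces the 16-bit mask.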
+
+void MacroAssembler::bitmaskInt8x16(FloatRegister src, Register dest,
+ FloatRegister temp) {
+ ScratchSimd128Scope scratch(*this);
+ int8_t values[] = {1, 2, 4, 8, 16, 32, 64, -128,
+ 1, 2, 4, 8, 16, 32, 64, -128};
+ loadConstantSimd128(SimdConstant::CreateX16(values), temp);
+ Sshr(Simd16B(scratch), Simd16B(src), 7);
+ And(Simd16B(scratch), Simd16B(scratch), Simd16B(temp));
+ Ext(Simd16B(temp), Simd16B(scratch), Simd16B(scratch), 8);
+ Zip1(Simd16B(temp), Simd16B(scratch), Simd16B(temp));
+ Addv(ARMFPRegister(temp, 16), Simd8H(temp));
+ Mov(ARMRegister(dest, 32), Simd8H(temp), 0);
+}
+
+void MacroAssembler::bitmaskInt16x8(FloatRegister src, Register dest,
+ FloatRegister temp) {
+ ScratchSimd128Scope scratch(*this);
+ int16_t values[] = {1, 2, 4, 8, 16, 32, 64, 128};
+ loadConstantSimd128(SimdConstant::CreateX8(values), temp);
+ Sshr(Simd8H(scratch), Simd8H(src), 15);
+ And(Simd16B(scratch), Simd16B(scratch), Simd16B(temp));
+ Addv(ARMFPRegister(scratch, 16), Simd8H(scratch));
+ Mov(ARMRegister(dest, 32), Simd8H(scratch), 0);
+}
+
+void MacroAssembler::bitmaskInt32x4(FloatRegister src, Register dest,
+ FloatRegister temp) {
+ ScratchSimd128Scope scratch(*this);
+ int32_t values[] = {1, 2, 4, 8};
+ loadConstantSimd128(SimdConstant::CreateX4(values), temp);
+ Sshr(Simd4S(scratch), Simd4S(src), 31);
+ And(Simd16B(scratch), Simd16B(scratch), Simd16B(temp));
+ Addv(ARMFPRegister(scratch, 32), Simd4S(scratch));
+ Mov(ARMRegister(dest, 32), Simd4S(scratch), 0);
+}
+
+void MacroAssembler::bitmaskInt64x2(FloatRegister src, Register dest,
+ FloatRegister temp) {
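+  // Descriptive note (not in the original): narrow each 64-bit lane to 32 bits
+  // (the saturating narrow preserves the sign), shift each lane's sign bit
+  // down to bit 0, then a 64-bit scalar Usra adds lane 1's bit alongside lane
+  // 0's, leaving the 2-bit mask in the low bits for the final Fmov.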
+ Sqxtn(Simd2S(temp), Simd2D(src));
+ Ushr(Simd2S(temp), Simd2S(temp), 31);
+ Usra(ARMFPRegister(temp, 64), ARMFPRegister(temp, 64), 31);
+ Fmov(ARMRegister(dest, 32), ARMFPRegister(temp, 32));
+}
+
+// Comparisons (integer and floating-point)
+
+void MacroAssembler::compareInt8x16(Assembler::Condition cond,
+ FloatRegister rhs, FloatRegister lhsDest) {
+ compareSimd128Int(cond, Simd16B(lhsDest), Simd16B(lhsDest), Simd16B(rhs));
+}
+
+void MacroAssembler::compareInt8x16(Assembler::Condition cond,
+ FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ compareSimd128Int(cond, Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::compareInt16x8(Assembler::Condition cond,
+ FloatRegister rhs, FloatRegister lhsDest) {
+ compareSimd128Int(cond, Simd8H(lhsDest), Simd8H(lhsDest), Simd8H(rhs));
+}
+
+void MacroAssembler::compareInt16x8(Assembler::Condition cond,
+ FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ compareSimd128Int(cond, Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::compareInt32x4(Assembler::Condition cond,
+ FloatRegister rhs, FloatRegister lhsDest) {
+ compareSimd128Int(cond, Simd4S(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
+}
+
+void MacroAssembler::compareInt32x4(Assembler::Condition cond,
+ FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ compareSimd128Int(cond, Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::compareInt64x2(Assembler::Condition cond,
+ FloatRegister rhs, FloatRegister lhsDest) {
+ compareSimd128Int(cond, Simd2D(lhsDest), Simd2D(lhsDest), Simd2D(rhs));
+}
+
+void MacroAssembler::compareInt64x2(Assembler::Condition cond,
+ FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ compareSimd128Int(cond, Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+void MacroAssembler::compareFloat32x4(Assembler::Condition cond,
+ FloatRegister rhs,
+ FloatRegister lhsDest) {
+ compareSimd128Float(cond, Simd4S(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
+}
+
+void MacroAssembler::compareFloat32x4(Assembler::Condition cond,
+ FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ compareSimd128Float(cond, Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::compareFloat64x2(Assembler::Condition cond,
+ FloatRegister rhs,
+ FloatRegister lhsDest) {
+ compareSimd128Float(cond, Simd2D(lhsDest), Simd2D(lhsDest), Simd2D(rhs));
+}
+
+void MacroAssembler::compareFloat64x2(Assembler::Condition cond,
+ FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ compareSimd128Float(cond, Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// Load
+
+void MacroAssembler::loadUnalignedSimd128(const Address& src,
+ FloatRegister dest) {
+ Ldr(ARMFPRegister(dest, 128), toMemOperand(src));
+}
+
+void MacroAssembler::loadUnalignedSimd128(const BaseIndex& address,
+ FloatRegister dest) {
+ doBaseIndex(ARMFPRegister(dest, 128), address, vixl::LDR_q);
+}
+
+// Store
+
+void MacroAssembler::storeUnalignedSimd128(FloatRegister src,
+ const Address& dest) {
+ Str(ARMFPRegister(src, 128), toMemOperand(dest));
+}
+
+void MacroAssembler::storeUnalignedSimd128(FloatRegister src,
+ const BaseIndex& dest) {
+ doBaseIndex(ARMFPRegister(src, 128), dest, vixl::STR_q);
+}
+
+// Floating point negation
+
+void MacroAssembler::negFloat32x4(FloatRegister src, FloatRegister dest) {
+ Fneg(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::negFloat64x2(FloatRegister src, FloatRegister dest) {
+ Fneg(Simd2D(dest), Simd2D(src));
+}
+
+// Floating point absolute value
+
+void MacroAssembler::absFloat32x4(FloatRegister src, FloatRegister dest) {
+ Fabs(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::absFloat64x2(FloatRegister src, FloatRegister dest) {
+ Fabs(Simd2D(dest), Simd2D(src));
+}
+
+// NaN-propagating minimum
+
+void MacroAssembler::minFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmin(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::minFloat32x4(FloatRegister rhs, FloatRegister lhsDest) {
+ Fmin(Simd4S(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
+}
+
+void MacroAssembler::minFloat64x2(FloatRegister rhs, FloatRegister lhsDest) {
+ Fmin(Simd2D(lhsDest), Simd2D(lhsDest), Simd2D(rhs));
+}
+
+void MacroAssembler::minFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmin(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// NaN-propagating maximum
+
+void MacroAssembler::maxFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmax(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::maxFloat32x4(FloatRegister rhs, FloatRegister lhsDest) {
+ Fmax(Simd4S(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
+}
+
+void MacroAssembler::maxFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmax(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+void MacroAssembler::maxFloat64x2(FloatRegister rhs, FloatRegister lhsDest) {
+ Fmax(Simd2D(lhsDest), Simd2D(lhsDest), Simd2D(rhs));
+}
+
+// Floating add
+
+void MacroAssembler::addFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fadd(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::addFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fadd(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// Floating subtract
+
+void MacroAssembler::subFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fsub(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::subFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fsub(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// Floating division
+
+void MacroAssembler::divFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fdiv(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::divFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fdiv(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// Floating Multiply
+
+void MacroAssembler::mulFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmul(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::mulFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmul(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// Pairwise add
+
+void MacroAssembler::extAddPairwiseInt8x16(FloatRegister src,
+ FloatRegister dest) {
+ Saddlp(Simd8H(dest), Simd16B(src));
+}
+
+void MacroAssembler::unsignedExtAddPairwiseInt8x16(FloatRegister src,
+ FloatRegister dest) {
+ Uaddlp(Simd8H(dest), Simd16B(src));
+}
+
+void MacroAssembler::extAddPairwiseInt16x8(FloatRegister src,
+ FloatRegister dest) {
+ Saddlp(Simd4S(dest), Simd8H(src));
+}
+
+void MacroAssembler::unsignedExtAddPairwiseInt16x8(FloatRegister src,
+ FloatRegister dest) {
+ Uaddlp(Simd4S(dest), Simd8H(src));
+}
+
+// Floating square root
+
+void MacroAssembler::sqrtFloat32x4(FloatRegister src, FloatRegister dest) {
+ Fsqrt(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::sqrtFloat64x2(FloatRegister src, FloatRegister dest) {
+ Fsqrt(Simd2D(dest), Simd2D(src));
+}
+
+// Integer to floating point with rounding
+
+void MacroAssembler::convertInt32x4ToFloat32x4(FloatRegister src,
+ FloatRegister dest) {
+ Scvtf(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::unsignedConvertInt32x4ToFloat32x4(FloatRegister src,
+ FloatRegister dest) {
+ Ucvtf(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::convertInt32x4ToFloat64x2(FloatRegister src,
+ FloatRegister dest) {
+ Sshll(Simd2D(dest), Simd2S(src), 0);
+ Scvtf(Simd2D(dest), Simd2D(dest));
+}
+
+void MacroAssembler::unsignedConvertInt32x4ToFloat64x2(FloatRegister src,
+ FloatRegister dest) {
+ Ushll(Simd2D(dest), Simd2S(src), 0);
+ Ucvtf(Simd2D(dest), Simd2D(dest));
+}
+
+// Floating point to integer with saturation
+
+void MacroAssembler::truncSatFloat32x4ToInt32x4(FloatRegister src,
+ FloatRegister dest) {
+ Fcvtzs(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src,
+ FloatRegister dest) {
+ Fcvtzu(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::truncSatFloat64x2ToInt32x4(FloatRegister src,
+ FloatRegister dest,
+ FloatRegister temp) {
+ Fcvtzs(Simd2D(dest), Simd2D(src));
+ Sqxtn(Simd2S(dest), Simd2D(dest));
+}
+
+void MacroAssembler::unsignedTruncSatFloat64x2ToInt32x4(FloatRegister src,
+ FloatRegister dest,
+ FloatRegister temp) {
+ Fcvtzu(Simd2D(dest), Simd2D(src));
+ Uqxtn(Simd2S(dest), Simd2D(dest));
+}
+
+void MacroAssembler::truncFloat32x4ToInt32x4Relaxed(FloatRegister src,
+ FloatRegister dest) {
+ Fcvtzs(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::unsignedTruncFloat32x4ToInt32x4Relaxed(
+ FloatRegister src, FloatRegister dest) {
+ Fcvtzu(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::truncFloat64x2ToInt32x4Relaxed(FloatRegister src,
+ FloatRegister dest) {
+ Fcvtzs(Simd2D(dest), Simd2D(src));
+ Sqxtn(Simd2S(dest), Simd2D(dest));
+}
+
+void MacroAssembler::unsignedTruncFloat64x2ToInt32x4Relaxed(
+ FloatRegister src, FloatRegister dest) {
+ Fcvtzu(Simd2D(dest), Simd2D(src));
+ Uqxtn(Simd2S(dest), Simd2D(dest));
+}
+
+// Floating point narrowing
+
+void MacroAssembler::convertFloat64x2ToFloat32x4(FloatRegister src,
+ FloatRegister dest) {
+ Fcvtn(Simd2S(dest), Simd2D(src));
+}
+
+// Floating point widening
+
+void MacroAssembler::convertFloat32x4ToFloat64x2(FloatRegister src,
+ FloatRegister dest) {
+ Fcvtl(Simd2D(dest), Simd2S(src));
+}
+
+// Integer to integer narrowing
+
+void MacroAssembler::narrowInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ if (rhs == dest) {
+ Mov(scratch, SimdReg(rhs));
+ rhs = scratch;
+ }
+ Sqxtn(Simd8B(dest), Simd8H(lhs));
+ Sqxtn2(Simd16B(dest), Simd8H(rhs));
+}
+
+void MacroAssembler::unsignedNarrowInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ if (rhs == dest) {
+ Mov(scratch, SimdReg(rhs));
+ rhs = scratch;
+ }
+ Sqxtun(Simd8B(dest), Simd8H(lhs));
+ Sqxtun2(Simd16B(dest), Simd8H(rhs));
+}
+
+void MacroAssembler::narrowInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ if (rhs == dest) {
+ Mov(scratch, SimdReg(rhs));
+ rhs = scratch;
+ }
+ Sqxtn(Simd4H(dest), Simd4S(lhs));
+ Sqxtn2(Simd8H(dest), Simd4S(rhs));
+}
+
+void MacroAssembler::unsignedNarrowInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ if (rhs == dest) {
+ Mov(scratch, SimdReg(rhs));
+ rhs = scratch;
+ }
+ Sqxtun(Simd4H(dest), Simd4S(lhs));
+ Sqxtun2(Simd8H(dest), Simd4S(rhs));
+}
+
+// Integer to integer widening
+
+void MacroAssembler::widenLowInt8x16(FloatRegister src, FloatRegister dest) {
+ Sshll(Simd8H(dest), Simd8B(src), 0);
+}
+
+void MacroAssembler::widenHighInt8x16(FloatRegister src, FloatRegister dest) {
+ Sshll2(Simd8H(dest), Simd16B(src), 0);
+}
+
+void MacroAssembler::unsignedWidenLowInt8x16(FloatRegister src,
+ FloatRegister dest) {
+ Ushll(Simd8H(dest), Simd8B(src), 0);
+}
+
+void MacroAssembler::unsignedWidenHighInt8x16(FloatRegister src,
+ FloatRegister dest) {
+ Ushll2(Simd8H(dest), Simd16B(src), 0);
+}
+
+void MacroAssembler::widenLowInt16x8(FloatRegister src, FloatRegister dest) {
+ Sshll(Simd4S(dest), Simd4H(src), 0);
+}
+
+void MacroAssembler::widenHighInt16x8(FloatRegister src, FloatRegister dest) {
+ Sshll2(Simd4S(dest), Simd8H(src), 0);
+}
+
+void MacroAssembler::unsignedWidenLowInt16x8(FloatRegister src,
+ FloatRegister dest) {
+ Ushll(Simd4S(dest), Simd4H(src), 0);
+}
+
+void MacroAssembler::unsignedWidenHighInt16x8(FloatRegister src,
+ FloatRegister dest) {
+ Ushll2(Simd4S(dest), Simd8H(src), 0);
+}
+
+void MacroAssembler::widenLowInt32x4(FloatRegister src, FloatRegister dest) {
+ Sshll(Simd2D(dest), Simd2S(src), 0);
+}
+
+void MacroAssembler::unsignedWidenLowInt32x4(FloatRegister src,
+ FloatRegister dest) {
+ Ushll(Simd2D(dest), Simd2S(src), 0);
+}
+
+void MacroAssembler::widenHighInt32x4(FloatRegister src, FloatRegister dest) {
+ Sshll2(Simd2D(dest), Simd4S(src), 0);
+}
+
+void MacroAssembler::unsignedWidenHighInt32x4(FloatRegister src,
+ FloatRegister dest) {
+ Ushll2(Simd2D(dest), Simd4S(src), 0);
+}
+
+// Compare-based minimum/maximum (experimental as of August 2020)
+// https://github.com/WebAssembly/simd/pull/122
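+//
+// Descriptive note (not in the original): pmin(a, b) is defined as
+// (b < a) ? b : a and pmax(a, b) as (a < b) ? b : a, so each reduces to a
+// single Fcmgt feeding a Bsl select.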
+
+void MacroAssembler::pseudoMinFloat32x4(FloatRegister rhsOrRhsDest,
+ FloatRegister lhsOrLhsDest) {
+  // Use the same parameter names as in the declaration to satisfy the linter,
+  // then alias them here.
+ FloatRegister rhs = rhsOrRhsDest;
+ FloatRegister lhsDest = lhsOrLhsDest;
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd4S(scratch), Simd4S(lhsDest), Simd4S(rhs));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhsDest));
+ Mov(SimdReg(lhsDest), scratch);
+}
+
+void MacroAssembler::pseudoMinFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd4S(scratch), Simd4S(lhs), Simd4S(rhs));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhs));
+ Mov(SimdReg(dest), scratch);
+}
+
+void MacroAssembler::pseudoMinFloat64x2(FloatRegister rhsOrRhsDest,
+ FloatRegister lhsOrLhsDest) {
+ FloatRegister rhs = rhsOrRhsDest;
+ FloatRegister lhsDest = lhsOrLhsDest;
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd2D(scratch), Simd2D(lhsDest), Simd2D(rhs));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhsDest));
+ Mov(SimdReg(lhsDest), scratch);
+}
+
+void MacroAssembler::pseudoMinFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd2D(scratch), Simd2D(lhs), Simd2D(rhs));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhs));
+ Mov(SimdReg(dest), scratch);
+}
+
+void MacroAssembler::pseudoMaxFloat32x4(FloatRegister rhsOrRhsDest,
+ FloatRegister lhsOrLhsDest) {
+ FloatRegister rhs = rhsOrRhsDest;
+ FloatRegister lhsDest = lhsOrLhsDest;
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd4S(scratch), Simd4S(rhs), Simd4S(lhsDest));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhsDest));
+ Mov(SimdReg(lhsDest), scratch);
+}
+
+void MacroAssembler::pseudoMaxFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd4S(scratch), Simd4S(rhs), Simd4S(lhs));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhs));
+ Mov(SimdReg(dest), scratch);
+}
+
+void MacroAssembler::pseudoMaxFloat64x2(FloatRegister rhsOrRhsDest,
+ FloatRegister lhsOrLhsDest) {
+ FloatRegister rhs = rhsOrRhsDest;
+ FloatRegister lhsDest = lhsOrLhsDest;
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd2D(scratch), Simd2D(rhs), Simd2D(lhsDest));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhsDest));
+ Mov(SimdReg(lhsDest), scratch);
+}
+
+void MacroAssembler::pseudoMaxFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd2D(scratch), Simd2D(rhs), Simd2D(lhs));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhs));
+ Mov(SimdReg(dest), scratch);
+}
+
+// Widening/pairwise integer dot product (experimental as of August, 2020)
+// https://github.com/WebAssembly/simd/pull/127
+
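+// widenDotInt16x8 multiplies corresponding i16 lanes into i32 products for
+// the low and high halves separately (Smull/Smull2) and then sums adjacent
+// products with Addp, which concatenates its two sources and adds
+// neighbouring lanes.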
+void MacroAssembler::widenDotInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ Smull(Simd4S(scratch), Simd4H(lhs), Simd4H(rhs));
+ Smull2(Simd4S(dest), Simd8H(lhs), Simd8H(rhs));
+ Addp(Simd4S(dest), Simd4S(scratch), Simd4S(dest));
+}
+
+void MacroAssembler::dotInt8x16Int7x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ Smull(Simd8H(scratch), Simd8B(lhs), Simd8B(rhs));
+ Smull2(Simd8H(dest), Simd16B(lhs), Simd16B(rhs));
+ Addp(Simd8H(dest), Simd8H(scratch), Simd8H(dest));
+}
+
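+// As for dotInt8x16Int7x16 above, but Sadalp then widens adjacent i16 lanes
+// of the intermediate, sums each pair, and accumulates the result into the
+// existing i32x4 value in dest.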
+void MacroAssembler::dotInt8x16Int7x16ThenAdd(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest,
+ FloatRegister temp) {
+ ScratchSimd128Scope scratch(*this);
+ Smull(Simd8H(scratch), Simd8B(lhs), Simd8B(rhs));
+ Smull2(Simd8H(temp), Simd16B(lhs), Simd16B(rhs));
+ Addp(Simd8H(temp), Simd8H(scratch), Simd8H(temp));
+ Sadalp(Simd4S(dest), Simd8H(temp));
+}
+
+// Floating point rounding (experimental as of August, 2020)
+// https://github.com/WebAssembly/simd/pull/232
+
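+// Frintp rounds toward +infinity (ceil), Frintm toward -infinity (floor),
+// Frintz toward zero (trunc), and Frintn to nearest with ties to even.
+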
+void MacroAssembler::ceilFloat32x4(FloatRegister src, FloatRegister dest) {
+ Frintp(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::ceilFloat64x2(FloatRegister src, FloatRegister dest) {
+ Frintp(Simd2D(dest), Simd2D(src));
+}
+
+void MacroAssembler::floorFloat32x4(FloatRegister src, FloatRegister dest) {
+ Frintm(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::floorFloat64x2(FloatRegister src, FloatRegister dest) {
+ Frintm(Simd2D(dest), Simd2D(src));
+}
+
+void MacroAssembler::truncFloat32x4(FloatRegister src, FloatRegister dest) {
+ Frintz(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::truncFloat64x2(FloatRegister src, FloatRegister dest) {
+ Frintz(Simd2D(dest), Simd2D(src));
+}
+
+void MacroAssembler::nearestFloat32x4(FloatRegister src, FloatRegister dest) {
+ Frintn(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::nearestFloat64x2(FloatRegister src, FloatRegister dest) {
+ Frintn(Simd2D(dest), Simd2D(src));
+}
+
+// Floating multiply-accumulate: srcDest [+-]= src1 * src2
+
+void MacroAssembler::fmaFloat32x4(FloatRegister src1, FloatRegister src2,
+ FloatRegister srcDest) {
+ Fmla(Simd4S(srcDest), Simd4S(src1), Simd4S(src2));
+}
+
+void MacroAssembler::fnmaFloat32x4(FloatRegister src1, FloatRegister src2,
+ FloatRegister srcDest) {
+ Fmls(Simd4S(srcDest), Simd4S(src1), Simd4S(src2));
+}
+
+void MacroAssembler::fmaFloat64x2(FloatRegister src1, FloatRegister src2,
+ FloatRegister srcDest) {
+ Fmla(Simd2D(srcDest), Simd2D(src1), Simd2D(src2));
+}
+
+void MacroAssembler::fnmaFloat64x2(FloatRegister src1, FloatRegister src2,
+ FloatRegister srcDest) {
+ Fmls(Simd2D(srcDest), Simd2D(src1), Simd2D(src2));
+}
+
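+// For the relaxed variants the result is implementation-defined when an
+// input is NaN or the inputs are +0 and -0, so the plain hardware Fmin/Fmax
+// result is acceptable as-is.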
+void MacroAssembler::minFloat32x4Relaxed(FloatRegister src,
+ FloatRegister srcDest) {
+ Fmin(Simd4S(srcDest), Simd4S(src), Simd4S(srcDest));
+}
+
+void MacroAssembler::minFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmin(Simd4S(dest), Simd4S(rhs), Simd4S(lhs));
+}
+
+void MacroAssembler::maxFloat32x4Relaxed(FloatRegister src,
+ FloatRegister srcDest) {
+ Fmax(Simd4S(srcDest), Simd4S(src), Simd4S(srcDest));
+}
+
+void MacroAssembler::maxFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmax(Simd4S(dest), Simd4S(rhs), Simd4S(lhs));
+}
+
+void MacroAssembler::minFloat64x2Relaxed(FloatRegister src,
+ FloatRegister srcDest) {
+ Fmin(Simd2D(srcDest), Simd2D(src), Simd2D(srcDest));
+}
+
+void MacroAssembler::minFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmin(Simd2D(dest), Simd2D(rhs), Simd2D(lhs));
+}
+
+void MacroAssembler::maxFloat64x2Relaxed(FloatRegister src,
+ FloatRegister srcDest) {
+ Fmax(Simd2D(srcDest), Simd2D(src), Simd2D(srcDest));
+}
+
+void MacroAssembler::maxFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmax(Simd2D(dest), Simd2D(rhs), Simd2D(lhs));
+}
+
+//}}} check_macroassembler_style
+// ===============================================================
+
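+// The helpers below operate on the in-use stack pointer, which is normally
+// the pseudo stack pointer (PSP) rather than the architectural SP. Where the
+// PSP is updated, syncStackPtr() copies the new value into SP so that the
+// required invariant SP <= PSP continues to hold.
+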
+void MacroAssemblerCompat::addToStackPtr(Register src) {
+ Add(GetStackPointer64(), GetStackPointer64(), ARMRegister(src, 64));
+ // Given the required invariant SP <= PSP, this is probably pointless,
+ // since it gives PSP a larger value.
+ syncStackPtr();
+}
+
+void MacroAssemblerCompat::addToStackPtr(Imm32 imm) {
+ Add(GetStackPointer64(), GetStackPointer64(), Operand(imm.value));
+ // As above, probably pointless.
+ syncStackPtr();
+}
+
+void MacroAssemblerCompat::addToStackPtr(const Address& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ Ldr(scratch, toMemOperand(src));
+ Add(GetStackPointer64(), GetStackPointer64(), scratch);
+ // As above, probably pointless.
+ syncStackPtr();
+}
+
+void MacroAssemblerCompat::addStackPtrTo(Register dest) {
+ Add(ARMRegister(dest, 64), ARMRegister(dest, 64), GetStackPointer64());
+}
+
+void MacroAssemblerCompat::subFromStackPtr(Register src) {
+ Sub(GetStackPointer64(), GetStackPointer64(), ARMRegister(src, 64));
+ syncStackPtr();
+}
+
+void MacroAssemblerCompat::subFromStackPtr(Imm32 imm) {
+ Sub(GetStackPointer64(), GetStackPointer64(), Operand(imm.value));
+ syncStackPtr();
+}
+
+void MacroAssemblerCompat::subStackPtrFrom(Register dest) {
+ Sub(ARMRegister(dest, 64), ARMRegister(dest, 64), GetStackPointer64());
+}
+
+void MacroAssemblerCompat::andToStackPtr(Imm32 imm) {
+ if (sp.Is(GetStackPointer64())) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ Mov(scratch, sp);
+ And(sp, scratch, Operand(imm.value));
+ // syncStackPtr() not needed since our SP is the real SP.
+ } else {
+ And(GetStackPointer64(), GetStackPointer64(), Operand(imm.value));
+ syncStackPtr();
+ }
+}
+
+void MacroAssemblerCompat::moveToStackPtr(Register src) {
+ Mov(GetStackPointer64(), ARMRegister(src, 64));
+ syncStackPtr();
+}
+
+void MacroAssemblerCompat::moveStackPtrTo(Register dest) {
+ Mov(ARMRegister(dest, 64), GetStackPointer64());
+}
+
+void MacroAssemblerCompat::loadStackPtr(const Address& src) {
+ if (sp.Is(GetStackPointer64())) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ Ldr(scratch, toMemOperand(src));
+ Mov(sp, scratch);
+ // syncStackPtr() not needed since our SP is the real SP.
+ } else {
+ Ldr(GetStackPointer64(), toMemOperand(src));
+ syncStackPtr();
+ }
+}
+
+void MacroAssemblerCompat::storeStackPtr(const Address& dest) {
+ if (sp.Is(GetStackPointer64())) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ Mov(scratch, sp);
+ Str(scratch, toMemOperand(dest));
+ } else {
+ Str(GetStackPointer64(), toMemOperand(dest));
+ }
+}
+
+void MacroAssemblerCompat::branchTestStackPtr(Condition cond, Imm32 rhs,
+ Label* label) {
+ if (sp.Is(GetStackPointer64())) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ Mov(scratch, sp);
+ Tst(scratch, Operand(rhs.value));
+ } else {
+ Tst(GetStackPointer64(), Operand(rhs.value));
+ }
+ B(label, cond);
+}
+
+void MacroAssemblerCompat::branchStackPtr(Condition cond, Register rhs_,
+ Label* label) {
+ ARMRegister rhs(rhs_, 64);
+ if (sp.Is(GetStackPointer64())) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ Mov(scratch, sp);
+ Cmp(scratch, rhs);
+ } else {
+ Cmp(GetStackPointer64(), rhs);
+ }
+ B(label, cond);
+}
+
+void MacroAssemblerCompat::branchStackPtrRhs(Condition cond, Address lhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ Ldr(scratch, toMemOperand(lhs));
+ // Cmp disallows SP as the rhs, so flip the operands and invert the
+ // condition.
+ Cmp(GetStackPointer64(), scratch);
+ B(label, Assembler::InvertCondition(cond));
+}
+
+void MacroAssemblerCompat::branchStackPtrRhs(Condition cond,
+ AbsoluteAddress lhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ loadPtr(lhs, scratch.asUnsized());
+ // Cmp disallows SP as the rhs, so flip the operands and invert the
+ // condition.
+ Cmp(GetStackPointer64(), scratch);
+ B(label, Assembler::InvertCondition(cond));
+}
+
+// If source is a double, load into dest.
+// If source is int32, convert to double and store in dest.
+// Else, branch to failure.
+void MacroAssemblerCompat::ensureDouble(const ValueOperand& source,
+ FloatRegister dest, Label* failure) {
+ Label isDouble, done;
+
+ {
+ ScratchTagScope tag(asMasm(), source);
+ splitTagForTest(source, tag);
+ asMasm().branchTestDouble(Assembler::Equal, tag, &isDouble);
+ asMasm().branchTestInt32(Assembler::NotEqual, tag, failure);
+ }
+
+ convertInt32ToDouble(source.valueReg(), dest);
+ jump(&done);
+
+ bind(&isDouble);
+ unboxDouble(source, dest);
+
+ bind(&done);
+}
+
+void MacroAssemblerCompat::unboxValue(const ValueOperand& src, AnyRegister dest,
+ JSValueType type) {
+ if (dest.isFloat()) {
+ Label notInt32, end;
+ asMasm().branchTestInt32(Assembler::NotEqual, src, &notInt32);
+ convertInt32ToDouble(src.valueReg(), dest.fpu());
+ jump(&end);
+ bind(&notInt32);
+ unboxDouble(src, dest.fpu());
+ bind(&end);
+ } else {
+ unboxNonDouble(src, dest.gpr(), type);
+ }
+}
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm64_MacroAssembler_arm64_inl_h */
diff --git a/js/src/jit/arm64/MacroAssembler-arm64.cpp b/js/src/jit/arm64/MacroAssembler-arm64.cpp
new file mode 100644
index 0000000000..a4aff730e6
--- /dev/null
+++ b/js/src/jit/arm64/MacroAssembler-arm64.cpp
@@ -0,0 +1,3416 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm64/MacroAssembler-arm64.h"
+
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
+
+#include "jsmath.h"
+
+#include "jit/arm64/MoveEmitter-arm64.h"
+#include "jit/arm64/SharedICRegisters-arm64.h"
+#include "jit/Bailouts.h"
+#include "jit/BaselineFrame.h"
+#include "jit/JitRuntime.h"
+#include "jit/MacroAssembler.h"
+#include "util/Memory.h"
+#include "vm/BigIntType.h"
+#include "vm/JitActivation.h" // js::jit::JitActivation
+#include "vm/JSContext.h"
+#include "vm/StringType.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+namespace js {
+namespace jit {
+
+enum class Width { _32 = 32, _64 = 64 };
+
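+// Convenience wrappers that convert a jit::Register to a vixl ARMRegister:
+// X() gives the 64-bit view, W() the 32-bit view, and R() a view of the
+// width given by `w`.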
+static inline ARMRegister X(Register r) { return ARMRegister(r, 64); }
+
+static inline ARMRegister X(MacroAssembler& masm, RegisterOrSP r) {
+ return masm.toARMRegister(r, 64);
+}
+
+static inline ARMRegister W(Register r) { return ARMRegister(r, 32); }
+
+static inline ARMRegister R(Register r, Width w) {
+ return ARMRegister(r, unsigned(w));
+}
+
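+// Box a payload with the given type by OR'ing the shifted tag into the upper
+// bits. The DEBUG block verifies that int32/boolean payloads have their
+// upper 32 bits clear, since a stray bit there would corrupt the tag.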
+void MacroAssemblerCompat::boxValue(JSValueType type, Register src,
+ Register dest) {
+#ifdef DEBUG
+ if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) {
+ Label upper32BitsZeroed;
+ movePtr(ImmWord(UINT32_MAX), dest);
+ asMasm().branchPtr(Assembler::BelowOrEqual, src, dest, &upper32BitsZeroed);
+ breakpoint();
+ bind(&upper32BitsZeroed);
+ }
+#endif
+ Orr(ARMRegister(dest, 64), ARMRegister(src, 64),
+ Operand(ImmShiftedTag(type).value));
+}
+
+#ifdef ENABLE_WASM_SIMD
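+// Wasm SIMD shift counts are taken modulo the lane width, so on arm64 the
+// count is always masked to (lane width - 1) before use; this therefore
+// always returns true and merely reports the appropriate mask.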
+bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask) {
+ switch (op) {
+ case wasm::SimdOp::I8x16Shl:
+ case wasm::SimdOp::I8x16ShrU:
+ case wasm::SimdOp::I8x16ShrS:
+ *mask = 7;
+ break;
+ case wasm::SimdOp::I16x8Shl:
+ case wasm::SimdOp::I16x8ShrU:
+ case wasm::SimdOp::I16x8ShrS:
+ *mask = 15;
+ break;
+ case wasm::SimdOp::I32x4Shl:
+ case wasm::SimdOp::I32x4ShrU:
+ case wasm::SimdOp::I32x4ShrS:
+ *mask = 31;
+ break;
+ case wasm::SimdOp::I64x2Shl:
+ case wasm::SimdOp::I64x2ShrU:
+ case wasm::SimdOp::I64x2ShrS:
+ *mask = 63;
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift operation");
+ }
+ return true;
+}
+#endif
+
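+// Convert with Fcvtns (round to nearest, ties to even), then clamp the
+// result to [0, 255] with two conditional selects.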
+void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) {
+ ARMRegister dest(output, 32);
+ Fcvtns(dest, ARMFPRegister(input, 64));
+
+ {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+
+ Mov(scratch32, Operand(0xff));
+ Cmp(dest, scratch32);
+ Csel(dest, dest, scratch32, LessThan);
+ }
+
+ Cmp(dest, Operand(0));
+ Csel(dest, dest, wzr, GreaterThan);
+}
+
+js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() {
+ return *static_cast<js::jit::MacroAssembler*>(this);
+}
+
+const js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() const {
+ return *static_cast<const js::jit::MacroAssembler*>(this);
+}
+
+vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() {
+ return *static_cast<vixl::MacroAssembler*>(this);
+}
+
+const vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() const {
+ return *static_cast<const vixl::MacroAssembler*>(this);
+}
+
+void MacroAssemblerCompat::mov(CodeLabel* label, Register dest) {
+ BufferOffset bo = movePatchablePtr(ImmWord(/* placeholder */ 0), dest);
+ label->patchAt()->bind(bo.getOffset());
+ label->setLinkMode(CodeLabel::MoveImmediate);
+}
+
+BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmPtr ptr, Register dest) {
+ const size_t numInst = 1; // Inserting one load instruction.
+ const unsigned numPoolEntries = 2; // Every pool entry is 4 bytes.
+ uint8_t* literalAddr = (uint8_t*)(&ptr.value); // TODO: Should be const.
+
+ // Scratch space for generating the load instruction.
+ //
+ // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary
+ // index to the corresponding PoolEntry in the instruction itself.
+ //
+ // That index will be fixed up later when finishPool()
+ // walks over all marked loads and calls PatchConstantPoolLoad().
+ uint32_t instructionScratch = 0;
+
+ // Emit the instruction mask in the scratch space.
+ // The offset doesn't matter: it will be fixed up later.
+ vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64),
+ 0);
+
+ // Add the entry to the pool, fix up the LDR imm19 offset,
+ // and add the completed instruction to the buffer.
+ return allocLiteralLoadEntry(numInst, numPoolEntries,
+ (uint8_t*)&instructionScratch, literalAddr);
+}
+
+BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmWord ptr,
+ Register dest) {
+ const size_t numInst = 1; // Inserting one load instruction.
+ const unsigned numPoolEntries = 2; // Every pool entry is 4 bytes.
+ uint8_t* literalAddr = (uint8_t*)(&ptr.value);
+
+ // Scratch space for generating the load instruction.
+ //
+ // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary
+ // index to the corresponding PoolEntry in the instruction itself.
+ //
+ // That index will be fixed up later when finishPool()
+ // walks over all marked loads and calls PatchConstantPoolLoad().
+ uint32_t instructionScratch = 0;
+
+ // Emit the instruction mask in the scratch space.
+ // The offset doesn't matter: it will be fixed up later.
+ vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64),
+ 0);
+
+ // Add the entry to the pool, fix up the LDR imm19 offset,
+ // and add the completed instruction to the buffer.
+ return allocLiteralLoadEntry(numInst, numPoolEntries,
+ (uint8_t*)&instructionScratch, literalAddr);
+}
+
+void MacroAssemblerCompat::loadPrivate(const Address& src, Register dest) {
+ loadPtr(src, dest);
+}
+
+void MacroAssemblerCompat::handleFailureWithHandlerTail(Label* profilerExitTail,
+ Label* bailoutTail) {
+ // Fail rather than silently create wrong code.
+ MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
+
+ // Reserve space for exception information.
+ int64_t size = (sizeof(ResumeFromException) + 7) & ~7;
+ Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(size));
+ syncStackPtr();
+
+ MOZ_ASSERT(!x0.Is(PseudoStackPointer64));
+ Mov(x0, PseudoStackPointer64);
+
+ // Call the handler.
+ using Fn = void (*)(ResumeFromException* rfe);
+ asMasm().setupUnalignedABICall(r1);
+ asMasm().passABIArg(r0);
+ asMasm().callWithABI<Fn, HandleException>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckHasExitFrame);
+
+ Label entryFrame;
+ Label catch_;
+ Label finally;
+ Label returnBaseline;
+ Label returnIon;
+ Label bailout;
+ Label wasm;
+ Label wasmCatch;
+
+ // Check the `asMasm` calls above didn't mess with the StackPointer identity.
+ MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
+
+ loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfKind()), r0);
+ asMasm().branch32(Assembler::Equal, r0,
+ Imm32(ExceptionResumeKind::EntryFrame), &entryFrame);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Catch),
+ &catch_);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Finally),
+ &finally);
+ asMasm().branch32(Assembler::Equal, r0,
+ Imm32(ExceptionResumeKind::ForcedReturnBaseline),
+ &returnBaseline);
+ asMasm().branch32(Assembler::Equal, r0,
+ Imm32(ExceptionResumeKind::ForcedReturnIon), &returnIon);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Bailout),
+ &bailout);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Wasm),
+ &wasm);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::WasmCatch),
+ &wasmCatch);
+
+ breakpoint(); // Invalid kind.
+
+ // No exception handler. Load the error value, restore state and return from
+ // the entry frame.
+ bind(&entryFrame);
+ moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()),
+ FramePointer);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()),
+ PseudoStackPointer);
+
+ // `retn` does indeed sync the stack pointer, but before doing that it reads
+ // from the stack. Consequently, if we remove this call to syncStackPtr()
+ // then we take on the requirement to prove that the immediately preceding
+ // loadPtr produces a value for PSP which maintains the SP <= PSP invariant.
+ // That's a proof burden we don't want to take on. In general it would be
+ // good to move (at some time in the future, not now) to a world where
+ // *every* assignment to PSP or SP is followed immediately by a copy into
+ // the other register. That would make all required correctness proofs
+ // trivial in the sense that it requires only local inspection of code
+ // immediately following (dominated by) any such assignment.
+ syncStackPtr();
+ retn(Imm32(1 * sizeof(void*))); // Pop from stack and return.
+
+ // If we found a catch handler, this must be a baseline frame. Restore state
+ // and jump to the catch block.
+ bind(&catch_);
+ loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfTarget()),
+ r0);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()),
+ FramePointer);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()),
+ PseudoStackPointer);
+ syncStackPtr();
+ Br(x0);
+
+ // If we found a finally block, this must be a baseline frame. Push two
+ // values expected by the finally block: the exception and BooleanValue(true).
+ bind(&finally);
+ ARMRegister exception = x1;
+ Ldr(exception, MemOperand(PseudoStackPointer64,
+ ResumeFromException::offsetOfException()));
+ Ldr(x0,
+ MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfTarget()));
+ Ldr(ARMRegister(FramePointer, 64),
+ MemOperand(PseudoStackPointer64,
+ ResumeFromException::offsetOfFramePointer()));
+ Ldr(PseudoStackPointer64,
+ MemOperand(PseudoStackPointer64,
+ ResumeFromException::offsetOfStackPointer()));
+ syncStackPtr();
+ push(exception);
+ pushValue(BooleanValue(true));
+ Br(x0);
+
+ // Return BaselineFrame->returnValue() to the caller.
+ // Used in debug mode and for GeneratorReturn.
+ Label profilingInstrumentation;
+ bind(&returnBaseline);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()),
+ FramePointer);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()),
+ PseudoStackPointer);
+ // See comment further up beginning "`retn` does indeed sync the stack
+ // pointer". That comment applies here too.
+ syncStackPtr();
+ loadValue(Address(FramePointer, BaselineFrame::reverseOffsetOfReturnValue()),
+ JSReturnOperand);
+ jump(&profilingInstrumentation);
+
+ // Return the given value to the caller.
+ bind(&returnIon);
+ loadValue(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfException()),
+ JSReturnOperand);
+ loadPtr(
+ Address(PseudoStackPointer, offsetof(ResumeFromException, framePointer)),
+ FramePointer);
+ loadPtr(
+ Address(PseudoStackPointer, offsetof(ResumeFromException, stackPointer)),
+ PseudoStackPointer);
+ syncStackPtr();
+
+ // If profiling is enabled, then update the lastProfilingFrame to refer to
+ // caller frame before returning. This code is shared by ForcedReturnIon
+ // and ForcedReturnBaseline.
+ bind(&profilingInstrumentation);
+ {
+ Label skipProfilingInstrumentation;
+ AbsoluteAddress addressOfEnabled(
+ asMasm().runtime()->geckoProfiler().addressOfEnabled());
+ asMasm().branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
+ &skipProfilingInstrumentation);
+ jump(profilerExitTail);
+ bind(&skipProfilingInstrumentation);
+ }
+
+ movePtr(FramePointer, PseudoStackPointer);
+ syncStackPtr();
+ vixl::MacroAssembler::Pop(ARMRegister(FramePointer, 64));
+
+ vixl::MacroAssembler::Pop(vixl::lr);
+ syncStackPtr();
+ vixl::MacroAssembler::Ret(vixl::lr);
+
+ // If we are bailing out to baseline to handle an exception, jump to the
+ // bailout tail stub. Load 1 (true) in x0 (ReturnReg) to indicate success.
+ bind(&bailout);
+ Ldr(x2, MemOperand(PseudoStackPointer64,
+ ResumeFromException::offsetOfBailoutInfo()));
+ Ldr(PseudoStackPointer64,
+ MemOperand(PseudoStackPointer64,
+ ResumeFromException::offsetOfStackPointer()));
+ syncStackPtr();
+ Mov(x0, 1);
+ jump(bailoutTail);
+
+ // If we are throwing and the innermost frame was a wasm frame, reset SP and
+ // FP; SP is pointing to the unwound return address to the wasm entry, so
+ // we can just ret().
+ bind(&wasm);
+ Ldr(x29, MemOperand(PseudoStackPointer64,
+ ResumeFromException::offsetOfFramePointer()));
+ Ldr(PseudoStackPointer64,
+ MemOperand(PseudoStackPointer64,
+ ResumeFromException::offsetOfStackPointer()));
+ syncStackPtr();
+ Mov(x23, int64_t(wasm::FailInstanceReg));
+ ret();
+
+ // Found a wasm catch handler, restore state and jump to it.
+ bind(&wasmCatch);
+ loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfTarget()),
+ r0);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()),
+ r29);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()),
+ PseudoStackPointer);
+ syncStackPtr();
+ Br(x0);
+
+ MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
+}
+
+void MacroAssemblerCompat::profilerEnterFrame(Register framePtr,
+ Register scratch) {
+ asMasm().loadJSContext(scratch);
+ loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch);
+ storePtr(framePtr,
+ Address(scratch, JitActivation::offsetOfLastProfilingFrame()));
+ storePtr(ImmPtr(nullptr),
+ Address(scratch, JitActivation::offsetOfLastProfilingCallSite()));
+}
+
+void MacroAssemblerCompat::profilerExitFrame() {
+ jump(asMasm().runtime()->jitRuntime()->getProfilerExitFrameTail());
+}
+
+Assembler::Condition MacroAssemblerCompat::testStringTruthy(
+ bool truthy, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ const ARMRegister scratch32(scratch, 32);
+ const ARMRegister scratch64(scratch, 64);
+
+ MOZ_ASSERT(value.valueReg() != scratch);
+
+ unboxString(value, scratch);
+ Ldr(scratch32, MemOperand(scratch64, JSString::offsetOfLength()));
+ Cmp(scratch32, Operand(0));
+ return truthy ? Condition::NonZero : Condition::Zero;
+}
+
+Assembler::Condition MacroAssemblerCompat::testBigIntTruthy(
+ bool truthy, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+
+ MOZ_ASSERT(value.valueReg() != scratch);
+
+ unboxBigInt(value, scratch);
+ load32(Address(scratch, BigInt::offsetOfDigitLength()), scratch);
+ cmp32(scratch, Imm32(0));
+ return truthy ? Condition::NonZero : Condition::Zero;
+}
+
+void MacroAssemblerCompat::breakpoint() {
+ // Note, other payloads are possible, but GDB is known to misinterpret them
+ // sometimes and iloop on the breakpoint instead of stopping properly.
+ Brk(0);
+}
+
+// Either `any` is valid or `sixtyfour` is valid. Return a 32-bit ARMRegister
+// in the former case and an ARMRegister of the desired size in the latter.
+
+static inline ARMRegister SelectGPReg(AnyRegister any, Register64 sixtyfour,
+ unsigned size = 64) {
+ MOZ_ASSERT(any.isValid() != (sixtyfour != Register64::Invalid()));
+
+ if (sixtyfour == Register64::Invalid()) {
+ return ARMRegister(any.gpr(), 32);
+ }
+
+ return ARMRegister(sixtyfour.reg, size);
+}
+
+// Assert that `sixtyfour` is invalid and then return an FP register from `any`
+// of the desired size.
+
+static inline ARMFPRegister SelectFPReg(AnyRegister any, Register64 sixtyfour,
+ unsigned size) {
+ MOZ_ASSERT(sixtyfour == Register64::Invalid());
+ return ARMFPRegister(any.fpu(), size);
+}
+
+void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
+ Register memoryBase_, Register ptr_,
+ AnyRegister outany, Register64 out64) {
+ uint32_t offset = access.offset();
+ MOZ_ASSERT(offset < asMasm().wasmMaxOffsetGuardLimit());
+
+ ARMRegister memoryBase(memoryBase_, 64);
+ ARMRegister ptr(ptr_, 64);
+ if (offset) {
+ vixl::UseScratchRegisterScope temps(this);
+ ARMRegister scratch = temps.AcquireX();
+ Add(scratch, ptr, Operand(offset));
+ MemOperand srcAddr(memoryBase, scratch);
+ wasmLoadImpl(access, srcAddr, outany, out64);
+ } else {
+ MemOperand srcAddr(memoryBase, ptr);
+ wasmLoadImpl(access, srcAddr, outany, out64);
+ }
+}
+
+void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
+ MemOperand srcAddr, AnyRegister outany,
+ Register64 out64) {
+ // Reg+Reg and Reg+SmallImm addressing is directly encodable in one Load
+ // instruction, hence we expect exactly one instruction to be emitted in the
+ // window.
+ int32_t instructionsExpected = 1;
+
+ // Splat and widen however require an additional instruction to be emitted
+ // after the load, so allow one more instruction in the window.
+ if (access.isSplatSimd128Load() || access.isWidenSimd128Load()) {
+ MOZ_ASSERT(access.type() == Scalar::Float64);
+ instructionsExpected++;
+ }
+
+ // NOTE: the generated code must match the assembly code in gen_load in
+ // GenerateAtomicOperations.py
+ asMasm().memoryBarrierBefore(access.sync());
+
+ {
+ // The AutoForbidPoolsAndNops asserts if we emit more than the expected
+ // number of instructions and thus ensures that the access metadata is
+ // emitted at the address of the Load.
+ AutoForbidPoolsAndNops afp(this, instructionsExpected);
+
+ append(access, asMasm().currentOffset());
+ switch (access.type()) {
+ case Scalar::Int8:
+ Ldrsb(SelectGPReg(outany, out64), srcAddr);
+ break;
+ case Scalar::Uint8:
+ Ldrb(SelectGPReg(outany, out64), srcAddr);
+ break;
+ case Scalar::Int16:
+ Ldrsh(SelectGPReg(outany, out64), srcAddr);
+ break;
+ case Scalar::Uint16:
+ Ldrh(SelectGPReg(outany, out64), srcAddr);
+ break;
+ case Scalar::Int32:
+ if (out64 != Register64::Invalid()) {
+ Ldrsw(SelectGPReg(outany, out64), srcAddr);
+ } else {
+ Ldr(SelectGPReg(outany, out64, 32), srcAddr);
+ }
+ break;
+ case Scalar::Uint32:
+ Ldr(SelectGPReg(outany, out64, 32), srcAddr);
+ break;
+ case Scalar::Int64:
+ Ldr(SelectGPReg(outany, out64), srcAddr);
+ break;
+ case Scalar::Float32:
+ // LDR does the right thing also for access.isZeroExtendSimd128Load()
+ Ldr(SelectFPReg(outany, out64, 32), srcAddr);
+ break;
+ case Scalar::Float64:
+ if (access.isSplatSimd128Load() || access.isWidenSimd128Load()) {
+ ScratchSimd128Scope scratch_(asMasm());
+ ARMFPRegister scratch = Simd1D(scratch_);
+ Ldr(scratch, srcAddr);
+ if (access.isSplatSimd128Load()) {
+ Dup(SelectFPReg(outany, out64, 128).V2D(), scratch, 0);
+ } else {
+ MOZ_ASSERT(access.isWidenSimd128Load());
+ switch (access.widenSimdOp()) {
+ case wasm::SimdOp::V128Load8x8S:
+ Sshll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0);
+ break;
+ case wasm::SimdOp::V128Load8x8U:
+ Ushll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0);
+ break;
+ case wasm::SimdOp::V128Load16x4S:
+ Sshll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0);
+ break;
+ case wasm::SimdOp::V128Load16x4U:
+ Ushll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0);
+ break;
+ case wasm::SimdOp::V128Load32x2S:
+ Sshll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0);
+ break;
+ case wasm::SimdOp::V128Load32x2U:
+ Ushll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0);
+ break;
+ default:
+ MOZ_CRASH("Unexpected widening op for wasmLoad");
+ }
+ }
+ } else {
+ // LDR does the right thing also for access.isZeroExtendSimd128Load()
+ Ldr(SelectFPReg(outany, out64, 64), srcAddr);
+ }
+ break;
+ case Scalar::Simd128:
+ Ldr(SelectFPReg(outany, out64, 128), srcAddr);
+ break;
+ case Scalar::Uint8Clamped:
+ case Scalar::BigInt64:
+ case Scalar::BigUint64:
+ case Scalar::MaxTypedArrayViewType:
+ MOZ_CRASH("unexpected array type");
+ }
+ }
+
+ asMasm().memoryBarrierAfter(access.sync());
+}
+
+// Return true if `address` can be represented as an immediate (possibly scaled
+// by the access size) in an LDR/STR type instruction.
+//
+// For more about the logic here, see vixl::MacroAssembler::LoadStoreMacro().
+static bool IsLSImmediateOffset(uint64_t address, size_t accessByteSize) {
+ // The predicates below operate on signed values only.
+ if (address > INT64_MAX) {
+ return false;
+ }
+
+ // The access size is always a power of 2, so computing the log amounts to
+ // counting trailing zeroes.
+ unsigned logAccessSize = mozilla::CountTrailingZeroes32(accessByteSize);
+ return (MacroAssemblerCompat::IsImmLSUnscaled(int64_t(address)) ||
+ MacroAssemblerCompat::IsImmLSScaled(int64_t(address), logAccessSize));
+}
+
+void MacroAssemblerCompat::wasmLoadAbsolute(
+ const wasm::MemoryAccessDesc& access, Register memoryBase, uint64_t address,
+ AnyRegister output, Register64 out64) {
+ if (!IsLSImmediateOffset(address, access.byteSize())) {
+ // The access will require the constant to be loaded into a temp register.
+ // Do so here, to keep the logic in wasmLoadImpl() tractable wrt emitting
+ // trap information.
+ //
+ // Almost all constant addresses will in practice be handled by a single MOV,
+ // so do not worry about additional optimizations here.
+ vixl::UseScratchRegisterScope temps(this);
+ ARMRegister scratch = temps.AcquireX();
+ Mov(scratch, address);
+ MemOperand srcAddr(X(memoryBase), scratch);
+ wasmLoadImpl(access, srcAddr, output, out64);
+ } else {
+ MemOperand srcAddr(X(memoryBase), address);
+ wasmLoadImpl(access, srcAddr, output, out64);
+ }
+}
+
+void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
+ AnyRegister valany, Register64 val64,
+ Register memoryBase_, Register ptr_) {
+ uint32_t offset = access.offset();
+ MOZ_ASSERT(offset < asMasm().wasmMaxOffsetGuardLimit());
+
+ ARMRegister memoryBase(memoryBase_, 64);
+ ARMRegister ptr(ptr_, 64);
+ if (offset) {
+ vixl::UseScratchRegisterScope temps(this);
+ ARMRegister scratch = temps.AcquireX();
+ Add(scratch, ptr, Operand(offset));
+ MemOperand destAddr(memoryBase, scratch);
+ wasmStoreImpl(access, destAddr, valany, val64);
+ } else {
+ MemOperand destAddr(memoryBase, ptr);
+ wasmStoreImpl(access, destAddr, valany, val64);
+ }
+}
+
+void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
+ MemOperand dstAddr, AnyRegister valany,
+ Register64 val64) {
+ // NOTE: the generated code must match the assembly code in gen_store in
+ // GenerateAtomicOperations.py
+ asMasm().memoryBarrierBefore(access.sync());
+
+ {
+ // Reg+Reg addressing is directly encodable in one Store instruction, hence
+ // the AutoForbidPoolsAndNops will ensure that the access metadata is
+ // emitted at the address of the Store. The AutoForbidPoolsAndNops will
+ // assert if we emit more than one instruction.
+
+ AutoForbidPoolsAndNops afp(this,
+ /* max number of instructions in scope = */ 1);
+
+ append(access, asMasm().currentOffset());
+ switch (access.type()) {
+ case Scalar::Int8:
+ case Scalar::Uint8:
+ Strb(SelectGPReg(valany, val64), dstAddr);
+ break;
+ case Scalar::Int16:
+ case Scalar::Uint16:
+ Strh(SelectGPReg(valany, val64), dstAddr);
+ break;
+ case Scalar::Int32:
+ case Scalar::Uint32:
+ Str(SelectGPReg(valany, val64), dstAddr);
+ break;
+ case Scalar::Int64:
+ Str(SelectGPReg(valany, val64), dstAddr);
+ break;
+ case Scalar::Float32:
+ Str(SelectFPReg(valany, val64, 32), dstAddr);
+ break;
+ case Scalar::Float64:
+ Str(SelectFPReg(valany, val64, 64), dstAddr);
+ break;
+ case Scalar::Simd128:
+ Str(SelectFPReg(valany, val64, 128), dstAddr);
+ break;
+ case Scalar::Uint8Clamped:
+ case Scalar::BigInt64:
+ case Scalar::BigUint64:
+ case Scalar::MaxTypedArrayViewType:
+ MOZ_CRASH("unexpected array type");
+ }
+ }
+
+ asMasm().memoryBarrierAfter(access.sync());
+}
+
+void MacroAssemblerCompat::wasmStoreAbsolute(
+ const wasm::MemoryAccessDesc& access, AnyRegister value, Register64 value64,
+ Register memoryBase, uint64_t address) {
+ // See comments in wasmLoadAbsolute.
+ unsigned logAccessSize = mozilla::CountTrailingZeroes32(access.byteSize());
+ if (address > INT64_MAX || !(IsImmLSScaled(int64_t(address), logAccessSize) ||
+ IsImmLSUnscaled(int64_t(address)))) {
+ vixl::UseScratchRegisterScope temps(this);
+ ARMRegister scratch = temps.AcquireX();
+ Mov(scratch, address);
+ MemOperand destAddr(X(memoryBase), scratch);
+ wasmStoreImpl(access, destAddr, value, value64);
+ } else {
+ MemOperand destAddr(X(memoryBase), address);
+ wasmStoreImpl(access, destAddr, value, value64);
+ }
+}
+
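+// NEON provides only the Eq/Gt/Ge (and, for integers, unsigned Hi/Hs)
+// register-register vector compares, so NotEqual is synthesized as a
+// compare-equal followed by Mvn, and the Less/Below forms swap the operands
+// of the corresponding Greater/Above compare.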
+void MacroAssemblerCompat::compareSimd128Int(Assembler::Condition cond,
+ ARMFPRegister dest,
+ ARMFPRegister lhs,
+ ARMFPRegister rhs) {
+ switch (cond) {
+ case Assembler::Equal:
+ Cmeq(dest, lhs, rhs);
+ break;
+ case Assembler::NotEqual:
+ Cmeq(dest, lhs, rhs);
+ Mvn(dest, dest);
+ break;
+ case Assembler::GreaterThan:
+ Cmgt(dest, lhs, rhs);
+ break;
+ case Assembler::GreaterThanOrEqual:
+ Cmge(dest, lhs, rhs);
+ break;
+ case Assembler::LessThan:
+ Cmgt(dest, rhs, lhs);
+ break;
+ case Assembler::LessThanOrEqual:
+ Cmge(dest, rhs, lhs);
+ break;
+ case Assembler::Above:
+ Cmhi(dest, lhs, rhs);
+ break;
+ case Assembler::AboveOrEqual:
+ Cmhs(dest, lhs, rhs);
+ break;
+ case Assembler::Below:
+ Cmhi(dest, rhs, lhs);
+ break;
+ case Assembler::BelowOrEqual:
+ Cmhs(dest, rhs, lhs);
+ break;
+ default:
+ MOZ_CRASH("Unexpected SIMD integer condition");
+ }
+}
+
+void MacroAssemblerCompat::compareSimd128Float(Assembler::Condition cond,
+ ARMFPRegister dest,
+ ARMFPRegister lhs,
+ ARMFPRegister rhs) {
+ switch (cond) {
+ case Assembler::Equal:
+ Fcmeq(dest, lhs, rhs);
+ break;
+ case Assembler::NotEqual:
+ Fcmeq(dest, lhs, rhs);
+ Mvn(dest, dest);
+ break;
+ case Assembler::GreaterThan:
+ Fcmgt(dest, lhs, rhs);
+ break;
+ case Assembler::GreaterThanOrEqual:
+ Fcmge(dest, lhs, rhs);
+ break;
+ case Assembler::LessThan:
+ Fcmgt(dest, rhs, lhs);
+ break;
+ case Assembler::LessThanOrEqual:
+ Fcmge(dest, rhs, lhs);
+ break;
+ default:
+ MOZ_CRASH("Unexpected SIMD integer condition");
+ }
+}
+
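+// NEON has no variable right-shift instruction: Sshl/Ushl shift left by a
+// signed per-lane amount, so the scalar count is splatted and negated to
+// obtain an arithmetic or logical right shift.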
+void MacroAssemblerCompat::rightShiftInt8x16(FloatRegister lhs, Register rhs,
+ FloatRegister dest,
+ bool isUnsigned) {
+ ScratchSimd128Scope scratch_(asMasm());
+ ARMFPRegister shift = Simd16B(scratch_);
+
+ Dup(shift, ARMRegister(rhs, 32));
+ Neg(shift, shift);
+
+ if (isUnsigned) {
+ Ushl(Simd16B(dest), Simd16B(lhs), shift);
+ } else {
+ Sshl(Simd16B(dest), Simd16B(lhs), shift);
+ }
+}
+
+void MacroAssemblerCompat::rightShiftInt16x8(FloatRegister lhs, Register rhs,
+ FloatRegister dest,
+ bool isUnsigned) {
+ ScratchSimd128Scope scratch_(asMasm());
+ ARMFPRegister shift = Simd8H(scratch_);
+
+ Dup(shift, ARMRegister(rhs, 32));
+ Neg(shift, shift);
+
+ if (isUnsigned) {
+ Ushl(Simd8H(dest), Simd8H(lhs), shift);
+ } else {
+ Sshl(Simd8H(dest), Simd8H(lhs), shift);
+ }
+}
+
+void MacroAssemblerCompat::rightShiftInt32x4(FloatRegister lhs, Register rhs,
+ FloatRegister dest,
+ bool isUnsigned) {
+ ScratchSimd128Scope scratch_(asMasm());
+ ARMFPRegister shift = Simd4S(scratch_);
+
+ Dup(shift, ARMRegister(rhs, 32));
+ Neg(shift, shift);
+
+ if (isUnsigned) {
+ Ushl(Simd4S(dest), Simd4S(lhs), shift);
+ } else {
+ Sshl(Simd4S(dest), Simd4S(lhs), shift);
+ }
+}
+
+void MacroAssemblerCompat::rightShiftInt64x2(FloatRegister lhs, Register rhs,
+ FloatRegister dest,
+ bool isUnsigned) {
+ ScratchSimd128Scope scratch_(asMasm());
+ ARMFPRegister shift = Simd2D(scratch_);
+
+ Dup(shift, ARMRegister(rhs, 64));
+ Neg(shift, shift);
+
+ if (isUnsigned) {
+ Ushl(Simd2D(dest), Simd2D(lhs), shift);
+ } else {
+ Sshl(Simd2D(dest), Simd2D(lhs), shift);
+ }
+}
+
+void MacroAssembler::reserveStack(uint32_t amount) {
+ // TODO: This bumps |sp| every time we reserve using a second register.
+ // It would save some instructions if we had a fixed frame size.
+ vixl::MacroAssembler::Claim(Operand(amount));
+ adjustFrame(amount);
+}
+
+void MacroAssembler::Push(RegisterOrSP reg) {
+ if (IsHiddenSP(reg)) {
+ push(sp);
+ } else {
+ push(AsRegister(reg));
+ }
+ adjustFrame(sizeof(intptr_t));
+}
+
+//{{{ check_macroassembler_style
+// ===============================================================
+// MacroAssembler high-level usage.
+
+void MacroAssembler::flush() { Assembler::flush(); }
+
+// ===============================================================
+// Stack manipulation functions.
+
+// Routines for saving/restoring registers on the stack. The format is:
+//
+// (highest address)
+//
+// integer (X) regs in any order size: 8 * # int regs
+//
+// if # int regs is odd,
+// then an 8 byte alignment hole size: 0 or 8
+//
+// double (D) regs in any order size: 8 * # double regs
+//
+// if # double regs is odd,
+// then an 8 byte alignment hole size: 0 or 8
+//
+// vector (Q) regs in any order size: 16 * # vector regs
+//
+// (lowest address)
+//
+// Hence the size of the save area is a multiple of 16. And, provided the base
+// (highest) address is 16-aligned, then the vector reg save/restore accesses
+// will also be 16-aligned, as will pairwise operations for the double regs.
+//
+// Implied by this is that the format of the double and vector dump area
+// corresponds with what FloatRegister::GetPushSizeInBytes computes.
+// See block comment in MacroAssembler.h for more details.
+
+size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) {
+ size_t numIntRegs = set.gprs().size();
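+ // Round the integer-register count up to an even number so that the GPR
+ // area stays 16-byte aligned; per the layout comment above, the FP/SIMD
+ // area computed by GetPushSizeInBytes is also a multiple of 16.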
+ return ((numIntRegs + 1) & ~1) * sizeof(intptr_t) +
+ FloatRegister::GetPushSizeInBytes(set.fpus());
+}
+
+// Generate code to dump the values in `set`, either on the stack if `dest` is
+// `Nothing` or working backwards from the address denoted by `dest` if it is
+// `Some`. These two cases are combined so as to minimise the chance of
+// mistakenly generating different formats for the same `set`, given that the
+// `Some` `dest` case is used extremely rarely.
+static void PushOrStoreRegsInMask(MacroAssembler* masm, LiveRegisterSet set,
+ mozilla::Maybe<Address> dest) {
+ static_assert(sizeof(FloatRegisters::RegisterContent) == 16);
+
+ // If we're saving to arbitrary memory, check the destination is big enough.
+ if (dest) {
+ mozilla::DebugOnly<size_t> bytesRequired =
+ masm->PushRegsInMaskSizeInBytes(set);
+ MOZ_ASSERT(dest->offset >= 0);
+ MOZ_ASSERT(((size_t)dest->offset) >= bytesRequired);
+ }
+
+ // Note the high limit point; we'll check it again later.
+ mozilla::DebugOnly<size_t> maxExtentInitial =
+ dest ? dest->offset : masm->framePushed();
+
+ // Gather up the integer registers in groups of four, and either push each
+ // group as a single transfer so as to minimise the number of stack pointer
+ // changes, or write them individually to memory. Take care to ensure the
+ // space used remains 16-aligned.
+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();) {
+ vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg, vixl::NoCPUReg,
+ vixl::NoCPUReg};
+ size_t i;
+ for (i = 0; i < 4 && iter.more(); i++) {
+ src[i] = ARMRegister(*iter, 64);
+ ++iter;
+ }
+ MOZ_ASSERT(i > 0);
+
+ if (i == 1 || i == 3) {
+ // Ensure the stack remains 16-aligned
+ MOZ_ASSERT(!iter.more());
+ src[i] = vixl::xzr;
+ i++;
+ }
+ MOZ_ASSERT(i == 2 || i == 4);
+
+ if (dest) {
+ for (size_t j = 0; j < i; j++) {
+ Register ireg = Register::FromCode(src[j].IsZero() ? Registers::xzr
+ : src[j].code());
+ dest->offset -= sizeof(intptr_t);
+ masm->storePtr(ireg, *dest);
+ }
+ } else {
+ masm->adjustFrame(i * 8);
+ masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
+ }
+ }
+
+ // Now the same for the FP double registers. Note that because of how
+ // ReduceSetForPush works, an underlying AArch64 SIMD/FP register can either
+ // be present as a double register, or as a V128 register, but not both.
+ // Firstly, round up the registers to be pushed.
+
+ FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
+ vixl::CPURegister allSrcs[FloatRegisters::TotalPhys];
+ size_t numAllSrcs = 0;
+
+ for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
+ FloatRegister reg = *iter;
+ if (reg.isDouble()) {
+ MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys);
+ allSrcs[numAllSrcs] = ARMFPRegister(reg, 64);
+ numAllSrcs++;
+ } else {
+ MOZ_ASSERT(reg.isSimd128());
+ }
+ }
+ MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);
+
+ if ((numAllSrcs & 1) == 1) {
+ // We've got an odd number of doubles. In order to maintain 16-alignment,
+ // push the last register twice. We'll skip over the duplicate in
+ // PopRegsInMaskIgnore.
+ allSrcs[numAllSrcs] = allSrcs[numAllSrcs - 1];
+ numAllSrcs++;
+ }
+ MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);
+ MOZ_RELEASE_ASSERT((numAllSrcs & 1) == 0);
+
+ // And now generate the transfers.
+ size_t i;
+ if (dest) {
+ for (i = 0; i < numAllSrcs; i++) {
+ FloatRegister freg =
+ FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()),
+ FloatRegisters::Kind::Double);
+ dest->offset -= sizeof(double);
+ masm->storeDouble(freg, *dest);
+ }
+ } else {
+ i = 0;
+ while (i < numAllSrcs) {
+ vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg,
+ vixl::NoCPUReg, vixl::NoCPUReg};
+ size_t j;
+ for (j = 0; j < 4 && j + i < numAllSrcs; j++) {
+ src[j] = allSrcs[j + i];
+ }
+ masm->adjustFrame(8 * j);
+ masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
+ i += j;
+ }
+ }
+ MOZ_ASSERT(i == numAllSrcs);
+
+ // Finally, deal with the SIMD (V128) registers. This is a bit simpler
+ // as there's no need for special-casing to maintain 16-alignment.
+
+ numAllSrcs = 0;
+ for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
+ FloatRegister reg = *iter;
+ if (reg.isSimd128()) {
+ MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys);
+ allSrcs[numAllSrcs] = ARMFPRegister(reg, 128);
+ numAllSrcs++;
+ }
+ }
+ MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);
+
+ // Generate the transfers.
+ if (dest) {
+ for (i = 0; i < numAllSrcs; i++) {
+ FloatRegister freg =
+ FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()),
+ FloatRegisters::Kind::Simd128);
+ dest->offset -= FloatRegister::SizeOfSimd128;
+ masm->storeUnalignedSimd128(freg, *dest);
+ }
+ } else {
+ i = 0;
+ while (i < numAllSrcs) {
+ vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg,
+ vixl::NoCPUReg, vixl::NoCPUReg};
+ size_t j;
+ for (j = 0; j < 4 && j + i < numAllSrcs; j++) {
+ src[j] = allSrcs[j + i];
+ }
+ masm->adjustFrame(16 * j);
+ masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
+ i += j;
+ }
+ }
+ MOZ_ASSERT(i == numAllSrcs);
+
+ // Final overrun check.
+ if (dest) {
+ MOZ_ASSERT(maxExtentInitial - dest->offset ==
+ masm->PushRegsInMaskSizeInBytes(set));
+ } else {
+ MOZ_ASSERT(masm->framePushed() - maxExtentInitial ==
+ masm->PushRegsInMaskSizeInBytes(set));
+ }
+}
+
+void MacroAssembler::PushRegsInMask(LiveRegisterSet set) {
+ PushOrStoreRegsInMask(this, set, mozilla::Nothing());
+}
+
+void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest,
+ Register scratch) {
+ PushOrStoreRegsInMask(this, set, mozilla::Some(dest));
+}
+
+// This is a helper function for PopRegsInMaskIgnore below. It emits the
+// loads described by dests[0] and [1] and offsets[0] and [1], generating a
+// load-pair if it can.
+static void GeneratePendingLoadsThenFlush(MacroAssembler* masm,
+ vixl::CPURegister* dests,
+ uint32_t* offsets,
+ uint32_t transactionSize) {
+ // Generate the loads ..
+ if (!dests[0].IsNone()) {
+ if (!dests[1].IsNone()) {
+ // [0] and [1] both present.
+ if (offsets[0] + transactionSize == offsets[1]) {
+ masm->Ldp(dests[0], dests[1],
+ MemOperand(masm->GetStackPointer64(), offsets[0]));
+ } else {
+ // Theoretically we could check for a load-pair with the destinations
+ // switched, but our callers will never generate that. Hence there's
+ // no loss in giving up at this point and generating two loads.
+ masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0]));
+ masm->Ldr(dests[1], MemOperand(masm->GetStackPointer64(), offsets[1]));
+ }
+ } else {
+ // [0] only.
+ masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0]));
+ }
+ } else {
+ if (!dests[1].IsNone()) {
+ // [1] only. Can't happen because callers always fill [0] before [1].
+ MOZ_CRASH("GenerateLoadsThenFlush");
+ } else {
+ // Neither entry valid. This can happen.
+ }
+ }
+
+ // .. and flush.
+ dests[0] = dests[1] = vixl::NoCPUReg;
+ offsets[0] = offsets[1] = 0;
+}
+
+void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set,
+ LiveRegisterSet ignore) {
+ mozilla::DebugOnly<size_t> framePushedInitial = framePushed();
+
+ // The offset of the data from the stack pointer.
+ uint32_t offset = 0;
+
+ // The set of FP/SIMD registers we need to restore.
+ FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
+
+ // The set of registers to ignore. BroadcastToAllSizes() is used to avoid
+ // any ambiguities arising from (eg) `fpuSet` containing q17 but `ignore`
+ // containing d17.
+ FloatRegisterSet ignoreFpusBroadcasted(
+ FloatRegister::BroadcastToAllSizes(ignore.fpus()));
+
+ // First recover the SIMD (V128) registers. This is straightforward in that
+ // we don't need to think about alignment holes.
+
+ // These three form a two-entry queue that holds loads that we know we
+ // need, but which we haven't yet emitted.
+ vixl::CPURegister pendingDests[2] = {vixl::NoCPUReg, vixl::NoCPUReg};
+ uint32_t pendingOffsets[2] = {0, 0};
+ size_t nPending = 0;
+
+ for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) {
+ FloatRegister reg = *iter;
+ if (reg.isDouble()) {
+ continue;
+ }
+ MOZ_RELEASE_ASSERT(reg.isSimd128());
+
+ uint32_t offsetForReg = offset;
+ offset += FloatRegister::SizeOfSimd128;
+
+ if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) {
+ continue;
+ }
+
+ MOZ_ASSERT(nPending <= 2);
+ if (nPending == 2) {
+ GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16);
+ nPending = 0;
+ }
+ pendingDests[nPending] = ARMFPRegister(reg, 128);
+ pendingOffsets[nPending] = offsetForReg;
+ nPending++;
+ }
+ GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16);
+ nPending = 0;
+
+ MOZ_ASSERT((offset % 16) == 0);
+
+ // Now recover the FP double registers. This is more tricky in that we need
+ // to skip over the lowest-addressed of them if the number of them was odd.
+
+ if ((((fpuSet.bits() & FloatRegisters::AllDoubleMask).size()) & 1) == 1) {
+ offset += sizeof(double);
+ }
+
+ for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) {
+ FloatRegister reg = *iter;
+ if (reg.isSimd128()) {
+ continue;
+ }
+ /* true but redundant, per loop above: MOZ_RELEASE_ASSERT(reg.isDouble()) */
+
+ uint32_t offsetForReg = offset;
+ offset += sizeof(double);
+
+ if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) {
+ continue;
+ }
+
+ MOZ_ASSERT(nPending <= 2);
+ if (nPending == 2) {
+ GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
+ nPending = 0;
+ }
+ pendingDests[nPending] = ARMFPRegister(reg, 64);
+ pendingOffsets[nPending] = offsetForReg;
+ nPending++;
+ }
+ GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
+ nPending = 0;
+
+ MOZ_ASSERT((offset % 16) == 0);
+ MOZ_ASSERT(offset == set.fpus().getPushSizeInBytes());
+
+ // And finally recover the integer registers, again skipping an alignment
+ // hole if it exists.
+
+ if ((set.gprs().size() & 1) == 1) {
+ offset += sizeof(uint64_t);
+ }
+
+ for (GeneralRegisterIterator iter(set.gprs()); iter.more(); ++iter) {
+ Register reg = *iter;
+
+ uint32_t offsetForReg = offset;
+ offset += sizeof(uint64_t);
+
+ if (ignore.has(reg)) {
+ continue;
+ }
+
+ MOZ_ASSERT(nPending <= 2);
+ if (nPending == 2) {
+ GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
+ nPending = 0;
+ }
+ pendingDests[nPending] = ARMRegister(reg, 64);
+ pendingOffsets[nPending] = offsetForReg;
+ nPending++;
+ }
+ GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
+
+ MOZ_ASSERT((offset % 16) == 0);
+
+ size_t bytesPushed = PushRegsInMaskSizeInBytes(set);
+ MOZ_ASSERT(offset == bytesPushed);
+ freeStack(bytesPushed);
+}
+
+void MacroAssembler::Push(Register reg) {
+ push(reg);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(Register reg1, Register reg2, Register reg3,
+ Register reg4) {
+ push(reg1, reg2, reg3, reg4);
+ adjustFrame(4 * sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(const Imm32 imm) {
+ push(imm);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(const ImmWord imm) {
+ push(imm);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(const ImmPtr imm) {
+ push(imm);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(const ImmGCPtr ptr) {
+ push(ptr);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(FloatRegister f) {
+ push(f);
+ adjustFrame(sizeof(double));
+}
+
+void MacroAssembler::PushBoxed(FloatRegister reg) {
+ subFromStackPtr(Imm32(sizeof(double)));
+ boxDouble(reg, Address(getStackPointer(), 0));
+ adjustFrame(sizeof(double));
+}
+
+void MacroAssembler::Pop(Register reg) {
+ pop(reg);
+ adjustFrame(-1 * int64_t(sizeof(int64_t)));
+}
+
+void MacroAssembler::Pop(FloatRegister f) {
+ loadDouble(Address(getStackPointer(), 0), f);
+ freeStack(sizeof(double));
+}
+
+void MacroAssembler::Pop(const ValueOperand& val) {
+ pop(val);
+ adjustFrame(-1 * int64_t(sizeof(int64_t)));
+}
+
+// ===============================================================
+// Simple call functions.
+
+CodeOffset MacroAssembler::call(Register reg) {
+ // This sync has been observed (and is expected) to be necessary.
+ // eg testcase: tests/debug/bug1107525.js
+ syncStackPtr();
+ Blr(ARMRegister(reg, 64));
+ return CodeOffset(currentOffset());
+}
+
+CodeOffset MacroAssembler::call(Label* label) {
+ // This sync has been observed (and is expected) to be necessary.
+ // eg testcase: tests/basic/testBug504520Harder.js
+ syncStackPtr();
+ Bl(label);
+ return CodeOffset(currentOffset());
+}
+
+void MacroAssembler::call(ImmPtr imm) {
+ // This sync has been observed (and is expected) to be necessary.
+ // eg testcase: asm.js/testTimeout5.js
+ syncStackPtr();
+ vixl::UseScratchRegisterScope temps(this);
+ MOZ_ASSERT(temps.IsAvailable(ScratchReg64)); // ip0
+ temps.Exclude(ScratchReg64);
+ movePtr(imm, ScratchReg64.asUnsized());
+ Blr(ScratchReg64);
+}
+
+void MacroAssembler::call(ImmWord imm) { call(ImmPtr((void*)imm.value)); }
+
+CodeOffset MacroAssembler::call(wasm::SymbolicAddress imm) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ // This sync is believed to be necessary, although no case in jit-test/tests
+ // has been observed to cause SP != PSP here.
+ syncStackPtr();
+ movePtr(imm, scratch);
+ Blr(ARMRegister(scratch, 64));
+ return CodeOffset(currentOffset());
+}
+
+void MacroAssembler::call(const Address& addr) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ // This sync has been observed (and is expected) to be necessary.
+ // eg testcase: tests/backup-point-bug1315634.js
+ syncStackPtr();
+ loadPtr(addr, scratch);
+ Blr(ARMRegister(scratch, 64));
+}
+
+void MacroAssembler::call(JitCode* c) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ // This sync has been observed (and is expected) to be necessary.
+ // eg testcase: arrays/new-array-undefined-undefined-more-args-2.js
+ syncStackPtr();
+ BufferOffset off = immPool64(scratch64, uint64_t(c->raw()));
+ addPendingJump(off, ImmPtr(c->raw()), RelocationKind::JITCODE);
+ blr(scratch64);
+}
+
+CodeOffset MacroAssembler::callWithPatch() {
+ // This needs to sync. Wasm goes through this one for intramodule calls.
+ //
+ // In other cases, wasm goes through masm.wasmCallImport(),
+ // masm.wasmCallBuiltinInstanceMethod, masm.wasmCallIndirect, all of which
+ // sync.
+ //
+ // This sync is believed to be necessary, although no case in jit-test/tests
+ // has been observed to cause SP != PSP here.
+ syncStackPtr();
+ bl(0, LabelDoc());
+ return CodeOffset(currentOffset());
+}
+void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) {
+ Instruction* inst = getInstructionAt(BufferOffset(callerOffset - 4));
+ MOZ_ASSERT(inst->IsBL());
+ ptrdiff_t relTarget = (int)calleeOffset - ((int)callerOffset - 4);
+ ptrdiff_t relTarget00 = relTarget >> 2;
+ MOZ_RELEASE_ASSERT((relTarget & 0x3) == 0);
+ MOZ_RELEASE_ASSERT(vixl::IsInt26(relTarget00));
+ bl(inst, relTarget00);
+}
+
+CodeOffset MacroAssembler::farJumpWithPatch() {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ const ARMRegister scratch2 = temps.AcquireX();
+
+ AutoForbidPoolsAndNops afp(this,
+ /* max number of instructions in scope = */ 7);
+
+ mozilla::DebugOnly<uint32_t> before = currentOffset();
+
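+ // The sequence is: an optional nop from align(8); adr of the br below; a
+ // 64-bit load of the displacement stored in the two placeholder words that
+ // follow the br; an add of that displacement onto the adr result; and the
+ // br itself. patchFarJump() later fills the placeholder words with the
+ // signed distance to the target.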
+ align(8); // At most one nop
+
+ Label branch;
+ adr(scratch2, &branch);
+ ldr(scratch, vixl::MemOperand(scratch2, 4));
+ add(scratch2, scratch2, scratch);
+ CodeOffset offs(currentOffset());
+ bind(&branch);
+ br(scratch2);
+ Emit(UINT32_MAX);
+ Emit(UINT32_MAX);
+
+ mozilla::DebugOnly<uint32_t> after = currentOffset();
+
+ MOZ_ASSERT(after - before == 24 || after - before == 28);
+
+ return offs;
+}
+
+void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) {
+ Instruction* inst1 = getInstructionAt(BufferOffset(farJump.offset() + 4));
+ Instruction* inst2 = getInstructionAt(BufferOffset(farJump.offset() + 8));
+
+ int64_t distance = (int64_t)targetOffset - (int64_t)farJump.offset();
+
+ MOZ_ASSERT(inst1->InstructionBits() == UINT32_MAX);
+ MOZ_ASSERT(inst2->InstructionBits() == UINT32_MAX);
+
+ inst1->SetInstructionBits((uint32_t)distance);
+ inst2->SetInstructionBits((uint32_t)(distance >> 32));
+}
+
+CodeOffset MacroAssembler::nopPatchableToCall() {
+ AutoForbidPoolsAndNops afp(this,
+ /* max number of instructions in scope = */ 1);
+ Nop();
+ return CodeOffset(currentOffset());
+}
+
+void MacroAssembler::patchNopToCall(uint8_t* call, uint8_t* target) {
+ uint8_t* inst = call - 4;
+ Instruction* instr = reinterpret_cast<Instruction*>(inst);
+ MOZ_ASSERT(instr->IsBL() || instr->IsNOP());
+ bl(instr, (target - inst) >> 2);
+}
+
+void MacroAssembler::patchCallToNop(uint8_t* call) {
+ uint8_t* inst = call - 4;
+ Instruction* instr = reinterpret_cast<Instruction*>(inst);
+ MOZ_ASSERT(instr->IsBL() || instr->IsNOP());
+ nop(instr);
+}
+
+void MacroAssembler::pushReturnAddress() {
+ MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid");
+ push(lr);
+}
+
+void MacroAssembler::popReturnAddress() {
+ MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid");
+ pop(lr);
+}
+
+// ===============================================================
+// ABI function calls.
+
+void MacroAssembler::setupUnalignedABICall(Register scratch) {
+  // Wasm does not need dynamic alignment of SP, so this routine should never
+  // be called when generating wasm code.
+ MOZ_ASSERT(!IsCompilingWasm());
+
+ // The following won't work for SP -- needs slightly different logic.
+ MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
+
+ setupNativeABICall();
+ dynamicAlignment_ = true;
+
+ int64_t alignment = ~(int64_t(ABIStackAlignment) - 1);
+ ARMRegister scratch64(scratch, 64);
+ MOZ_ASSERT(!scratch64.Is(PseudoStackPointer64));
+
+ // Always save LR -- Baseline ICs assume that LR isn't modified.
+ push(lr);
+
+ // Remember the stack address on entry. This is reloaded in callWithABIPost
+ // below.
+ Mov(scratch64, PseudoStackPointer64);
+
+ // Make alignment, including the effective push of the previous sp.
+ Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(8));
+ And(PseudoStackPointer64, PseudoStackPointer64, Operand(alignment));
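+  // For example, with ABIStackAlignment == 16 this computes
+  //   PSP = (PSP - 8) & ~15,
+  // which reserves at least 8 bytes below the old PSP and rounds down to a
+  // 16-byte boundary; the entry value saved below then sits at the top of the
+  // newly aligned area.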
+ syncStackPtr();
+
+ // Store previous sp to the top of the stack, aligned. This is also
+ // reloaded in callWithABIPost.
+ Str(scratch64, MemOperand(PseudoStackPointer64, 0));
+}
+
+void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) {
+ // wasm operates without the need for dynamic alignment of SP.
+ MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm));
+
+ MOZ_ASSERT(inCall_);
+ uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar();
+
+ // ARM64 *really* wants SP to always be 16-aligned, so ensure this now.
+ if (dynamicAlignment_) {
+ stackForCall += ComputeByteAlignment(stackForCall, StackAlignment);
+ } else {
+ // This can happen when we attach out-of-line stubs for rare cases. For
+ // example CodeGenerator::visitWasmTruncateToInt32 adds an out-of-line
+ // chunk.
+ uint32_t alignmentAtPrologue = callFromWasm ? sizeof(wasm::Frame) : 0;
+ stackForCall += ComputeByteAlignment(
+ stackForCall + framePushed() + alignmentAtPrologue, ABIStackAlignment);
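+    // For example (non-wasm, so alignmentAtPrologue == 0): with
+    // framePushed() == 8 and 12 bytes of outgoing stack arguments,
+    // ComputeByteAlignment(20, 16) adds 12 bytes of padding, so the total
+    // frame of 8 + 24 bytes keeps SP 16-aligned at the call.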
+ }
+
+ *stackAdjust = stackForCall;
+ reserveStack(*stackAdjust);
+ {
+ enoughMemory_ &= moveResolver_.resolve();
+ if (!enoughMemory_) {
+ return;
+ }
+ MoveEmitter emitter(*this);
+ emitter.emit(moveResolver_);
+ emitter.finish();
+ }
+
+ // Call boundaries communicate stack via SP.
+  // (jseward, 2021Mar03) This sync may well be redundant, given that all of
+  // the MacroAssembler::call methods generate a sync before the call.
+  // Removing it does not cause any failures in the jit-tests suite.
+ syncStackPtr();
+
+ assertStackAlignment(ABIStackAlignment);
+}
+
+void MacroAssembler::callWithABIPost(uint32_t stackAdjust, MoveOp::Type result,
+ bool callFromWasm) {
+ // wasm operates without the need for dynamic alignment of SP.
+ MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm));
+
+ // Call boundaries communicate stack via SP, so we must resync PSP now.
+ initPseudoStackPtr();
+
+ freeStack(stackAdjust);
+
+ if (dynamicAlignment_) {
+ // This then-clause makes more sense if you first read
+ // setupUnalignedABICall above.
+ //
+ // Restore the stack pointer from entry. The stack pointer will have been
+ // saved by setupUnalignedABICall. This is fragile in that it assumes
+ // that uses of this routine (callWithABIPost) with `dynamicAlignment_ ==
+ // true` are preceded by matching calls to setupUnalignedABICall. But
+    // there's nothing that enforces that mechanically. If we really want to
+ // enforce this, we could add a debug-only CallWithABIState enum to the
+ // MacroAssembler and assert that setupUnalignedABICall updates it before
+ // we get here, then reset it to its initial state.
+ Ldr(GetStackPointer64(), MemOperand(GetStackPointer64(), 0));
+ syncStackPtr();
+
+ // Restore LR. This restores LR to the value stored by
+ // setupUnalignedABICall, which should have been called just before
+ // callWithABIPre. This is, per the above comment, also fragile.
+ pop(lr);
+
+ // SP may be < PSP now. That is expected from the behaviour of `pop`. It
+ // is not clear why the following `syncStackPtr` is necessary, but it is:
+ // without it, the following test segfaults:
+ // tests/backup-point-bug1315634.js
+ syncStackPtr();
+ }
+
+ // If the ABI's return regs are where ION is expecting them, then
+ // no other work needs to be done.
+
+#ifdef DEBUG
+ MOZ_ASSERT(inCall_);
+ inCall_ = false;
+#endif
+}
+
+void MacroAssembler::callWithABINoProfiler(Register fun, MoveOp::Type result) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ movePtr(fun, scratch);
+
+ uint32_t stackAdjust;
+ callWithABIPre(&stackAdjust);
+ call(scratch);
+ callWithABIPost(stackAdjust, result);
+}
+
+void MacroAssembler::callWithABINoProfiler(const Address& fun,
+ MoveOp::Type result) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ loadPtr(fun, scratch);
+
+ uint32_t stackAdjust;
+ callWithABIPre(&stackAdjust);
+ call(scratch);
+ callWithABIPost(stackAdjust, result);
+}
+
+// ===============================================================
+// Jit Frames.
+
+uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) {
+ enterNoPool(3);
+ Label fakeCallsite;
+
+ Adr(ARMRegister(scratch, 64), &fakeCallsite);
+ Push(scratch);
+ bind(&fakeCallsite);
+ uint32_t pseudoReturnOffset = currentOffset();
+
+ leaveNoPool();
+ return pseudoReturnOffset;
+}
+
+bool MacroAssemblerCompat::buildOOLFakeExitFrame(void* fakeReturnAddr) {
+ asMasm().PushFrameDescriptor(FrameType::IonJS);
+ asMasm().Push(ImmPtr(fakeReturnAddr));
+ asMasm().Push(FramePointer);
+ return true;
+}
+
+// ===============================================================
+// Move instructions
+
+void MacroAssembler::moveValue(const TypedOrValueRegister& src,
+ const ValueOperand& dest) {
+ if (src.hasValue()) {
+ moveValue(src.valueReg(), dest);
+ return;
+ }
+
+ MIRType type = src.type();
+ AnyRegister reg = src.typedReg();
+
+ if (!IsFloatingPointType(type)) {
+ boxNonDouble(ValueTypeFromMIRType(type), reg.gpr(), dest);
+ return;
+ }
+
+ ScratchDoubleScope scratch(*this);
+ FloatRegister freg = reg.fpu();
+ if (type == MIRType::Float32) {
+ convertFloat32ToDouble(freg, scratch);
+ freg = scratch;
+ }
+ boxDouble(freg, dest, scratch);
+}
+
+void MacroAssembler::moveValue(const ValueOperand& src,
+ const ValueOperand& dest) {
+ if (src == dest) {
+ return;
+ }
+ movePtr(src.valueReg(), dest.valueReg());
+}
+
+void MacroAssembler::moveValue(const Value& src, const ValueOperand& dest) {
+ if (!src.isGCThing()) {
+ movePtr(ImmWord(src.asRawBits()), dest.valueReg());
+ return;
+ }
+
+ BufferOffset load =
+ movePatchablePtr(ImmPtr(src.bitsAsPunboxPointer()), dest.valueReg());
+ writeDataRelocation(src, load);
+}
+
+// ===============================================================
+// Branch functions
+
+void MacroAssembler::loadStoreBuffer(Register ptr, Register buffer) {
+ And(ARMRegister(buffer, 64), ARMRegister(ptr, 64),
+ Operand(int32_t(~gc::ChunkMask)));
+ loadPtr(Address(buffer, gc::ChunkStoreBufferOffset), buffer);
+}
+
+void MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr,
+ Register temp, Label* label) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ MOZ_ASSERT(ptr != temp);
+ MOZ_ASSERT(ptr != ScratchReg &&
+ ptr != ScratchReg2); // Both may be used internally.
+ MOZ_ASSERT(temp != ScratchReg && temp != ScratchReg2);
+
+ And(ARMRegister(temp, 64), ARMRegister(ptr, 64),
+ Operand(int32_t(~gc::ChunkMask)));
+ branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset),
+ ImmWord(0), label);
+}
+
+void MacroAssembler::branchValueIsNurseryCell(Condition cond,
+ const Address& address,
+ Register temp, Label* label) {
+ branchValueIsNurseryCellImpl(cond, address, temp, label);
+}
+
+void MacroAssembler::branchValueIsNurseryCell(Condition cond,
+ ValueOperand value, Register temp,
+ Label* label) {
+ branchValueIsNurseryCellImpl(cond, value, temp, label);
+}
+
+template <typename T>
+void MacroAssembler::branchValueIsNurseryCellImpl(Condition cond,
+ const T& value, Register temp,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ MOZ_ASSERT(temp != ScratchReg &&
+ temp != ScratchReg2); // Both may be used internally.
+
+ Label done;
+ branchTestGCThing(Assembler::NotEqual, value,
+ cond == Assembler::Equal ? &done : label);
+
+ getGCThingValueChunk(value, temp);
+ branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset),
+ ImmWord(0), label);
+
+ bind(&done);
+}
+
+void MacroAssembler::branchTestValue(Condition cond, const ValueOperand& lhs,
+ const Value& rhs, Label* label) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != lhs.valueReg());
+ moveValue(rhs, ValueOperand(scratch64.asUnsized()));
+ Cmp(ARMRegister(lhs.valueReg(), 64), scratch64);
+ B(label, cond);
+}
+
+// ========================================================================
+// Memory access primitives.
+template <typename T>
+void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
+ MIRType valueType, const T& dest) {
+ MOZ_ASSERT(valueType < MIRType::Value);
+
+ if (valueType == MIRType::Double) {
+ boxDouble(value.reg().typedReg().fpu(), dest);
+ return;
+ }
+
+ if (value.constant()) {
+ storeValue(value.value(), dest);
+ } else {
+ storeValue(ValueTypeFromMIRType(valueType), value.reg().typedReg().gpr(),
+ dest);
+ }
+}
+
+template void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
+ MIRType valueType,
+ const Address& dest);
+template void MacroAssembler::storeUnboxedValue(
+ const ConstantOrRegister& value, MIRType valueType,
+ const BaseObjectElementIndex& dest);
+
+void MacroAssembler::comment(const char* msg) { Assembler::comment(msg); }
+
+// ========================================================================
+// wasm support
+
+CodeOffset MacroAssembler::wasmTrapInstruction() {
+ AutoForbidPoolsAndNops afp(this,
+ /* max number of instructions in scope = */ 1);
+ CodeOffset offs(currentOffset());
+ Unreachable();
+ return offs;
+}
+
+void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
+ Register boundsCheckLimit, Label* ok) {
+ branch32(cond, index, boundsCheckLimit, ok);
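+  // Spectre mitigation: if the branch above was mispredicted, the csel below
+  // clamps the index to zero, so a speculatively executed access cannot use
+  // an out-of-range index.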
+ if (JitOptions.spectreIndexMasking) {
+ csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond);
+ }
+}
+
+void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
+ Address boundsCheckLimit, Label* ok) {
+ branch32(cond, index, boundsCheckLimit, ok);
+ if (JitOptions.spectreIndexMasking) {
+ csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond);
+ }
+}
+
+void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
+ Register64 boundsCheckLimit, Label* ok) {
+ branchPtr(cond, index.reg, boundsCheckLimit.reg, ok);
+ if (JitOptions.spectreIndexMasking) {
+ csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64),
+ cond);
+ }
+}
+
+void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
+ Address boundsCheckLimit, Label* ok) {
+ branchPtr(InvertCondition(cond), boundsCheckLimit, index.reg, ok);
+ if (JitOptions.spectreIndexMasking) {
+ csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64),
+ cond);
+ }
+}
+
+// FCVTZU behaves as follows:
+//
+// on NaN it produces zero
+// on too large it produces UINT_MAX (for appropriate type)
+// on too small it produces zero
+//
+// FCVTZS behaves as follows:
+//
+// on NaN it produces zero
+// on too large it produces INT_MAX (for appropriate type)
+// on too small it produces INT_MIN (ditto)
+
+void MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input_,
+ Register output_,
+ bool isSaturating,
+ Label* oolEntry) {
+ ARMRegister output(output_, 32);
+ ARMFPRegister input(input_, 64);
+ Fcvtzu(output, input);
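+  // Fcvtzu returns 0 for NaN or negative inputs and UINT32_MAX for inputs
+  // that are too large, so a result of 0 or UINT32_MAX may be out of range.
+  // The Cmp/Ccmp pair below sets Z exactly in those two cases, sending the
+  // non-saturating path to the out-of-line check.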
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ }
+}
+
+void MacroAssembler::wasmTruncateFloat32ToUInt32(FloatRegister input_,
+ Register output_,
+ bool isSaturating,
+ Label* oolEntry) {
+ ARMRegister output(output_, 32);
+ ARMFPRegister input(input_, 32);
+ Fcvtzu(output, input);
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ }
+}
+
+void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input_,
+ Register output_,
+ bool isSaturating,
+ Label* oolEntry) {
+ ARMRegister output(output_, 32);
+ ARMFPRegister input(input_, 64);
+ Fcvtzs(output, input);
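+  // Fcvtzs saturates to INT32_MIN/INT32_MAX and returns 0 for NaN, so those
+  // three results may indicate an out-of-range input. The Cmp/Ccmp chain
+  // below sets Z exactly when the result is 0, INT32_MAX or INT32_MIN, and
+  // B(Equal) then takes the out-of-line check.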
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual);
+ Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ }
+}
+
+void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input_,
+ Register output_,
+ bool isSaturating,
+ Label* oolEntry) {
+ ARMRegister output(output_, 32);
+ ARMFPRegister input(input_, 32);
+ Fcvtzs(output, input);
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual);
+ Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ }
+}
+
+void MacroAssembler::wasmTruncateDoubleToUInt64(
+ FloatRegister input_, Register64 output_, bool isSaturating,
+ Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
+ MOZ_ASSERT(tempDouble.isInvalid());
+
+ ARMRegister output(output_.reg, 64);
+ ARMFPRegister input(input_, 64);
+ Fcvtzu(output, input);
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ bind(oolRejoin);
+ }
+}
+
+void MacroAssembler::wasmTruncateFloat32ToUInt64(
+ FloatRegister input_, Register64 output_, bool isSaturating,
+ Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
+ MOZ_ASSERT(tempDouble.isInvalid());
+
+ ARMRegister output(output_.reg, 64);
+ ARMFPRegister input(input_, 32);
+ Fcvtzu(output, input);
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ bind(oolRejoin);
+ }
+}
+
+void MacroAssembler::wasmTruncateDoubleToInt64(
+ FloatRegister input_, Register64 output_, bool isSaturating,
+ Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
+ MOZ_ASSERT(tempDouble.isInvalid());
+
+ ARMRegister output(output_.reg, 64);
+ ARMFPRegister input(input_, 64);
+ Fcvtzs(output, input);
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual);
+ Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ bind(oolRejoin);
+ }
+}
+
+void MacroAssembler::wasmTruncateFloat32ToInt64(
+ FloatRegister input_, Register64 output_, bool isSaturating,
+ Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
+ ARMRegister output(output_.reg, 64);
+ ARMFPRegister input(input_, 32);
+ Fcvtzs(output, input);
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual);
+ Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ bind(oolRejoin);
+ }
+}
+
+void MacroAssembler::oolWasmTruncateCheckF32ToI32(FloatRegister input,
+ Register output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ Label notNaN;
+ branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
+ wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
+ bind(&notNaN);
+
+ Label isOverflow;
+ const float two_31 = -float(INT32_MIN);
+ ScratchFloat32Scope fpscratch(*this);
+ if (flags & TRUNC_UNSIGNED) {
+ loadConstantFloat32(two_31 * 2, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantFloat32(-1.0f, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
+ } else {
+ loadConstantFloat32(two_31, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantFloat32(-two_31, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
+ }
+ bind(&isOverflow);
+ wasmTrap(wasm::Trap::IntegerOverflow, off);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF64ToI32(FloatRegister input,
+ Register output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ Label notNaN;
+ branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
+ wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
+ bind(&notNaN);
+
+ Label isOverflow;
+ const double two_31 = -double(INT32_MIN);
+ ScratchDoubleScope fpscratch(*this);
+ if (flags & TRUNC_UNSIGNED) {
+ loadConstantDouble(two_31 * 2, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantDouble(-1.0, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
+ } else {
+ loadConstantDouble(two_31, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantDouble(-two_31 - 1, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
+ }
+ bind(&isOverflow);
+ wasmTrap(wasm::Trap::IntegerOverflow, off);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF32ToI64(FloatRegister input,
+ Register64 output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ Label notNaN;
+ branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
+ wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
+ bind(&notNaN);
+
+ Label isOverflow;
+ const float two_63 = -float(INT64_MIN);
+ ScratchFloat32Scope fpscratch(*this);
+ if (flags & TRUNC_UNSIGNED) {
+ loadConstantFloat32(two_63 * 2, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantFloat32(-1.0f, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
+ } else {
+ loadConstantFloat32(two_63, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantFloat32(-two_63, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
+ }
+ bind(&isOverflow);
+ wasmTrap(wasm::Trap::IntegerOverflow, off);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF64ToI64(FloatRegister input,
+ Register64 output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ Label notNaN;
+ branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
+ wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
+ bind(&notNaN);
+
+ Label isOverflow;
+ const double two_63 = -double(INT64_MIN);
+ ScratchDoubleScope fpscratch(*this);
+ if (flags & TRUNC_UNSIGNED) {
+ loadConstantDouble(two_63 * 2, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantDouble(-1.0, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
+ } else {
+ loadConstantDouble(two_63, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantDouble(-two_63, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
+ }
+ bind(&isOverflow);
+ wasmTrap(wasm::Trap::IntegerOverflow, off);
+}
+
+void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
+ Register memoryBase, Register ptr,
+ AnyRegister output) {
+ wasmLoadImpl(access, memoryBase, ptr, output, Register64::Invalid());
+}
+
+void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access,
+ Register memoryBase, Register ptr,
+ Register64 output) {
+ wasmLoadImpl(access, memoryBase, ptr, AnyRegister(), output);
+}
+
+void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access,
+ AnyRegister value, Register memoryBase,
+ Register ptr) {
+ wasmStoreImpl(access, value, Register64::Invalid(), memoryBase, ptr);
+}
+
+void MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access,
+ Register64 value, Register memoryBase,
+ Register ptr) {
+ wasmStoreImpl(access, AnyRegister(), value, memoryBase, ptr);
+}
+
+void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg, Register scratch,
+ ExitFrameType type) {
+ // Wasm stubs use the native SP, not the PSP.
+
+ linkExitFrame(cxreg, scratch);
+
+ MOZ_RELEASE_ASSERT(sp.Is(GetStackPointer64()));
+
+ // SP has to be 16-byte aligned when we do a load/store, so push |type| twice
+ // and then add 8 bytes to SP. This leaves SP unaligned.
+ move32(Imm32(int32_t(type)), scratch);
+ push(scratch, scratch);
+ Add(sp, sp, 8);
+
+ // Despite the above assertion, it is possible for control to flow from here
+ // to the code generated by
+ // MacroAssemblerCompat::handleFailureWithHandlerTail without any
+ // intervening assignment to PSP. But handleFailureWithHandlerTail assumes
+ // that PSP is the active stack pointer. Hence the following is necessary
+ // for safety. Note we can't use initPseudoStackPtr here as that would
+ // generate no instructions.
+ Mov(PseudoStackPointer64, sp);
+}
+
+void MacroAssembler::widenInt32(Register r) {
+ move32To64ZeroExtend(r, Register64(r));
+}
+
+// ========================================================================
+// Convert floating point.
+
+bool MacroAssembler::convertUInt64ToDoubleNeedsTemp() { return false; }
+
+void MacroAssembler::convertUInt64ToDouble(Register64 src, FloatRegister dest,
+ Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+ Ucvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::convertInt64ToDouble(Register64 src, FloatRegister dest) {
+ Scvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::convertUInt64ToFloat32(Register64 src, FloatRegister dest,
+ Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+ Ucvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::convertInt64ToFloat32(Register64 src, FloatRegister dest) {
+ Scvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::convertIntPtrToDouble(Register src, FloatRegister dest) {
+ convertInt64ToDouble(Register64(src), dest);
+}
+
+// ========================================================================
+// Primitive atomic operations.
+
+// The computed MemOperand must be Reg+0 because the load/store exclusive
+// instructions only take a single pointer register.
+
+static MemOperand ComputePointerForAtomic(MacroAssembler& masm,
+ const Address& address,
+ Register scratch) {
+ if (address.offset == 0) {
+ return MemOperand(X(masm, address.base), 0);
+ }
+
+ masm.Add(X(scratch), X(masm, address.base), address.offset);
+ return MemOperand(X(scratch), 0);
+}
+
+static MemOperand ComputePointerForAtomic(MacroAssembler& masm,
+ const BaseIndex& address,
+ Register scratch) {
+ masm.Add(X(scratch), X(masm, address.base),
+ Operand(X(address.index), vixl::LSL, address.scale));
+ if (address.offset) {
+ masm.Add(X(scratch), X(scratch), address.offset);
+ }
+ return MemOperand(X(scratch), 0);
+}
+
+// This sign- or zero-extends the value (according to the signedness of
+// srcType) to targetWidth and leaves any higher bits zero.
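+// (Sbfm/Ubfm with immr == 0 and imms == 7, 15 or 31 are the underlying
+// encodings of the byte, halfword and word sign/zero extensions; e.g. sxtb is
+// an alias of Sbfm Rd, Rn, #0, #7.)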
+
+static void SignOrZeroExtend(MacroAssembler& masm, Scalar::Type srcType,
+ Width targetWidth, Register src, Register dest) {
+ bool signExtend = Scalar::isSignedIntType(srcType);
+
+ switch (Scalar::byteSize(srcType)) {
+ case 1:
+ if (signExtend) {
+ masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 7);
+ } else {
+ masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 7);
+ }
+ break;
+ case 2:
+ if (signExtend) {
+ masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 15);
+ } else {
+ masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 15);
+ }
+ break;
+ case 4:
+ if (targetWidth == Width::_64) {
+ if (signExtend) {
+ masm.Sbfm(X(dest), X(src), 0, 31);
+ } else {
+ masm.Ubfm(X(dest), X(src), 0, 31);
+ }
+ } else if (src != dest) {
+ masm.Mov(R(dest, targetWidth), R(src, targetWidth));
+ }
+ break;
+ case 8:
+ if (src != dest) {
+ masm.Mov(R(dest, targetWidth), R(src, targetWidth));
+ }
+ break;
+ default:
+ MOZ_CRASH();
+ }
+}
+
+// Exclusive-loads zero-extend their values to the full width of the X register.
+//
+// Note that we have promised to leave the high bits of the 64-bit register
+// clear when targetWidth is 32.
+
+static void LoadExclusive(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type srcType, Width targetWidth,
+ MemOperand ptr, Register dest) {
+ bool signExtend = Scalar::isSignedIntType(srcType);
+
+ // With this address form, a single native ldxr* will be emitted, and the
+ // AutoForbidPoolsAndNops ensures that the metadata is emitted at the address
+ // of the ldxr*.
+ MOZ_ASSERT(ptr.IsImmediateOffset() && ptr.offset() == 0);
+
+ switch (Scalar::byteSize(srcType)) {
+ case 1: {
+ {
+ AutoForbidPoolsAndNops afp(
+ &masm,
+ /* max number of instructions in scope = */ 1);
+ if (access) {
+ masm.append(*access, masm.currentOffset());
+ }
+ masm.Ldxrb(W(dest), ptr);
+ }
+ if (signExtend) {
+ masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 7);
+ }
+ break;
+ }
+ case 2: {
+ {
+ AutoForbidPoolsAndNops afp(
+ &masm,
+ /* max number of instructions in scope = */ 1);
+ if (access) {
+ masm.append(*access, masm.currentOffset());
+ }
+ masm.Ldxrh(W(dest), ptr);
+ }
+ if (signExtend) {
+ masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 15);
+ }
+ break;
+ }
+ case 4: {
+ {
+ AutoForbidPoolsAndNops afp(
+ &masm,
+ /* max number of instructions in scope = */ 1);
+ if (access) {
+ masm.append(*access, masm.currentOffset());
+ }
+ masm.Ldxr(W(dest), ptr);
+ }
+ if (targetWidth == Width::_64 && signExtend) {
+ masm.Sbfm(X(dest), X(dest), 0, 31);
+ }
+ break;
+ }
+ case 8: {
+ {
+ AutoForbidPoolsAndNops afp(
+ &masm,
+ /* max number of instructions in scope = */ 1);
+ if (access) {
+ masm.append(*access, masm.currentOffset());
+ }
+ masm.Ldxr(X(dest), ptr);
+ }
+ break;
+ }
+ default: {
+ MOZ_CRASH();
+ }
+ }
+}
+
+static void StoreExclusive(MacroAssembler& masm, Scalar::Type type,
+ Register status, Register src, MemOperand ptr) {
+ switch (Scalar::byteSize(type)) {
+ case 1:
+ masm.Stxrb(W(status), W(src), ptr);
+ break;
+ case 2:
+ masm.Stxrh(W(status), W(src), ptr);
+ break;
+ case 4:
+ masm.Stxr(W(status), W(src), ptr);
+ break;
+ case 8:
+ masm.Stxr(W(status), X(src), ptr);
+ break;
+ }
+}
+
+static bool HasAtomicInstructions(MacroAssembler& masm) {
+ return masm.asVIXL().GetCPUFeatures()->Has(vixl::CPUFeatures::kAtomics);
+}
+
+static inline bool SupportedAtomicInstructionOperands(Scalar::Type type,
+ Width targetWidth) {
+ if (targetWidth == Width::_32) {
+ return byteSize(type) <= 4;
+ }
+ if (targetWidth == Width::_64) {
+ return byteSize(type) == 8;
+ }
+ return false;
+}
+
+template <typename T>
+static void CompareExchange(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type type, Width targetWidth,
+ const Synchronization& sync, const T& mem,
+ Register oldval, Register newval, Register output) {
+ MOZ_ASSERT(oldval != output && newval != output);
+
+ vixl::UseScratchRegisterScope temps(&masm);
+
+ Register ptrScratch = temps.AcquireX().asUnsized();
+ MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch);
+
+ MOZ_ASSERT(ptr.base().asUnsized() != output);
+
+ if (HasAtomicInstructions(masm) &&
+ SupportedAtomicInstructionOperands(type, targetWidth)) {
+ masm.Mov(X(output), X(oldval));
+    // Casal uses the same atomic mechanism as Ldxr/Stxr and is treated the
+    // same for the "Inner Shareable" domain. gen_cmpxchg in
+    // GenerateAtomicOperations.py has not been updated to use it.
+ masm.memoryBarrierBefore(sync);
+ if (access) {
+ masm.append(*access, masm.currentOffset());
+ }
+ switch (byteSize(type)) {
+ case 1:
+ masm.Casalb(R(output, targetWidth), R(newval, targetWidth), ptr);
+ break;
+ case 2:
+ masm.Casalh(R(output, targetWidth), R(newval, targetWidth), ptr);
+ break;
+ case 4:
+ case 8:
+ masm.Casal(R(output, targetWidth), R(newval, targetWidth), ptr);
+ break;
+ default:
+ MOZ_CRASH("CompareExchange unsupported type");
+ }
+ masm.memoryBarrierAfter(sync);
+ SignOrZeroExtend(masm, type, targetWidth, output, output);
+ return;
+ }
+
+  // The target doesn't support the single-instruction (LSE) atomics, so
+  // generate an LL/SC loop. This requires only ARMv8.0.
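+  //
+  // The loop: load-exclusive the current value, compare it against the
+  // (sign/zero-extended) expected value, and if they match try a
+  // store-exclusive of the replacement. Stxr writes 0 to the status register
+  // on success, so Cbnz retries when the exclusive store was lost.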
+ Label again;
+ Label done;
+
+ // NOTE: the generated code must match the assembly code in gen_cmpxchg in
+ // GenerateAtomicOperations.py
+ masm.memoryBarrierBefore(sync);
+
+ Register scratch = temps.AcquireX().asUnsized();
+
+ masm.bind(&again);
+ SignOrZeroExtend(masm, type, targetWidth, oldval, scratch);
+ LoadExclusive(masm, access, type, targetWidth, ptr, output);
+ masm.Cmp(R(output, targetWidth), R(scratch, targetWidth));
+ masm.B(&done, MacroAssembler::NotEqual);
+ StoreExclusive(masm, type, scratch, newval, ptr);
+ masm.Cbnz(W(scratch), &again);
+ masm.bind(&done);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+template <typename T>
+static void AtomicExchange(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type type, Width targetWidth,
+ const Synchronization& sync, const T& mem,
+ Register value, Register output) {
+ MOZ_ASSERT(value != output);
+
+ vixl::UseScratchRegisterScope temps(&masm);
+
+ Register ptrScratch = temps.AcquireX().asUnsized();
+ MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch);
+
+ if (HasAtomicInstructions(masm) &&
+ SupportedAtomicInstructionOperands(type, targetWidth)) {
+    // Swpal uses the same atomic mechanism as Ldxr/Stxr and is treated the
+    // same for the "Inner Shareable" domain. gen_exchange in
+    // GenerateAtomicOperations.py has not been updated to use it.
+ masm.memoryBarrierBefore(sync);
+ if (access) {
+ masm.append(*access, masm.currentOffset());
+ }
+ switch (byteSize(type)) {
+ case 1:
+ masm.Swpalb(R(value, targetWidth), R(output, targetWidth), ptr);
+ break;
+ case 2:
+ masm.Swpalh(R(value, targetWidth), R(output, targetWidth), ptr);
+ break;
+ case 4:
+ case 8:
+ masm.Swpal(R(value, targetWidth), R(output, targetWidth), ptr);
+ break;
+ default:
+ MOZ_CRASH("AtomicExchange unsupported type");
+ }
+ masm.memoryBarrierAfter(sync);
+ SignOrZeroExtend(masm, type, targetWidth, output, output);
+ return;
+ }
+
+  // The target doesn't support the single-instruction (LSE) atomics, so
+  // generate an LL/SC loop. This requires only ARMv8.0.
+ Label again;
+
+ // NOTE: the generated code must match the assembly code in gen_exchange in
+ // GenerateAtomicOperations.py
+ masm.memoryBarrierBefore(sync);
+
+ Register scratch = temps.AcquireX().asUnsized();
+
+ masm.bind(&again);
+ LoadExclusive(masm, access, type, targetWidth, ptr, output);
+ StoreExclusive(masm, type, scratch, value, ptr);
+ masm.Cbnz(W(scratch), &again);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+template <bool wantResult, typename T>
+static void AtomicFetchOp(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type type, Width targetWidth,
+ const Synchronization& sync, AtomicOp op,
+ const T& mem, Register value, Register temp,
+ Register output) {
+ MOZ_ASSERT(value != output);
+ MOZ_ASSERT(value != temp);
+ MOZ_ASSERT_IF(wantResult, output != temp);
+
+ vixl::UseScratchRegisterScope temps(&masm);
+
+ Register ptrScratch = temps.AcquireX().asUnsized();
+ MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch);
+
+ if (HasAtomicInstructions(masm) &&
+ SupportedAtomicInstructionOperands(type, targetWidth) &&
+ !isFloatingType(type)) {
+    // LdXXXal/StXXXl use the same atomic mechanism as Ldxr/Stxr and are
+    // treated the same for the "Inner Shareable" domain. gen_fetchop in
+    // GenerateAtomicOperations.py has not been updated to use them.
+ masm.memoryBarrierBefore(sync);
+
+#define FETCH_OP_CASE(op, arg) \
+ if (access) { \
+ masm.append(*access, masm.currentOffset()); \
+ } \
+ switch (byteSize(type)) { \
+ case 1: \
+ if (wantResult) { \
+ masm.Ld##op##alb(R(arg, targetWidth), R(output, targetWidth), ptr); \
+ } else { \
+ masm.St##op##lb(R(arg, targetWidth), ptr); \
+ } \
+ break; \
+ case 2: \
+ if (wantResult) { \
+ masm.Ld##op##alh(R(arg, targetWidth), R(output, targetWidth), ptr); \
+ } else { \
+ masm.St##op##lh(R(arg, targetWidth), ptr); \
+ } \
+ break; \
+ case 4: \
+ case 8: \
+ if (wantResult) { \
+ masm.Ld##op##al(R(arg, targetWidth), R(output, targetWidth), ptr); \
+ } else { \
+ masm.St##op##l(R(arg, targetWidth), ptr); \
+ } \
+ break; \
+ default: \
+ MOZ_CRASH("AtomicFetchOp unsupported type"); \
+ }
+
+ switch (op) {
+ case AtomicFetchAddOp:
+ FETCH_OP_CASE(add, value);
+ break;
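+      // The LSE instruction set has no fetch-sub or fetch-and, so subtraction
+      // is implemented as Ldadd of the negated value and AND as Ldclr
+      // (clear bits) of the complemented value.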
+ case AtomicFetchSubOp: {
+ Register scratch = temps.AcquireX().asUnsized();
+ masm.Neg(X(scratch), X(value));
+ FETCH_OP_CASE(add, scratch);
+ break;
+ }
+ case AtomicFetchAndOp: {
+ Register scratch = temps.AcquireX().asUnsized();
+ masm.Eor(X(scratch), X(value), Operand(~0));
+ FETCH_OP_CASE(clr, scratch);
+ break;
+ }
+ case AtomicFetchOrOp:
+ FETCH_OP_CASE(set, value);
+ break;
+ case AtomicFetchXorOp:
+ FETCH_OP_CASE(eor, value);
+ break;
+ }
+ masm.memoryBarrierAfter(sync);
+ if (wantResult) {
+ SignOrZeroExtend(masm, type, targetWidth, output, output);
+ }
+ return;
+ }
+
+#undef FETCH_OP_CASE
+
+  // The target doesn't support the single-instruction (LSE) atomics, so
+  // generate an LL/SC loop. This requires only ARMv8.0.
+ Label again;
+
+ // NOTE: the generated code must match the assembly code in gen_fetchop in
+ // GenerateAtomicOperations.py
+ masm.memoryBarrierBefore(sync);
+
+ Register scratch = temps.AcquireX().asUnsized();
+
+ masm.bind(&again);
+ LoadExclusive(masm, access, type, targetWidth, ptr, output);
+ switch (op) {
+ case AtomicFetchAddOp:
+ masm.Add(X(temp), X(output), X(value));
+ break;
+ case AtomicFetchSubOp:
+ masm.Sub(X(temp), X(output), X(value));
+ break;
+ case AtomicFetchAndOp:
+ masm.And(X(temp), X(output), X(value));
+ break;
+ case AtomicFetchOrOp:
+ masm.Orr(X(temp), X(output), X(value));
+ break;
+ case AtomicFetchXorOp:
+ masm.Eor(X(temp), X(output), X(value));
+ break;
+ }
+ StoreExclusive(masm, type, scratch, temp, ptr);
+ masm.Cbnz(W(scratch), &again);
+ if (wantResult) {
+ SignOrZeroExtend(masm, type, targetWidth, output, output);
+ }
+
+ masm.memoryBarrierAfter(sync);
+}
+
+void MacroAssembler::compareExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const Address& mem, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval,
+ output);
+}
+
+void MacroAssembler::compareExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const BaseIndex& mem, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval,
+ output);
+}
+
+void MacroAssembler::compareExchange64(const Synchronization& sync,
+ const Address& mem, Register64 expect,
+ Register64 replace, Register64 output) {
+ CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
+ expect.reg, replace.reg, output.reg);
+}
+
+void MacroAssembler::compareExchange64(const Synchronization& sync,
+ const BaseIndex& mem, Register64 expect,
+ Register64 replace, Register64 output) {
+ CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
+ expect.reg, replace.reg, output.reg);
+}
+
+void MacroAssembler::atomicExchange64(const Synchronization& sync,
+ const Address& mem, Register64 value,
+ Register64 output) {
+ AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
+ value.reg, output.reg);
+}
+
+void MacroAssembler::atomicExchange64(const Synchronization& sync,
+ const BaseIndex& mem, Register64 value,
+ Register64 output) {
+ AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
+ value.reg, output.reg);
+}
+
+void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const Address& mem,
+ Register64 temp, Register64 output) {
+ AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
+ value.reg, temp.reg, output.reg);
+}
+
+void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const BaseIndex& mem,
+ Register64 temp, Register64 output) {
+ AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
+ value.reg, temp.reg, output.reg);
+}
+
+void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const Address& mem,
+ Register64 temp) {
+ AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
+ value.reg, temp.reg, temp.reg);
+}
+
+void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const BaseIndex& mem,
+ Register64 temp) {
+ AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
+ value.reg, temp.reg, temp.reg);
+}
+
+void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
+ const Address& mem, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
+ oldval, newval, output);
+}
+
+void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
+ oldval, newval, output);
+}
+
+void MacroAssembler::atomicExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const Address& mem, Register value,
+ Register output) {
+ AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output);
+}
+
+void MacroAssembler::atomicExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const BaseIndex& mem, Register value,
+ Register output) {
+ AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output);
+}
+
+void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
+ const Address& mem, Register value,
+ Register output) {
+ AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
+ value, output);
+}
+
+void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem, Register value,
+ Register output) {
+ AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
+ value, output);
+}
+
+void MacroAssembler::atomicFetchOp(Scalar::Type type,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const Address& mem,
+ Register temp, Register output) {
+ AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value,
+ temp, output);
+}
+
+void MacroAssembler::atomicFetchOp(Scalar::Type type,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const BaseIndex& mem,
+ Register temp, Register output) {
+ AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value,
+ temp, output);
+}
+
+void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const Address& mem, Register temp,
+ Register output) {
+ AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(),
+ op, mem, value, temp, output);
+}
+
+void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const BaseIndex& mem, Register temp,
+ Register output) {
+ AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(),
+ op, mem, value, temp, output);
+}
+
+void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const Address& mem, Register temp) {
+ AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(),
+ op, mem, value, temp, temp);
+}
+
+void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const BaseIndex& mem, Register temp) {
+ AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(),
+ op, mem, value, temp, temp);
+}
+
+void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
+ const Address& mem,
+ Register64 expect,
+ Register64 replace,
+ Register64 output) {
+ CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
+ expect.reg, replace.reg, output.reg);
+}
+
+void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem,
+ Register64 expect,
+ Register64 replace,
+ Register64 output) {
+ CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
+ expect.reg, replace.reg, output.reg);
+}
+
+void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
+ const Address& mem, Register64 value,
+ Register64 output) {
+ AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
+ value.reg, output.reg);
+}
+
+void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem,
+ Register64 value, Register64 output) {
+ AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
+ value.reg, output.reg);
+}
+
+void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register64 value,
+ const Address& mem, Register64 temp,
+ Register64 output) {
+ AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(),
+ op, mem, value.reg, temp.reg, output.reg);
+}
+
+void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register64 value,
+ const BaseIndex& mem, Register64 temp,
+ Register64 output) {
+ AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(),
+ op, mem, value.reg, temp.reg, output.reg);
+}
+
+void MacroAssembler::wasmAtomicEffectOp64(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register64 value,
+ const BaseIndex& mem,
+ Register64 temp) {
+ AtomicFetchOp<false>(*this, &access, Scalar::Int64, Width::_64, access.sync(),
+ op, mem, value.reg, temp.reg, temp.reg);
+}
+
+// ========================================================================
+// JS atomic operations.
+
+template <typename T>
+static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
+ const Synchronization& sync, const T& mem,
+ Register oldval, Register newval, Register temp,
+ AnyRegister output) {
+ if (arrayType == Scalar::Uint32) {
+ masm.compareExchange(arrayType, sync, mem, oldval, newval, temp);
+ masm.convertUInt32ToDouble(temp, output.fpu());
+ } else {
+ masm.compareExchange(arrayType, sync, mem, oldval, newval, output.gpr());
+ }
+}
+
+void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const Address& mem, Register oldval,
+ Register newval, Register temp,
+ AnyRegister output) {
+ CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
+}
+
+void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const BaseIndex& mem, Register oldval,
+ Register newval, Register temp,
+ AnyRegister output) {
+ CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
+}
+
+template <typename T>
+static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
+ const Synchronization& sync, const T& mem,
+ Register value, Register temp,
+ AnyRegister output) {
+ if (arrayType == Scalar::Uint32) {
+ masm.atomicExchange(arrayType, sync, mem, value, temp);
+ masm.convertUInt32ToDouble(temp, output.fpu());
+ } else {
+ masm.atomicExchange(arrayType, sync, mem, value, output.gpr());
+ }
+}
+
+void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const Address& mem, Register value,
+ Register temp, AnyRegister output) {
+ AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
+}
+
+void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const BaseIndex& mem, Register value,
+ Register temp, AnyRegister output) {
+ AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
+}
+
+template <typename T>
+static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const T& mem, Register temp1,
+ Register temp2, AnyRegister output) {
+ if (arrayType == Scalar::Uint32) {
+ masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1);
+ masm.convertUInt32ToDouble(temp1, output.fpu());
+ } else {
+ masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr());
+ }
+}
+
+void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const Address& mem,
+ Register temp1, Register temp2,
+ AnyRegister output) {
+ AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
+}
+
+void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const BaseIndex& mem,
+ Register temp1, Register temp2,
+ AnyRegister output) {
+ AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
+}
+
+void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const BaseIndex& mem,
+ Register temp) {
+ AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem,
+ value, temp, temp);
+}
+
+void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const Address& mem,
+ Register temp) {
+ AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem,
+ value, temp, temp);
+}
+
+void MacroAssembler::flexibleQuotient32(Register rhs, Register srcDest,
+ bool isUnsigned,
+ const LiveRegisterSet&) {
+ quotient32(rhs, srcDest, isUnsigned);
+}
+
+void MacroAssembler::flexibleRemainder32(Register rhs, Register srcDest,
+ bool isUnsigned,
+ const LiveRegisterSet&) {
+ remainder32(rhs, srcDest, isUnsigned);
+}
+
+void MacroAssembler::flexibleDivMod32(Register rhs, Register srcDest,
+ Register remOutput, bool isUnsigned,
+ const LiveRegisterSet&) {
+ vixl::UseScratchRegisterScope temps(this);
+ ARMRegister scratch = temps.AcquireW();
+ ARMRegister src = temps.AcquireW();
+
+ // Preserve src for remainder computation
+ Mov(src, ARMRegister(srcDest, 32));
+
+ if (isUnsigned) {
+ Udiv(ARMRegister(srcDest, 32), src, ARMRegister(rhs, 32));
+ } else {
+ Sdiv(ARMRegister(srcDest, 32), src, ARMRegister(rhs, 32));
+ }
+ // Compute remainder
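+  //   remOutput = src - quotient * rhs   (srcDest holds the quotient here)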
+ Mul(scratch, ARMRegister(srcDest, 32), ARMRegister(rhs, 32));
+ Sub(ARMRegister(remOutput, 32), src, scratch);
+}
+
+CodeOffset MacroAssembler::moveNearAddressWithPatch(Register dest) {
+ AutoForbidPoolsAndNops afp(this,
+ /* max number of instructions in scope = */ 1);
+ CodeOffset offset(currentOffset());
+ adr(ARMRegister(dest, 64), 0, LabelDoc());
+ return offset;
+}
+
+void MacroAssembler::patchNearAddressMove(CodeLocationLabel loc,
+ CodeLocationLabel target) {
+ ptrdiff_t off = target - loc;
+ MOZ_RELEASE_ASSERT(vixl::IsInt21(off));
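+  // adr encodes a signed 21-bit byte offset, i.e. the target must be within
+  // roughly +/-1 MiB of the instruction, hence the assertion above.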
+
+ Instruction* cur = reinterpret_cast<Instruction*>(loc.raw());
+ MOZ_ASSERT(cur->IsADR());
+
+ vixl::Register rd = vixl::Register::XRegFromCode(cur->Rd());
+ adr(cur, rd, off);
+}
+
+// ========================================================================
+// Spectre Mitigations.
+
+void MacroAssembler::speculationBarrier() {
+ // Conditional speculation barrier.
+ csdb();
+}
+
+void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ARMFPRegister iFlt(src, 32);
+ ARMRegister o64(dest, 64);
+ ARMRegister o32(dest, 32);
+
+ Label handleZero;
+ Label fin;
+
+ // Handle ±0 and NaN first.
+ Fcmp(iFlt, 0.0);
+ B(Assembler::Equal, &handleZero);
+ // NaN is always a bail condition, just bail directly.
+ B(Assembler::Overflow, fail);
+
+ // Round towards negative infinity.
+ Fcvtms(o64, iFlt);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(o64, Operand(o64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // Clear upper 32 bits.
+ Uxtw(o64, o64);
+ B(&fin);
+
+ bind(&handleZero);
+  // Move the raw bits of the float into the output reg; if they are non-zero,
+  // the original value was -0.0.
+ Fmov(o32, iFlt);
+ Cbnz(o32, fail);
+ bind(&fin);
+}
+
+void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ARMFPRegister iDbl(src, 64);
+ ARMRegister o64(dest, 64);
+ ARMRegister o32(dest, 32);
+
+ Label handleZero;
+ Label fin;
+
+ // Handle ±0 and NaN first.
+ Fcmp(iDbl, 0.0);
+ B(Assembler::Equal, &handleZero);
+ // NaN is always a bail condition, just bail directly.
+ B(Assembler::Overflow, fail);
+
+ // Round towards negative infinity.
+ Fcvtms(o64, iDbl);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(o64, Operand(o64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // Clear upper 32 bits.
+ Uxtw(o64, o64);
+ B(&fin);
+
+ bind(&handleZero);
+  // Move the raw bits of the double into the output reg; if they are
+  // non-zero, the original value was -0.0.
+ Fmov(o64, iDbl);
+ Cbnz(o64, fail);
+ bind(&fin);
+}
+
+void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ARMFPRegister iFlt(src, 32);
+ ARMRegister o64(dest, 64);
+ ARMRegister o32(dest, 32);
+
+ Label handleZero;
+ Label fin;
+
+ // Round towards positive infinity.
+ Fcvtps(o64, iFlt);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(o64, Operand(o64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // We have to check for (-1, -0] and NaN when the result is zero.
+ Cbz(o64, &handleZero);
+
+ // Clear upper 32 bits.
+ Uxtw(o64, o64);
+ B(&fin);
+
+ // Bail if the input is in (-1, -0] or NaN.
+ bind(&handleZero);
+  // Move the raw bits of the float into the output reg; if they are non-zero,
+  // the original value wasn't +0.0.
+ Fmov(o32, iFlt);
+ Cbnz(o32, fail);
+ bind(&fin);
+}
+
+void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ARMFPRegister iDbl(src, 64);
+ ARMRegister o64(dest, 64);
+ ARMRegister o32(dest, 32);
+
+ Label handleZero;
+ Label fin;
+
+ // Round towards positive infinity.
+ Fcvtps(o64, iDbl);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(o64, Operand(o64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // We have to check for (-1, -0] and NaN when the result is zero.
+ Cbz(o64, &handleZero);
+
+ // Clear upper 32 bits.
+ Uxtw(o64, o64);
+ B(&fin);
+
+ // Bail if the input is in (-1, -0] or NaN.
+ bind(&handleZero);
+  // Move the raw bits of the double into the output reg; if they are
+  // non-zero, the original value wasn't +0.0.
+ Fmov(o64, iDbl);
+ Cbnz(o64, fail);
+ bind(&fin);
+}
+
+void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ARMFPRegister src32(src, 32);
+ ARMRegister dest32(dest, 32);
+ ARMRegister dest64(dest, 64);
+
+ Label done, zeroCase;
+
+ // Convert scalar to signed 64-bit fixed-point, rounding toward zero.
+ // In the case of overflow, the output is saturated.
+ // In the case of NaN and -0, the output is zero.
+ Fcvtzs(dest64, src32);
+
+ // If the output was zero, worry about special cases.
+ Cbz(dest64, &zeroCase);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(dest64, Operand(dest64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // Clear upper 32 bits.
+ Uxtw(dest64, dest64);
+
+ // If the output was non-zero and wasn't saturated, just return it.
+ B(&done);
+
+ // Handle the case of a zero output:
+ // 1. The input may have been NaN, requiring a failure.
+ // 2. The input may have been in (-1,-0], requiring a failure.
+ {
+ bind(&zeroCase);
+
+ // Combine test for negative and NaN values using a single bitwise
+ // operation.
+ //
+ // | Decimal number | Bitwise representation |
+ // |----------------|------------------------|
+ // | -0 | 8000'0000 |
+ // | +0 | 0000'0000 |
+ // | +1 | 3f80'0000 |
+ // | NaN (or +Inf) | 7fyx'xxxx, y >= 8 |
+ // | -NaN (or -Inf) | ffyx'xxxx, y >= 8 |
+ //
+    // If either of the two most significant bits is set, the number isn't in
+    // [0, 1). (Recall that floating point numbers, except for NaN, are
+    // strictly ordered when comparing their bitwise representation as signed
+    // integers.)
+
+ Fmov(dest32, src32);
+ Lsr(dest32, dest32, 30);
+ Cbnz(dest32, fail);
+ }
+
+ bind(&done);
+}
+
+void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ARMFPRegister src64(src, 64);
+ ARMRegister dest64(dest, 64);
+ ARMRegister dest32(dest, 32);
+
+ Label done, zeroCase;
+
+ // Convert scalar to signed 64-bit fixed-point, rounding toward zero.
+ // In the case of overflow, the output is saturated.
+ // In the case of NaN and -0, the output is zero.
+ Fcvtzs(dest64, src64);
+
+ // If the output was zero, worry about special cases.
+ Cbz(dest64, &zeroCase);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(dest64, Operand(dest64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // Clear upper 32 bits.
+ Uxtw(dest64, dest64);
+
+ // If the output was non-zero and wasn't saturated, just return it.
+ B(&done);
+
+ // Handle the case of a zero output:
+ // 1. The input may have been NaN, requiring a failure.
+ // 2. The input may have been in (-1,-0], requiring a failure.
+ {
+ bind(&zeroCase);
+
+ // Combine test for negative and NaN values using a single bitwise
+ // operation.
+ //
+ // | Decimal number | Bitwise representation |
+ // |----------------|------------------------|
+ // | -0 | 8000'0000'0000'0000 |
+ // | +0 | 0000'0000'0000'0000 |
+ // | +1 | 3ff0'0000'0000'0000 |
+ // | NaN (or +Inf) | 7ffx'xxxx'xxxx'xxxx |
+ // | -NaN (or -Inf) | fffx'xxxx'xxxx'xxxx |
+ //
+    // If either of the two most significant bits is set, the number isn't in
+    // [0, 1). (Recall that floating point numbers, except for NaN, are
+    // strictly ordered when comparing their bitwise representation as signed
+    // integers.)
+
+ Fmov(dest64, src64);
+ Lsr(dest64, dest64, 62);
+ Cbnz(dest64, fail);
+ }
+
+ bind(&done);
+}
+
+void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest,
+ FloatRegister temp, Label* fail) {
+ ARMFPRegister src32(src, 32);
+ ARMRegister dest32(dest, 32);
+ ARMRegister dest64(dest, 64);
+
+ Label negative, saturated, done;
+
+ // Branch to a slow path if input < 0.0 due to complicated rounding rules.
+ // Note that Fcmp with NaN unsets the negative flag.
+ Fcmp(src32, 0.0);
+ B(&negative, Assembler::Condition::lo);
+
+ // Handle the simple case of a positive input, and also -0 and NaN.
+ // Rounding proceeds with consideration of the fractional part of the input:
+ // 1. If > 0.5, round to integer with higher absolute value (so, up).
+ // 2. If < 0.5, round to integer with lower absolute value (so, down).
+ // 3. If = 0.5, round to +Infinity (so, up).
+ {
+ // Convert to signed 64-bit integer, rounding halfway cases away from zero.
+ // In the case of overflow, the output is saturated.
+ // In the case of NaN and -0, the output is zero.
+ Fcvtas(dest64, src32);
+
+ // In the case of zero, the input may have been NaN or -0, which must bail.
+ Cbnz(dest64, &saturated);
+
+ // Combine test for -0 and NaN values using a single bitwise operation.
+ // See truncFloat32ToInt32 for an explanation.
+ Fmov(dest32, src32);
+ Lsr(dest32, dest32, 30);
+ Cbnz(dest32, fail);
+
+ B(&done);
+ }
+
+ // Handle the complicated case of a negative input.
+ // Rounding proceeds with consideration of the fractional part of the input:
+ // 1. If > 0.5, round to integer with higher absolute value (so, down).
+ // 2. If < 0.5, round to integer with lower absolute value (so, up).
+ // 3. If = 0.5, round to +Infinity (so, up).
+ bind(&negative);
+ {
+ // Inputs in [-0.5, 0) are rounded to -0. Fail.
+ loadConstantFloat32(-0.5f, temp);
+ branchFloat(Assembler::DoubleGreaterThanOrEqual, src, temp, fail);
+
+    // Other negative inputs need the biggest float32 less than 0.5 added.
+ loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp);
+ addFloat32(src, temp);
+
+ // Round all values toward -Infinity.
+ // In the case of overflow, the output is saturated.
+ // NaN and -0 are already handled by the "positive number" path above.
+ Fcvtms(dest64, temp);
+ }
+
+ bind(&saturated);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(dest64, Operand(dest64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // Clear upper 32 bits.
+ Uxtw(dest64, dest64);
+
+ bind(&done);
+}
+
+void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest,
+ FloatRegister temp, Label* fail) {
+ ARMFPRegister src64(src, 64);
+ ARMRegister dest64(dest, 64);
+ ARMRegister dest32(dest, 32);
+
+ Label negative, saturated, done;
+
+ // Branch to a slow path if input < 0.0 due to complicated rounding rules.
+ // Note that Fcmp with NaN unsets the negative flag.
+ Fcmp(src64, 0.0);
+ B(&negative, Assembler::Condition::lo);
+
+ // Handle the simple case of a positive input, and also -0 and NaN.
+ // Rounding proceeds with consideration of the fractional part of the input:
+ // 1. If > 0.5, round to integer with higher absolute value (so, up).
+ // 2. If < 0.5, round to integer with lower absolute value (so, down).
+ // 3. If = 0.5, round to +Infinity (so, up).
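+  // For example: 2.4 -> 2, 2.5 -> 3, 2.6 -> 3.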
+ {
+ // Convert to signed 64-bit integer, rounding halfway cases away from zero.
+ // In the case of overflow, the output is saturated.
+ // In the case of NaN and -0, the output is zero.
+ Fcvtas(dest64, src64);
+
+ // In the case of zero, the input may have been NaN or -0, which must bail.
+ Cbnz(dest64, &saturated);
+
+ // Combine test for -0 and NaN values using a single bitwise operation.
+ // See truncDoubleToInt32 for an explanation.
+ Fmov(dest64, src64);
+ Lsr(dest64, dest64, 62);
+ Cbnz(dest64, fail);
+
+ B(&done);
+ }
+
+ // Handle the complicated case of a negative input.
+ // Rounding proceeds with consideration of the fractional part of the input:
+ // 1. If > 0.5, round to integer with higher absolute value (so, down).
+ // 2. If < 0.5, round to integer with lower absolute value (so, up).
+ // 3. If = 0.5, round to +Infinity (so, up).
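+  // For example: -2.4 -> -2, -2.5 -> -2, -2.6 -> -3.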
+ bind(&negative);
+ {
+ // Inputs in [-0.5, 0) are rounded to -0. Fail.
+ loadConstantDouble(-0.5, temp);
+ branchDouble(Assembler::DoubleGreaterThanOrEqual, src, temp, fail);
+
+ // Other negative inputs need the biggest double less than 0.5 added.
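+    // As in roundFloat32ToInt32 above: this bias plus the round toward
+    // -Infinity below makes halfway cases round toward +Infinity.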
+ loadConstantDouble(GetBiggestNumberLessThan(0.5), temp);
+ addDouble(src, temp);
+
+ // Round all values toward -Infinity.
+ // In the case of overflow, the output is saturated.
+ // NaN and -0 are already handled by the "positive number" path above.
+ Fcvtms(dest64, temp);
+ }
+
+ bind(&saturated);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(dest64, Operand(dest64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // Clear upper 32 bits.
+ Uxtw(dest64, dest64);
+
+ bind(&done);
+}
+
+void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src,
+ FloatRegister dest) {
+ switch (mode) {
+ case RoundingMode::Up:
+ frintp(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
+ return;
+ case RoundingMode::Down:
+ frintm(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
+ return;
+ case RoundingMode::NearestTiesToEven:
+ frintn(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
+ return;
+ case RoundingMode::TowardsZero:
+ frintz(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
+ return;
+ }
+ MOZ_CRASH("unexpected mode");
+}
+
+void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src,
+ FloatRegister dest) {
+ switch (mode) {
+ case RoundingMode::Up:
+ frintp(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
+ return;
+ case RoundingMode::Down:
+ frintm(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
+ return;
+ case RoundingMode::NearestTiesToEven:
+ frintn(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
+ return;
+ case RoundingMode::TowardsZero:
+ frintz(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
+ return;
+ }
+ MOZ_CRASH("unexpected mode");
+}
+
+void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister output) {
+ ScratchDoubleScope scratch(*this);
+
+ // Double with only the sign bit set
+ loadConstantDouble(-0.0, scratch);
+
+ if (lhs != output) {
+ moveDouble(lhs, output);
+ }
+
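+  // BIT copies, for each bit set in scratch (only the sign bit here), the
+  // corresponding bit of rhs into output; all other bits keep lhs's value.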
+ bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B),
+ ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B),
+ ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B));
+}
+
+void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister output) {
+ ScratchFloat32Scope scratch(*this);
+
+ // Float with only the sign bit set
+ loadConstantFloat32(-0.0f, scratch);
+
+ if (lhs != output) {
+ moveFloat32(lhs, output);
+ }
+
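+  // As in copySignDouble: BIT inserts rhs's sign bit into output.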
+ bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B),
+ ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B),
+ ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B));
+}
+
+void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
+ Register pointer) {
+ Add(ARMRegister(pointer, 64), ARMRegister(pointer, 64),
+ Operand(ARMRegister(indexTemp32, 64), vixl::LSL, shift));
+}
+
+//}}} check_macroassembler_style
+
+} // namespace jit
+} // namespace js
diff --git a/js/src/jit/arm64/MacroAssembler-arm64.h b/js/src/jit/arm64/MacroAssembler-arm64.h
new file mode 100644
index 0000000000..edfd8c9d3e
--- /dev/null
+++ b/js/src/jit/arm64/MacroAssembler-arm64.h
@@ -0,0 +1,2206 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_MacroAssembler_arm64_h
+#define jit_arm64_MacroAssembler_arm64_h
+
+#include "jit/arm64/Assembler-arm64.h"
+#include "jit/arm64/vixl/Debugger-vixl.h"
+#include "jit/arm64/vixl/MacroAssembler-vixl.h"
+#include "jit/AtomicOp.h"
+#include "jit/MoveResolver.h"
+#include "vm/BigIntType.h" // JS::BigInt
+#include "wasm/WasmBuiltins.h"
+
+#ifdef _M_ARM64
+# ifdef move32
+# undef move32
+# endif
+# ifdef move64
+# undef move64
+# endif
+#endif
+
+namespace js {
+namespace jit {
+
+// Import VIXL operands directly into the jit namespace for shared code.
+using vixl::MemOperand;
+using vixl::Operand;
+
+struct ImmShiftedTag : public ImmWord {
+ explicit ImmShiftedTag(JSValueShiftedTag shtag) : ImmWord((uintptr_t)shtag) {}
+
+ explicit ImmShiftedTag(JSValueType type)
+ : ImmWord(uintptr_t(JSValueShiftedTag(JSVAL_TYPE_TO_SHIFTED_TAG(type)))) {
+ }
+};
+
+struct ImmTag : public Imm32 {
+ explicit ImmTag(JSValueTag tag) : Imm32(tag) {}
+};
+
+class ScratchTagScope;
+
+class MacroAssemblerCompat : public vixl::MacroAssembler {
+ public:
+ typedef vixl::Condition Condition;
+
+ private:
+ // Perform a downcast. Should be removed by Bug 996602.
+ js::jit::MacroAssembler& asMasm();
+ const js::jit::MacroAssembler& asMasm() const;
+
+ public:
+ // Restrict to only VIXL-internal functions.
+ vixl::MacroAssembler& asVIXL();
+ const MacroAssembler& asVIXL() const;
+
+ protected:
+ bool enoughMemory_;
+ uint32_t framePushed_;
+
+ MacroAssemblerCompat()
+ : vixl::MacroAssembler(), enoughMemory_(true), framePushed_(0) {}
+
+ protected:
+ MoveResolver moveResolver_;
+
+ public:
+ bool oom() const { return Assembler::oom() || !enoughMemory_; }
+ static ARMRegister toARMRegister(RegisterOrSP r, size_t size) {
+ if (IsHiddenSP(r)) {
+ MOZ_ASSERT(size == 64);
+ return sp;
+ }
+ return ARMRegister(AsRegister(r), size);
+ }
+ static MemOperand toMemOperand(const Address& a) {
+ return MemOperand(toARMRegister(a.base, 64), a.offset);
+ }
+ void doBaseIndex(const vixl::CPURegister& rt, const BaseIndex& addr,
+ vixl::LoadStoreOp op) {
+ const ARMRegister base = toARMRegister(addr.base, 64);
+ const ARMRegister index = ARMRegister(addr.index, 64);
+ const unsigned scale = addr.scale;
+
+ if (!addr.offset &&
+ (!scale || scale == static_cast<unsigned>(CalcLSDataSize(op)))) {
+ LoadStoreMacro(rt, MemOperand(base, index, vixl::LSL, scale), op);
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(this);
+ ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(!scratch64.Is(rt));
+ MOZ_ASSERT(!scratch64.Is(base));
+ MOZ_ASSERT(!scratch64.Is(index));
+
+ Add(scratch64, base, Operand(index, vixl::LSL, scale));
+ LoadStoreMacro(rt, MemOperand(scratch64, addr.offset), op);
+ }
+ void Push(ARMRegister reg) {
+ push(reg);
+ adjustFrame(reg.size() / 8);
+ }
+ void Push(Register reg) {
+ vixl::MacroAssembler::Push(ARMRegister(reg, 64));
+ adjustFrame(8);
+ }
+ void Push(Imm32 imm) {
+ push(imm);
+ adjustFrame(8);
+ }
+ void Push(FloatRegister f) {
+ push(ARMFPRegister(f, 64));
+ adjustFrame(8);
+ }
+ void Push(ImmPtr imm) {
+ push(imm);
+ adjustFrame(sizeof(void*));
+ }
+ void push(FloatRegister f) {
+ vixl::MacroAssembler::Push(ARMFPRegister(f, 64));
+ }
+ void push(ARMFPRegister f) { vixl::MacroAssembler::Push(f); }
+ void push(Imm32 imm) {
+ if (imm.value == 0) {
+ vixl::MacroAssembler::Push(vixl::xzr);
+ } else {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ move32(imm, scratch64.asUnsized());
+ vixl::MacroAssembler::Push(scratch64);
+ }
+ }
+ void push(ImmWord imm) {
+ if (imm.value == 0) {
+ vixl::MacroAssembler::Push(vixl::xzr);
+ } else {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ Mov(scratch64, imm.value);
+ vixl::MacroAssembler::Push(scratch64);
+ }
+ }
+ void push(ImmPtr imm) {
+ if (imm.value == nullptr) {
+ vixl::MacroAssembler::Push(vixl::xzr);
+ } else {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ movePtr(imm, scratch64.asUnsized());
+ vixl::MacroAssembler::Push(scratch64);
+ }
+ }
+ void push(ImmGCPtr imm) {
+ if (imm.value == nullptr) {
+ vixl::MacroAssembler::Push(vixl::xzr);
+ } else {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ movePtr(imm, scratch64.asUnsized());
+ vixl::MacroAssembler::Push(scratch64);
+ }
+ }
+ void push(ARMRegister reg) { vixl::MacroAssembler::Push(reg); }
+ void push(Address a) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(a.base != scratch64.asUnsized());
+ loadPtr(a, scratch64.asUnsized());
+ vixl::MacroAssembler::Push(scratch64);
+ }
+
+ // Push registers.
+ void push(Register reg) { vixl::MacroAssembler::Push(ARMRegister(reg, 64)); }
+ void push(RegisterOrSP reg) {
+ if (IsHiddenSP(reg)) {
+ vixl::MacroAssembler::Push(sp);
+ }
+ vixl::MacroAssembler::Push(toARMRegister(reg, 64));
+ }
+ void push(Register r0, Register r1) {
+ vixl::MacroAssembler::Push(ARMRegister(r0, 64), ARMRegister(r1, 64));
+ }
+ void push(Register r0, Register r1, Register r2) {
+ vixl::MacroAssembler::Push(ARMRegister(r0, 64), ARMRegister(r1, 64),
+ ARMRegister(r2, 64));
+ }
+ void push(Register r0, Register r1, Register r2, Register r3) {
+ vixl::MacroAssembler::Push(ARMRegister(r0, 64), ARMRegister(r1, 64),
+ ARMRegister(r2, 64), ARMRegister(r3, 64));
+ }
+ void push(ARMFPRegister r0, ARMFPRegister r1, ARMFPRegister r2,
+ ARMFPRegister r3) {
+ vixl::MacroAssembler::Push(r0, r1, r2, r3);
+ }
+
+ // Pop registers.
+ void pop(Register reg) { vixl::MacroAssembler::Pop(ARMRegister(reg, 64)); }
+ void pop(Register r0, Register r1) {
+ vixl::MacroAssembler::Pop(ARMRegister(r0, 64), ARMRegister(r1, 64));
+ }
+ void pop(Register r0, Register r1, Register r2) {
+ vixl::MacroAssembler::Pop(ARMRegister(r0, 64), ARMRegister(r1, 64),
+ ARMRegister(r2, 64));
+ }
+ void pop(Register r0, Register r1, Register r2, Register r3) {
+ vixl::MacroAssembler::Pop(ARMRegister(r0, 64), ARMRegister(r1, 64),
+ ARMRegister(r2, 64), ARMRegister(r3, 64));
+ }
+ void pop(ARMFPRegister r0, ARMFPRegister r1, ARMFPRegister r2,
+ ARMFPRegister r3) {
+ vixl::MacroAssembler::Pop(r0, r1, r2, r3);
+ }
+
+ void pop(const ValueOperand& v) { pop(v.valueReg()); }
+ void pop(const FloatRegister& f) {
+ vixl::MacroAssembler::Pop(ARMFPRegister(f, 64));
+ }
+
+ void implicitPop(uint32_t args) {
+ MOZ_ASSERT(args % sizeof(intptr_t) == 0);
+ adjustFrame(0 - args);
+ }
+ void Pop(ARMRegister r) {
+ vixl::MacroAssembler::Pop(r);
+ adjustFrame(0 - r.size() / 8);
+ }
+ // FIXME: This is the same on every arch.
+ // FIXME: If we can share framePushed_, we can share this.
+ // FIXME: Or just make it at the highest level.
+ CodeOffset PushWithPatch(ImmWord word) {
+ framePushed_ += sizeof(word.value);
+ return pushWithPatch(word);
+ }
+ CodeOffset PushWithPatch(ImmPtr ptr) {
+ return PushWithPatch(ImmWord(uintptr_t(ptr.value)));
+ }
+
+ uint32_t framePushed() const { return framePushed_; }
+ void adjustFrame(int32_t diff) { setFramePushed(framePushed_ + diff); }
+
+ void setFramePushed(uint32_t framePushed) { framePushed_ = framePushed; }
+
+ void freeStack(Register amount) {
+ vixl::MacroAssembler::Drop(Operand(ARMRegister(amount, 64)));
+ }
+
+ // Update sp with the value of the current active stack pointer, if necessary.
+ void syncStackPtr() {
+ if (!GetStackPointer64().Is(vixl::sp)) {
+ Mov(vixl::sp, GetStackPointer64());
+ }
+ }
+ void initPseudoStackPtr() {
+ if (!GetStackPointer64().Is(vixl::sp)) {
+ Mov(GetStackPointer64(), vixl::sp);
+ }
+ }
+ // In debug builds only, cause a trap if PSP is active and PSP != SP
+ void assertStackPtrsSynced(uint32_t id) {
+#ifdef DEBUG
+ // The add and sub instructions below will only take a 12-bit immediate.
+ MOZ_ASSERT(id <= 0xFFF);
+ if (!GetStackPointer64().Is(vixl::sp)) {
+ Label ok;
+ // Add a marker, so we can figure out who requested the check when
+      // inspecting the generated code. Note: a more concise way to encode the
+      // marker would be to use it as an immediate for the `brk` instruction as
+      // generated by `Unreachable()`, removing the add/sub entirely.
+ Add(GetStackPointer64(), GetStackPointer64(), Operand(id));
+ Sub(GetStackPointer64(), GetStackPointer64(), Operand(id));
+ Cmp(vixl::sp, GetStackPointer64());
+ B(Equal, &ok);
+ Unreachable();
+ bind(&ok);
+ }
+#endif
+ }
+ // In debug builds only, add a marker that doesn't change the machine's
+ // state. Note these markers are x16-based, as opposed to the x28-based
+ // ones made by `assertStackPtrsSynced`.
+ void addMarker(uint32_t id) {
+#ifdef DEBUG
+ // Only 12 bits of immediate are allowed.
+ MOZ_ASSERT(id <= 0xFFF);
+ ARMRegister x16 = ARMRegister(r16, 64);
+ Add(x16, x16, Operand(id));
+ Sub(x16, x16, Operand(id));
+#endif
+ }
+
+ void storeValue(ValueOperand val, const Address& dest) {
+ storePtr(val.valueReg(), dest);
+ }
+
+ template <typename T>
+ void storeValue(JSValueType type, Register reg, const T& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != reg);
+ tagValue(type, reg, ValueOperand(scratch));
+ storeValue(ValueOperand(scratch), dest);
+ }
+ template <typename T>
+ void storeValue(const Value& val, const T& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ moveValue(val, ValueOperand(scratch));
+ storeValue(ValueOperand(scratch), dest);
+ }
+ void storeValue(ValueOperand val, BaseIndex dest) {
+ storePtr(val.valueReg(), dest);
+ }
+ void storeValue(const Address& src, const Address& dest, Register temp) {
+ loadPtr(src, temp);
+ storePtr(temp, dest);
+ }
+
+ void storePrivateValue(Register src, const Address& dest) {
+ storePtr(src, dest);
+ }
+ void storePrivateValue(ImmGCPtr imm, const Address& dest) {
+ storePtr(imm, dest);
+ }
+
+ void loadValue(Address src, Register val) {
+ Ldr(ARMRegister(val, 64), MemOperand(src));
+ }
+ void loadValue(Address src, ValueOperand val) {
+ Ldr(ARMRegister(val.valueReg(), 64), MemOperand(src));
+ }
+ void loadValue(const BaseIndex& src, ValueOperand val) {
+ doBaseIndex(ARMRegister(val.valueReg(), 64), src, vixl::LDR_x);
+ }
+ void loadUnalignedValue(const Address& src, ValueOperand dest) {
+ loadValue(src, dest);
+ }
+ void tagValue(JSValueType type, Register payload, ValueOperand dest) {
+ // This could be cleverer, but the first attempt had bugs.
+ Orr(ARMRegister(dest.valueReg(), 64), ARMRegister(payload, 64),
+ Operand(ImmShiftedTag(type).value));
+ }
+ void pushValue(ValueOperand val) {
+ vixl::MacroAssembler::Push(ARMRegister(val.valueReg(), 64));
+ }
+ void popValue(ValueOperand val) {
+ vixl::MacroAssembler::Pop(ARMRegister(val.valueReg(), 64));
+ // SP may be < PSP now (that's OK).
+    // e.g. testcase: tests/backup-point-bug1315634.js
+ }
+ void pushValue(const Value& val) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ if (val.isGCThing()) {
+ BufferOffset load =
+ movePatchablePtr(ImmPtr(val.bitsAsPunboxPointer()), scratch);
+ writeDataRelocation(val, load);
+ push(scratch);
+ } else {
+ moveValue(val, scratch);
+ push(scratch);
+ }
+ }
+ void pushValue(JSValueType type, Register reg) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != reg);
+ tagValue(type, reg, ValueOperand(scratch));
+ push(scratch);
+ }
+ void pushValue(const Address& addr) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != addr.base);
+ loadValue(addr, scratch);
+ push(scratch);
+ }
+ void pushValue(const BaseIndex& addr, Register scratch) {
+ loadValue(addr, ValueOperand(scratch));
+ pushValue(ValueOperand(scratch));
+ }
+ template <typename T>
+ void storeUnboxedPayload(ValueOperand value, T address, size_t nbytes,
+ JSValueType type) {
+ switch (nbytes) {
+ case 8: {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ if (type == JSVAL_TYPE_OBJECT) {
+ unboxObjectOrNull(value, scratch);
+ } else {
+ unboxNonDouble(value, scratch, type);
+ }
+ storePtr(scratch, address);
+ return;
+ }
+ case 4:
+ store32(value.valueReg(), address);
+ return;
+ case 1:
+ store8(value.valueReg(), address);
+ return;
+ default:
+ MOZ_CRASH("Bad payload width");
+ }
+ }
+ void moveValue(const Value& val, Register dest) {
+ if (val.isGCThing()) {
+ BufferOffset load =
+ movePatchablePtr(ImmPtr(val.bitsAsPunboxPointer()), dest);
+ writeDataRelocation(val, load);
+ } else {
+ movePtr(ImmWord(val.asRawBits()), dest);
+ }
+ }
+ void moveValue(const Value& src, const ValueOperand& dest) {
+ moveValue(src, dest.valueReg());
+ }
+
+ CodeOffset pushWithPatch(ImmWord imm) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ CodeOffset label = movWithPatch(imm, scratch);
+ push(scratch);
+ return label;
+ }
+
+ CodeOffset movWithPatch(ImmWord imm, Register dest) {
+ BufferOffset off = immPool64(ARMRegister(dest, 64), imm.value);
+ return CodeOffset(off.getOffset());
+ }
+ CodeOffset movWithPatch(ImmPtr imm, Register dest) {
+ BufferOffset off = immPool64(ARMRegister(dest, 64), uint64_t(imm.value));
+ return CodeOffset(off.getOffset());
+ }
+
+ void boxValue(JSValueType type, Register src, Register dest);
+
+ void splitSignExtTag(Register src, Register dest) {
+ sbfx(ARMRegister(dest, 64), ARMRegister(src, 64), JSVAL_TAG_SHIFT,
+ (64 - JSVAL_TAG_SHIFT));
+ }
+ [[nodiscard]] Register extractTag(const Address& address, Register scratch) {
+ loadPtr(address, scratch);
+ splitSignExtTag(scratch, scratch);
+ return scratch;
+ }
+ [[nodiscard]] Register extractTag(const ValueOperand& value,
+ Register scratch) {
+ splitSignExtTag(value.valueReg(), scratch);
+ return scratch;
+ }
+ [[nodiscard]] Register extractObject(const Address& address,
+ Register scratch) {
+ loadPtr(address, scratch);
+ unboxObject(scratch, scratch);
+ return scratch;
+ }
+ [[nodiscard]] Register extractObject(const ValueOperand& value,
+ Register scratch) {
+ unboxObject(value, scratch);
+ return scratch;
+ }
+ [[nodiscard]] Register extractSymbol(const ValueOperand& value,
+ Register scratch) {
+ unboxSymbol(value, scratch);
+ return scratch;
+ }
+ [[nodiscard]] Register extractInt32(const ValueOperand& value,
+ Register scratch) {
+ unboxInt32(value, scratch);
+ return scratch;
+ }
+ [[nodiscard]] Register extractBoolean(const ValueOperand& value,
+ Register scratch) {
+ unboxBoolean(value, scratch);
+ return scratch;
+ }
+
+ inline void ensureDouble(const ValueOperand& source, FloatRegister dest,
+ Label* failure);
+
+ void emitSet(Condition cond, Register dest) {
+ Cset(ARMRegister(dest, 64), cond);
+ }
+
+ void testNullSet(Condition cond, const ValueOperand& value, Register dest) {
+ cond = testNull(cond, value);
+ emitSet(cond, dest);
+ }
+ void testObjectSet(Condition cond, const ValueOperand& value, Register dest) {
+ cond = testObject(cond, value);
+ emitSet(cond, dest);
+ }
+ void testUndefinedSet(Condition cond, const ValueOperand& value,
+ Register dest) {
+ cond = testUndefined(cond, value);
+ emitSet(cond, dest);
+ }
+
+ void convertBoolToInt32(Register source, Register dest) {
+ Uxtb(ARMRegister(dest, 64), ARMRegister(source, 64));
+ }
+
+ void convertInt32ToDouble(Register src, FloatRegister dest) {
+ Scvtf(ARMFPRegister(dest, 64),
+ ARMRegister(src, 32)); // Uses FPCR rounding mode.
+ }
+ void convertInt32ToDouble(const Address& src, FloatRegister dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != src.base);
+ load32(src, scratch);
+ convertInt32ToDouble(scratch, dest);
+ }
+ void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != src.base);
+ MOZ_ASSERT(scratch != src.index);
+ load32(src, scratch);
+ convertInt32ToDouble(scratch, dest);
+ }
+
+ void convertInt32ToFloat32(Register src, FloatRegister dest) {
+ Scvtf(ARMFPRegister(dest, 32),
+ ARMRegister(src, 32)); // Uses FPCR rounding mode.
+ }
+ void convertInt32ToFloat32(const Address& src, FloatRegister dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != src.base);
+ load32(src, scratch);
+ convertInt32ToFloat32(scratch, dest);
+ }
+
+ void convertUInt32ToDouble(Register src, FloatRegister dest) {
+ Ucvtf(ARMFPRegister(dest, 64),
+ ARMRegister(src, 32)); // Uses FPCR rounding mode.
+ }
+ void convertUInt32ToDouble(const Address& src, FloatRegister dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != src.base);
+ load32(src, scratch);
+ convertUInt32ToDouble(scratch, dest);
+ }
+
+ void convertUInt32ToFloat32(Register src, FloatRegister dest) {
+ Ucvtf(ARMFPRegister(dest, 32),
+ ARMRegister(src, 32)); // Uses FPCR rounding mode.
+ }
+ void convertUInt32ToFloat32(const Address& src, FloatRegister dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != src.base);
+ load32(src, scratch);
+ convertUInt32ToFloat32(scratch, dest);
+ }
+
+ void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) {
+ Fcvt(ARMFPRegister(dest, 64), ARMFPRegister(src, 32));
+ }
+ void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) {
+ Fcvt(ARMFPRegister(dest, 32), ARMFPRegister(src, 64));
+ }
+
+ using vixl::MacroAssembler::B;
+
+ void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail,
+ bool negativeZeroCheck = true) {
+ ARMFPRegister fsrc64(src, 64);
+ ARMRegister dest32(dest, 32);
+
+ // ARMv8.3 chips support the FJCVTZS instruction, which handles exactly this
+ // logic. But the simulator does not implement it, and when the simulator
+ // runs on ARM64 hardware we want to override vixl's detection of it.
+#if defined(JS_SIMULATOR_ARM64) && (defined(__aarch64__) || defined(_M_ARM64))
+ const bool fjscvt = false;
+#else
+ const bool fjscvt =
+ CPUHas(vixl::CPUFeatures::kFP, vixl::CPUFeatures::kJSCVT);
+#endif
+ if (fjscvt) {
+ // Convert double to integer, rounding toward zero.
+ // The Z-flag is set iff the conversion is exact. -0 unsets the Z-flag.
+ Fjcvtzs(dest32, fsrc64);
+
+ if (negativeZeroCheck) {
+ B(fail, Assembler::NonZero);
+ } else {
+ Label done;
+ B(&done, Assembler::Zero); // If conversion was exact, go to end.
+
+ // The conversion was inexact, but the caller intends to allow -0.
+
+ // Compare fsrc64 to 0.
+ // If fsrc64 == 0 and FJCVTZS conversion was inexact, then fsrc64 is -0.
+ Fcmp(fsrc64, 0.0);
+ B(fail, Assembler::NotEqual); // Pass through -0; fail otherwise.
+
+ bind(&done);
+ }
+ } else {
+ // Older processors use a significantly slower path.
+ ARMRegister dest64(dest, 64);
+
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMFPRegister scratch64 = temps.AcquireD();
+ MOZ_ASSERT(!scratch64.Is(fsrc64));
+
+ Fcvtzs(dest32, fsrc64); // Convert, rounding toward zero.
+ Scvtf(scratch64, dest32); // Convert back, using FPCR rounding mode.
+ Fcmp(scratch64, fsrc64);
+ B(fail, Assembler::NotEqual);
+
+ if (negativeZeroCheck) {
+ Label nonzero;
+ Cbnz(dest32, &nonzero);
+ Fmov(dest64, fsrc64);
+ Cbnz(dest64, fail);
+ bind(&nonzero);
+ }
+ }
+ }
+ void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail,
+ bool negativeZeroCheck = true) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMFPRegister scratch32 = temps.AcquireS();
+
+ ARMFPRegister fsrc(src, 32);
+ ARMRegister dest32(dest, 32);
+ ARMRegister dest64(dest, 64);
+
+ MOZ_ASSERT(!scratch32.Is(fsrc));
+
+ Fcvtzs(dest64, fsrc); // Convert, rounding toward zero.
+ Scvtf(scratch32, dest32); // Convert back, using FPCR rounding mode.
+ Fcmp(scratch32, fsrc);
+ B(fail, Assembler::NotEqual);
+
+ if (negativeZeroCheck) {
+ Label nonzero;
+ Cbnz(dest32, &nonzero);
+ Fmov(dest32, fsrc);
+ Cbnz(dest32, fail);
+ bind(&nonzero);
+ }
+ Uxtw(dest64, dest64);
+ }
+
+ void convertDoubleToPtr(FloatRegister src, Register dest, Label* fail,
+ bool negativeZeroCheck = true) {
+ ARMFPRegister fsrc64(src, 64);
+ ARMRegister dest64(dest, 64);
+
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMFPRegister scratch64 = temps.AcquireD();
+ MOZ_ASSERT(!scratch64.Is(fsrc64));
+
+ // Note: we can't use the FJCVTZS instruction here because that only works
+ // for 32-bit values.
+
+ Fcvtzs(dest64, fsrc64); // Convert, rounding toward zero.
+ Scvtf(scratch64, dest64); // Convert back, using FPCR rounding mode.
+ Fcmp(scratch64, fsrc64);
+ B(fail, Assembler::NotEqual);
+
+ if (negativeZeroCheck) {
+ Label nonzero;
+ Cbnz(dest64, &nonzero);
+ Fmov(dest64, fsrc64);
+ Cbnz(dest64, fail);
+ bind(&nonzero);
+ }
+ }
+
+ void jump(Label* label) { B(label); }
+ void jump(JitCode* code) { branch(code); }
+ void jump(ImmPtr ptr) {
+ // It is unclear why this sync is necessary:
+ // * PSP and SP have been observed to be different in testcase
+ // tests/asm.js/testBug1046688.js.
+ // * Removing the sync causes no failures in all of jit-tests.
+ //
+ // Also see branch(JitCode*) below. This version of jump() is called only
+ // from jump(TrampolinePtr) which is called on various very slow paths,
+ // probably only in JS.
+ syncStackPtr();
+    // The jump target will be patched by executableCopy().
+    BufferOffset loc = b(-1, LabelDoc());
+ addPendingJump(loc, ptr, RelocationKind::HARDCODED);
+ }
+ void jump(TrampolinePtr code) { jump(ImmPtr(code.value)); }
+ void jump(Register reg) { Br(ARMRegister(reg, 64)); }
+ void jump(const Address& addr) {
+ vixl::UseScratchRegisterScope temps(this);
+ MOZ_ASSERT(temps.IsAvailable(ScratchReg64)); // ip0
+ temps.Exclude(ScratchReg64);
+ MOZ_ASSERT(addr.base != ScratchReg64.asUnsized());
+ loadPtr(addr, ScratchReg64.asUnsized());
+ br(ScratchReg64);
+ }
+
+ void align(int alignment) { armbuffer_.align(alignment); }
+
+ void haltingAlign(int alignment) {
+ armbuffer_.align(alignment, vixl::HLT | ImmException(0xBAAD));
+ }
+ void nopAlign(int alignment) { armbuffer_.align(alignment); }
+
+ void movePtr(Register src, Register dest) {
+ Mov(ARMRegister(dest, 64), ARMRegister(src, 64));
+ }
+ void movePtr(ImmWord imm, Register dest) {
+ Mov(ARMRegister(dest, 64), int64_t(imm.value));
+ }
+ void movePtr(ImmPtr imm, Register dest) {
+ Mov(ARMRegister(dest, 64), int64_t(imm.value));
+ }
+ void movePtr(wasm::SymbolicAddress imm, Register dest) {
+ BufferOffset off = movePatchablePtr(ImmWord(0xffffffffffffffffULL), dest);
+ append(wasm::SymbolicAccess(CodeOffset(off.getOffset()), imm));
+ }
+ void movePtr(ImmGCPtr imm, Register dest) {
+ BufferOffset load = movePatchablePtr(ImmPtr(imm.value), dest);
+ writeDataRelocation(imm, load);
+ }
+
+ void mov(ImmWord imm, Register dest) { movePtr(imm, dest); }
+ void mov(ImmPtr imm, Register dest) { movePtr(imm, dest); }
+ void mov(wasm::SymbolicAddress imm, Register dest) { movePtr(imm, dest); }
+ void mov(Register src, Register dest) { movePtr(src, dest); }
+ void mov(CodeLabel* label, Register dest);
+
+ void move32(Imm32 imm, Register dest) {
+ Mov(ARMRegister(dest, 32), (int64_t)imm.value);
+ }
+ void move32(Register src, Register dest) {
+ Mov(ARMRegister(dest, 32), ARMRegister(src, 32));
+ }
+
+ // Move a pointer using a literal pool, so that the pointer
+ // may be easily patched or traced.
+ // Returns the BufferOffset of the load instruction emitted.
+ BufferOffset movePatchablePtr(ImmWord ptr, Register dest);
+ BufferOffset movePatchablePtr(ImmPtr ptr, Register dest);
+
+ void loadPtr(wasm::SymbolicAddress address, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ movePtr(address, scratch.asUnsized());
+ Ldr(ARMRegister(dest, 64), MemOperand(scratch));
+ }
+ void loadPtr(AbsoluteAddress address, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ movePtr(ImmWord((uintptr_t)address.addr), scratch.asUnsized());
+ Ldr(ARMRegister(dest, 64), MemOperand(scratch));
+ }
+ void loadPtr(const Address& address, Register dest) {
+ Ldr(ARMRegister(dest, 64), MemOperand(address));
+ }
+ void loadPtr(const BaseIndex& src, Register dest) {
+ ARMRegister base = toARMRegister(src.base, 64);
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+ ARMRegister dest64(dest, 64);
+ ARMRegister index64(src.index, 64);
+
+ if (src.offset) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ MOZ_ASSERT(!scratch.Is(base));
+ MOZ_ASSERT(!scratch.Is(dest64));
+ MOZ_ASSERT(!scratch.Is(index64));
+
+ Add(scratch, base, Operand(int64_t(src.offset)));
+ Ldr(dest64, MemOperand(scratch, index64, vixl::LSL, scale));
+ return;
+ }
+
+ Ldr(dest64, MemOperand(base, index64, vixl::LSL, scale));
+ }
+ void loadPrivate(const Address& src, Register dest);
+
+ void store8(Register src, const Address& address) {
+ Strb(ARMRegister(src, 32), toMemOperand(address));
+ }
+ void store8(Imm32 imm, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != address.base);
+ move32(imm, scratch32.asUnsized());
+ Strb(scratch32, toMemOperand(address));
+ }
+ void store8(Register src, const BaseIndex& address) {
+ doBaseIndex(ARMRegister(src, 32), address, vixl::STRB_w);
+ }
+ void store8(Imm32 imm, const BaseIndex& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != address.base);
+ MOZ_ASSERT(scratch32.asUnsized() != address.index);
+ Mov(scratch32, Operand(imm.value));
+ doBaseIndex(scratch32, address, vixl::STRB_w);
+ }
+
+ void store16(Register src, const Address& address) {
+ Strh(ARMRegister(src, 32), toMemOperand(address));
+ }
+ void store16(Imm32 imm, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != address.base);
+ move32(imm, scratch32.asUnsized());
+ Strh(scratch32, toMemOperand(address));
+ }
+ void store16(Register src, const BaseIndex& address) {
+ doBaseIndex(ARMRegister(src, 32), address, vixl::STRH_w);
+ }
+ void store16(Imm32 imm, const BaseIndex& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != address.base);
+ MOZ_ASSERT(scratch32.asUnsized() != address.index);
+ Mov(scratch32, Operand(imm.value));
+ doBaseIndex(scratch32, address, vixl::STRH_w);
+ }
+ template <typename S, typename T>
+ void store16Unaligned(const S& src, const T& dest) {
+ store16(src, dest);
+ }
+
+ void storePtr(ImmWord imm, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != address.base);
+ movePtr(imm, scratch);
+ storePtr(scratch, address);
+ }
+ void storePtr(ImmPtr imm, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != address.base);
+ Mov(scratch64, uint64_t(imm.value));
+ Str(scratch64, toMemOperand(address));
+ }
+ void storePtr(ImmGCPtr imm, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != address.base);
+ movePtr(imm, scratch);
+ storePtr(scratch, address);
+ }
+ void storePtr(Register src, const Address& address) {
+ Str(ARMRegister(src, 64), toMemOperand(address));
+ }
+
+ void storePtr(ImmWord imm, const BaseIndex& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != address.base);
+ MOZ_ASSERT(scratch64.asUnsized() != address.index);
+ Mov(scratch64, Operand(imm.value));
+ doBaseIndex(scratch64, address, vixl::STR_x);
+ }
+ void storePtr(ImmGCPtr imm, const BaseIndex& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != address.base);
+ MOZ_ASSERT(scratch != address.index);
+ movePtr(imm, scratch);
+ doBaseIndex(ARMRegister(scratch, 64), address, vixl::STR_x);
+ }
+ void storePtr(Register src, const BaseIndex& address) {
+ doBaseIndex(ARMRegister(src, 64), address, vixl::STR_x);
+ }
+
+ void storePtr(Register src, AbsoluteAddress address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ Mov(scratch64, uint64_t(address.addr));
+ Str(ARMRegister(src, 64), MemOperand(scratch64));
+ }
+
+ void store32(Register src, AbsoluteAddress address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ Mov(scratch64, uint64_t(address.addr));
+ Str(ARMRegister(src, 32), MemOperand(scratch64));
+ }
+ void store32(Imm32 imm, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != address.base);
+ Mov(scratch32, uint64_t(imm.value));
+ Str(scratch32, toMemOperand(address));
+ }
+ void store32(Register r, const Address& address) {
+ Str(ARMRegister(r, 32), toMemOperand(address));
+ }
+ void store32(Imm32 imm, const BaseIndex& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != address.base);
+ MOZ_ASSERT(scratch32.asUnsized() != address.index);
+ Mov(scratch32, imm.value);
+ doBaseIndex(scratch32, address, vixl::STR_w);
+ }
+ void store32(Register r, const BaseIndex& address) {
+ doBaseIndex(ARMRegister(r, 32), address, vixl::STR_w);
+ }
+
+ template <typename S, typename T>
+ void store32Unaligned(const S& src, const T& dest) {
+ store32(src, dest);
+ }
+
+ void store64(Register64 src, Address address) { storePtr(src.reg, address); }
+
+ void store64(Register64 src, const BaseIndex& address) {
+ storePtr(src.reg, address);
+ }
+
+ void store64(Imm64 imm, const BaseIndex& address) {
+ storePtr(ImmWord(imm.value), address);
+ }
+
+ void store64(Imm64 imm, const Address& address) {
+ storePtr(ImmWord(imm.value), address);
+ }
+
+ template <typename S, typename T>
+ void store64Unaligned(const S& src, const T& dest) {
+ store64(src, dest);
+ }
+
+ // StackPointer manipulation.
+ inline void addToStackPtr(Register src);
+ inline void addToStackPtr(Imm32 imm);
+ inline void addToStackPtr(const Address& src);
+ inline void addStackPtrTo(Register dest);
+
+ inline void subFromStackPtr(Register src);
+ inline void subFromStackPtr(Imm32 imm);
+ inline void subStackPtrFrom(Register dest);
+
+ inline void andToStackPtr(Imm32 t);
+
+ inline void moveToStackPtr(Register src);
+ inline void moveStackPtrTo(Register dest);
+
+ inline void loadStackPtr(const Address& src);
+ inline void storeStackPtr(const Address& dest);
+
+ // StackPointer testing functions.
+ inline void branchTestStackPtr(Condition cond, Imm32 rhs, Label* label);
+ inline void branchStackPtr(Condition cond, Register rhs, Label* label);
+ inline void branchStackPtrRhs(Condition cond, Address lhs, Label* label);
+ inline void branchStackPtrRhs(Condition cond, AbsoluteAddress lhs,
+ Label* label);
+
+ void testPtr(Register lhs, Register rhs) {
+ Tst(ARMRegister(lhs, 64), Operand(ARMRegister(rhs, 64)));
+ }
+ void test32(Register lhs, Register rhs) {
+ Tst(ARMRegister(lhs, 32), Operand(ARMRegister(rhs, 32)));
+ }
+ void test32(const Address& addr, Imm32 imm) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != addr.base);
+ load32(addr, scratch32.asUnsized());
+ Tst(scratch32, Operand(imm.value));
+ }
+ void test32(Register lhs, Imm32 rhs) {
+ Tst(ARMRegister(lhs, 32), Operand(rhs.value));
+ }
+ void cmp32(Register lhs, Imm32 rhs) {
+ Cmp(ARMRegister(lhs, 32), Operand(rhs.value));
+ }
+ void cmp32(Register a, Register b) {
+ Cmp(ARMRegister(a, 32), Operand(ARMRegister(b, 32)));
+ }
+ void cmp32(const Address& lhs, Imm32 rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != lhs.base);
+ Ldr(scratch32, toMemOperand(lhs));
+ Cmp(scratch32, Operand(rhs.value));
+ }
+ void cmp32(const Address& lhs, Register rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != lhs.base);
+ MOZ_ASSERT(scratch32.asUnsized() != rhs);
+ Ldr(scratch32, toMemOperand(lhs));
+ Cmp(scratch32, Operand(ARMRegister(rhs, 32)));
+ }
+ void cmp32(const vixl::Operand& lhs, Imm32 rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ Mov(scratch32, lhs);
+ Cmp(scratch32, Operand(rhs.value));
+ }
+ void cmp32(const vixl::Operand& lhs, Register rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ Mov(scratch32, lhs);
+ Cmp(scratch32, Operand(ARMRegister(rhs, 32)));
+ }
+
+ void cmn32(Register lhs, Imm32 rhs) {
+ Cmn(ARMRegister(lhs, 32), Operand(rhs.value));
+ }
+
+ void cmpPtr(Register lhs, Imm32 rhs) {
+ Cmp(ARMRegister(lhs, 64), Operand(rhs.value));
+ }
+ void cmpPtr(Register lhs, ImmWord rhs) {
+ Cmp(ARMRegister(lhs, 64), Operand(rhs.value));
+ }
+ void cmpPtr(Register lhs, ImmPtr rhs) {
+ Cmp(ARMRegister(lhs, 64), Operand(uint64_t(rhs.value)));
+ }
+ void cmpPtr(Register lhs, Imm64 rhs) {
+ Cmp(ARMRegister(lhs, 64), Operand(uint64_t(rhs.value)));
+ }
+ void cmpPtr(Register lhs, Register rhs) {
+ Cmp(ARMRegister(lhs, 64), ARMRegister(rhs, 64));
+ }
+ void cmpPtr(Register lhs, ImmGCPtr rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs);
+ movePtr(rhs, scratch);
+ cmpPtr(lhs, scratch);
+ }
+
+ void cmpPtr(const Address& lhs, Register rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != lhs.base);
+ MOZ_ASSERT(scratch64.asUnsized() != rhs);
+ Ldr(scratch64, toMemOperand(lhs));
+ Cmp(scratch64, Operand(ARMRegister(rhs, 64)));
+ }
+ void cmpPtr(const Address& lhs, ImmWord rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != lhs.base);
+ Ldr(scratch64, toMemOperand(lhs));
+ Cmp(scratch64, Operand(rhs.value));
+ }
+ void cmpPtr(const Address& lhs, ImmPtr rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != lhs.base);
+ Ldr(scratch64, toMemOperand(lhs));
+ Cmp(scratch64, Operand(uint64_t(rhs.value)));
+ }
+ void cmpPtr(const Address& lhs, ImmGCPtr rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ loadPtr(lhs, scratch);
+ cmpPtr(scratch, rhs);
+ }
+
+ void loadDouble(const Address& src, FloatRegister dest) {
+ Ldr(ARMFPRegister(dest, 64), MemOperand(src));
+ }
+ void loadDouble(const BaseIndex& src, FloatRegister dest) {
+ ARMRegister base = toARMRegister(src.base, 64);
+ ARMRegister index(src.index, 64);
+
+ if (src.offset == 0) {
+ Ldr(ARMFPRegister(dest, 64),
+ MemOperand(base, index, vixl::LSL, unsigned(src.scale)));
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != src.base);
+ MOZ_ASSERT(scratch64.asUnsized() != src.index);
+
+ Add(scratch64, base, Operand(index, vixl::LSL, unsigned(src.scale)));
+ Ldr(ARMFPRegister(dest, 64), MemOperand(scratch64, src.offset));
+ }
+ void loadFloatAsDouble(const Address& addr, FloatRegister dest) {
+ Ldr(ARMFPRegister(dest, 32), toMemOperand(addr));
+ fcvt(ARMFPRegister(dest, 64), ARMFPRegister(dest, 32));
+ }
+ void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest) {
+ ARMRegister base = toARMRegister(src.base, 64);
+ ARMRegister index(src.index, 64);
+ if (src.offset == 0) {
+ Ldr(ARMFPRegister(dest, 32),
+ MemOperand(base, index, vixl::LSL, unsigned(src.scale)));
+ } else {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != src.base);
+ MOZ_ASSERT(scratch64.asUnsized() != src.index);
+
+ Add(scratch64, base, Operand(index, vixl::LSL, unsigned(src.scale)));
+ Ldr(ARMFPRegister(dest, 32), MemOperand(scratch64, src.offset));
+ }
+ fcvt(ARMFPRegister(dest, 64), ARMFPRegister(dest, 32));
+ }
+
+ void loadFloat32(const Address& addr, FloatRegister dest) {
+ Ldr(ARMFPRegister(dest, 32), toMemOperand(addr));
+ }
+ void loadFloat32(const BaseIndex& src, FloatRegister dest) {
+ ARMRegister base = toARMRegister(src.base, 64);
+ ARMRegister index(src.index, 64);
+ if (src.offset == 0) {
+ Ldr(ARMFPRegister(dest, 32),
+ MemOperand(base, index, vixl::LSL, unsigned(src.scale)));
+ } else {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != src.base);
+ MOZ_ASSERT(scratch64.asUnsized() != src.index);
+
+ Add(scratch64, base, Operand(index, vixl::LSL, unsigned(src.scale)));
+ Ldr(ARMFPRegister(dest, 32), MemOperand(scratch64, src.offset));
+ }
+ }
+
+ void moveDouble(FloatRegister src, FloatRegister dest) {
+ fmov(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
+ }
+ void zeroDouble(FloatRegister reg) {
+ fmov(ARMFPRegister(reg, 64), vixl::xzr);
+ }
+ void zeroFloat32(FloatRegister reg) {
+ fmov(ARMFPRegister(reg, 32), vixl::wzr);
+ }
+
+ void moveFloat32(FloatRegister src, FloatRegister dest) {
+ fmov(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
+ }
+ void moveFloatAsDouble(Register src, FloatRegister dest) {
+ MOZ_CRASH("moveFloatAsDouble");
+ }
+
+ void moveSimd128(FloatRegister src, FloatRegister dest) {
+ fmov(ARMFPRegister(dest, 128), ARMFPRegister(src, 128));
+ }
+
+ void splitSignExtTag(const ValueOperand& operand, Register dest) {
+ splitSignExtTag(operand.valueReg(), dest);
+ }
+ void splitSignExtTag(const Address& operand, Register dest) {
+ loadPtr(operand, dest);
+ splitSignExtTag(dest, dest);
+ }
+ void splitSignExtTag(const BaseIndex& operand, Register dest) {
+ loadPtr(operand, dest);
+ splitSignExtTag(dest, dest);
+ }
+
+  // Extracts the tag of a value and places it in |tag|.
+ inline void splitTagForTest(const ValueOperand& value, ScratchTagScope& tag);
+ void cmpTag(const ValueOperand& operand, ImmTag tag) { MOZ_CRASH("cmpTag"); }
+
+ void load32(const Address& address, Register dest) {
+ Ldr(ARMRegister(dest, 32), toMemOperand(address));
+ }
+ void load32(const BaseIndex& src, Register dest) {
+ doBaseIndex(ARMRegister(dest, 32), src, vixl::LDR_w);
+ }
+ void load32(AbsoluteAddress address, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ movePtr(ImmWord((uintptr_t)address.addr), scratch64.asUnsized());
+ ldr(ARMRegister(dest, 32), MemOperand(scratch64));
+ }
+ template <typename S>
+ void load32Unaligned(const S& src, Register dest) {
+ load32(src, dest);
+ }
+ void load64(const Address& address, Register64 dest) {
+ loadPtr(address, dest.reg);
+ }
+ void load64(const BaseIndex& address, Register64 dest) {
+ loadPtr(address, dest.reg);
+ }
+ template <typename S>
+ void load64Unaligned(const S& src, Register64 dest) {
+ load64(src, dest);
+ }
+
+ void load8SignExtend(const Address& address, Register dest) {
+ Ldrsb(ARMRegister(dest, 32), toMemOperand(address));
+ }
+ void load8SignExtend(const BaseIndex& src, Register dest) {
+ doBaseIndex(ARMRegister(dest, 32), src, vixl::LDRSB_w);
+ }
+
+ void load8ZeroExtend(const Address& address, Register dest) {
+ Ldrb(ARMRegister(dest, 32), toMemOperand(address));
+ }
+ void load8ZeroExtend(const BaseIndex& src, Register dest) {
+ doBaseIndex(ARMRegister(dest, 32), src, vixl::LDRB_w);
+ }
+
+ void load16SignExtend(const Address& address, Register dest) {
+ Ldrsh(ARMRegister(dest, 32), toMemOperand(address));
+ }
+ void load16SignExtend(const BaseIndex& src, Register dest) {
+ doBaseIndex(ARMRegister(dest, 32), src, vixl::LDRSH_w);
+ }
+ template <typename S>
+ void load16UnalignedSignExtend(const S& src, Register dest) {
+ load16SignExtend(src, dest);
+ }
+
+ void load16ZeroExtend(const Address& address, Register dest) {
+ Ldrh(ARMRegister(dest, 32), toMemOperand(address));
+ }
+ void load16ZeroExtend(const BaseIndex& src, Register dest) {
+ doBaseIndex(ARMRegister(dest, 32), src, vixl::LDRH_w);
+ }
+ template <typename S>
+ void load16UnalignedZeroExtend(const S& src, Register dest) {
+ load16ZeroExtend(src, dest);
+ }
+
+ void adds32(Register src, Register dest) {
+ Adds(ARMRegister(dest, 32), ARMRegister(dest, 32),
+ Operand(ARMRegister(src, 32)));
+ }
+ void adds32(Imm32 imm, Register dest) {
+ Adds(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value));
+ }
+ void adds32(Imm32 imm, const Address& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != dest.base);
+
+ Ldr(scratch32, toMemOperand(dest));
+ Adds(scratch32, scratch32, Operand(imm.value));
+ Str(scratch32, toMemOperand(dest));
+ }
+ void adds64(Imm32 imm, Register dest) {
+ Adds(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+ }
+ void adds64(ImmWord imm, Register dest) {
+ Adds(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+ }
+ void adds64(Register src, Register dest) {
+ Adds(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(ARMRegister(src, 64)));
+ }
+
+ void subs32(Imm32 imm, Register dest) {
+ Subs(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value));
+ }
+ void subs32(Register src, Register dest) {
+ Subs(ARMRegister(dest, 32), ARMRegister(dest, 32),
+ Operand(ARMRegister(src, 32)));
+ }
+ void subs64(Imm32 imm, Register dest) {
+ Subs(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+ }
+ void subs64(Register src, Register dest) {
+ Subs(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(ARMRegister(src, 64)));
+ }
+
+ void negs32(Register reg) {
+ Negs(ARMRegister(reg, 32), Operand(ARMRegister(reg, 32)));
+ }
+
+ void ret() {
+ pop(lr);
+ abiret();
+ }
+
+ void retn(Imm32 n) {
+ vixl::UseScratchRegisterScope temps(this);
+ MOZ_ASSERT(temps.IsAvailable(ScratchReg64)); // ip0
+ temps.Exclude(ScratchReg64);
+ // ip0 <- [sp]; sp += n; ret ip0
+ Ldr(ScratchReg64,
+ MemOperand(GetStackPointer64(), ptrdiff_t(n.value), vixl::PostIndex));
+ syncStackPtr(); // SP is always used to transmit the stack between calls.
+ Ret(ScratchReg64);
+ }
+
+ void j(Condition cond, Label* dest) { B(dest, cond); }
+
+ void branch(Condition cond, Label* label) { B(label, cond); }
+ void branch(JitCode* target) {
+ // It is unclear why this sync is necessary:
+ // * PSP and SP have been observed to be different in testcase
+ // tests/async/debugger-reject-after-fulfill.js
+ // * Removing the sync causes no failures in all of jit-tests.
+ //
+ // Also see jump() above. This is used only to implement jump(JitCode*)
+ // and only for JS, it appears.
+ syncStackPtr();
+    // The jump target will be patched by executableCopy().
+    BufferOffset loc = b(-1, LabelDoc());
+ addPendingJump(loc, ImmPtr(target->raw()), RelocationKind::JITCODE);
+ }
+
+ void compareDouble(DoubleCondition cond, FloatRegister lhs,
+ FloatRegister rhs) {
+ Fcmp(ARMFPRegister(lhs, 64), ARMFPRegister(rhs, 64));
+ }
+
+ void compareFloat(DoubleCondition cond, FloatRegister lhs,
+ FloatRegister rhs) {
+ Fcmp(ARMFPRegister(lhs, 32), ARMFPRegister(rhs, 32));
+ }
+
+ void compareSimd128Int(Assembler::Condition cond, ARMFPRegister dest,
+ ARMFPRegister lhs, ARMFPRegister rhs);
+ void compareSimd128Float(Assembler::Condition cond, ARMFPRegister dest,
+ ARMFPRegister lhs, ARMFPRegister rhs);
+ void rightShiftInt8x16(FloatRegister lhs, Register rhs, FloatRegister dest,
+ bool isUnsigned);
+ void rightShiftInt16x8(FloatRegister lhs, Register rhs, FloatRegister dest,
+ bool isUnsigned);
+ void rightShiftInt32x4(FloatRegister lhs, Register rhs, FloatRegister dest,
+ bool isUnsigned);
+ void rightShiftInt64x2(FloatRegister lhs, Register rhs, FloatRegister dest,
+ bool isUnsigned);
+
+ void branchNegativeZero(FloatRegister reg, Register scratch, Label* label) {
+ MOZ_CRASH("branchNegativeZero");
+ }
+ void branchNegativeZeroFloat32(FloatRegister reg, Register scratch,
+ Label* label) {
+ MOZ_CRASH("branchNegativeZeroFloat32");
+ }
+
+ void boxDouble(FloatRegister src, const ValueOperand& dest, FloatRegister) {
+ Fmov(ARMRegister(dest.valueReg(), 64), ARMFPRegister(src, 64));
+ }
+ void boxNonDouble(JSValueType type, Register src, const ValueOperand& dest) {
+ boxValue(type, src, dest.valueReg());
+ }
+
+  // Note that the |dest| register here may be ScratchReg, so we shouldn't
+  // use ScratchReg as a temporary within these methods.
+ void unboxInt32(const ValueOperand& src, Register dest) {
+ move32(src.valueReg(), dest);
+ }
+ void unboxInt32(const Address& src, Register dest) { load32(src, dest); }
+ void unboxInt32(const BaseIndex& src, Register dest) { load32(src, dest); }
+
+ template <typename T>
+ void unboxDouble(const T& src, FloatRegister dest) {
+ loadDouble(src, dest);
+ }
+ void unboxDouble(const ValueOperand& src, FloatRegister dest) {
+ Fmov(ARMFPRegister(dest, 64), ARMRegister(src.valueReg(), 64));
+ }
+
+ void unboxArgObjMagic(const ValueOperand& src, Register dest) {
+ MOZ_CRASH("unboxArgObjMagic");
+ }
+ void unboxArgObjMagic(const Address& src, Register dest) {
+ MOZ_CRASH("unboxArgObjMagic");
+ }
+
+ void unboxBoolean(const ValueOperand& src, Register dest) {
+ move32(src.valueReg(), dest);
+ }
+ void unboxBoolean(const Address& src, Register dest) { load32(src, dest); }
+ void unboxBoolean(const BaseIndex& src, Register dest) { load32(src, dest); }
+
+ void unboxMagic(const ValueOperand& src, Register dest) {
+ move32(src.valueReg(), dest);
+ }
+ void unboxNonDouble(const ValueOperand& src, Register dest,
+ JSValueType type) {
+ unboxNonDouble(src.valueReg(), dest, type);
+ }
+
+ template <typename T>
+ void unboxNonDouble(T src, Register dest, JSValueType type) {
+ MOZ_ASSERT(type != JSVAL_TYPE_DOUBLE);
+ if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) {
+ load32(src, dest);
+ return;
+ }
+ loadPtr(src, dest);
+ unboxNonDouble(dest, dest, type);
+ }
+
+ void unboxNonDouble(Register src, Register dest, JSValueType type) {
+ MOZ_ASSERT(type != JSVAL_TYPE_DOUBLE);
+ if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) {
+ move32(src, dest);
+ return;
+ }
+ Eor(ARMRegister(dest, 64), ARMRegister(src, 64),
+ Operand(JSVAL_TYPE_TO_SHIFTED_TAG(type)));
+ }
+
+ void notBoolean(const ValueOperand& val) {
+ ARMRegister r(val.valueReg(), 64);
+ eor(r, r, Operand(1));
+ }
+ void unboxObject(const ValueOperand& src, Register dest) {
+ unboxNonDouble(src.valueReg(), dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObject(Register src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObject(const Address& src, Register dest) {
+ loadPtr(src, dest);
+ unboxNonDouble(dest, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObject(const BaseIndex& src, Register dest) {
+ doBaseIndex(ARMRegister(dest, 64), src, vixl::LDR_x);
+ unboxNonDouble(dest, dest, JSVAL_TYPE_OBJECT);
+ }
+
+ template <typename T>
+ void unboxObjectOrNull(const T& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ And(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(~JS::detail::ValueObjectOrNullBit));
+ }
+
+ // See comment in MacroAssembler-x64.h.
+ void unboxGCThingForGCBarrier(const Address& src, Register dest) {
+ loadPtr(src, dest);
+ And(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(JS::detail::ValueGCThingPayloadMask));
+ }
+ void unboxGCThingForGCBarrier(const ValueOperand& src, Register dest) {
+ And(ARMRegister(dest, 64), ARMRegister(src.valueReg(), 64),
+ Operand(JS::detail::ValueGCThingPayloadMask));
+ }
+
+ // Like unboxGCThingForGCBarrier, but loads the GC thing's chunk base.
+ void getGCThingValueChunk(const Address& src, Register dest) {
+ loadPtr(src, dest);
+ And(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(JS::detail::ValueGCThingPayloadChunkMask));
+ }
+ void getGCThingValueChunk(const ValueOperand& src, Register dest) {
+ And(ARMRegister(dest, 64), ARMRegister(src.valueReg(), 64),
+ Operand(JS::detail::ValueGCThingPayloadChunkMask));
+ }
+
+ inline void unboxValue(const ValueOperand& src, AnyRegister dest,
+ JSValueType type);
+
+ void unboxString(const ValueOperand& operand, Register dest) {
+ unboxNonDouble(operand, dest, JSVAL_TYPE_STRING);
+ }
+ void unboxString(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_STRING);
+ }
+ void unboxSymbol(const ValueOperand& operand, Register dest) {
+ unboxNonDouble(operand, dest, JSVAL_TYPE_SYMBOL);
+ }
+ void unboxSymbol(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_SYMBOL);
+ }
+ void unboxBigInt(const ValueOperand& operand, Register dest) {
+ unboxNonDouble(operand, dest, JSVAL_TYPE_BIGINT);
+ }
+ void unboxBigInt(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_BIGINT);
+ }
+  // These two functions use the low 32 bits of the full value register.
+ void boolValueToDouble(const ValueOperand& operand, FloatRegister dest) {
+ convertInt32ToDouble(operand.valueReg(), dest);
+ }
+ void int32ValueToDouble(const ValueOperand& operand, FloatRegister dest) {
+ convertInt32ToDouble(operand.valueReg(), dest);
+ }
+
+ void boolValueToFloat32(const ValueOperand& operand, FloatRegister dest) {
+ convertInt32ToFloat32(operand.valueReg(), dest);
+ }
+ void int32ValueToFloat32(const ValueOperand& operand, FloatRegister dest) {
+ convertInt32ToFloat32(operand.valueReg(), dest);
+ }
+
+ void loadConstantDouble(double d, FloatRegister dest) {
+ ARMFPRegister r(dest, 64);
+ if (d == 0.0) {
+ // Clang11 does movi for 0 and movi+fneg for -0, and this seems like a
+ // good implementation-independent strategy as it avoids any gpr->fpr
+ // moves or memory traffic.
+ Movi(r, 0);
+ if (std::signbit(d)) {
+ Fneg(r, r);
+ }
+ } else {
+ Fmov(r, d);
+ }
+ }
+ void loadConstantFloat32(float f, FloatRegister dest) {
+ ARMFPRegister r(dest, 32);
+ if (f == 0.0) {
+      // See comments above. There's no movi variant for a single-precision
+      // (S) register, so clear the whole double register instead.
+ Movi(ARMFPRegister(dest, 64), 0);
+ if (std::signbit(f)) {
+ Fneg(r, r);
+ }
+ } else {
+ Fmov(r, f);
+ }
+ }
+
+ void cmpTag(Register tag, ImmTag ref) {
+    // As opposed to other architectures, splitTag is replaced by
+    // splitSignExtTag, which extracts the tag with a sign extension. The
+    // reason is that a tag value would be too large to fit in cmp32's 12-bit
+    // immediate, and would require the VIXL macro assembler to add an extra
+    // instruction and an extra scratch register to load the tag value.
+ //
+ // Instead, we compare with the negative value of the sign extended tag with
+ // the CMN instruction. The sign extended tag is expected to be a negative
+ // value. Therefore the negative of the sign extended tag is expected to be
+ // near 0 and fit on 12 bits.
+ //
+ // Ignoring the sign extension, the logic is the following:
+ //
+ // CMP32(Reg, Tag) = Reg - Tag
+ // = Reg + (-Tag)
+ // = CMN32(Reg, -Tag)
+ //
+ // Note: testGCThing, testPrimitive and testNumber which are checking for
+ // inequalities should use unsigned comparisons (as done by default) in
+ // order to keep the same relation order after the sign extension, i.e.
+ // using Above or Below which are based on the carry flag.
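+    // Worked example with illustrative (not actual tag) values: if the
+    // sign-extended tag is -13, then negTag is 13, which easily fits in 12
+    // bits, and we emit cmn32(tag, Imm32(13)).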
+ uint32_t hiShift = JSVAL_TAG_SHIFT - 32;
+ int32_t seTag = int32_t(ref.value);
+ seTag = (seTag << hiShift) >> hiShift;
+ MOZ_ASSERT(seTag < 0);
+ int32_t negTag = -seTag;
+    // Check that negTag can be encoded as a 12-bit immediate value.
+ MOZ_ASSERT((negTag & ~0xFFF) == 0);
+ cmn32(tag, Imm32(negTag));
+ }
+
+ // Register-based tests.
+ Condition testUndefined(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_UNDEFINED));
+ return cond;
+ }
+ Condition testInt32(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_INT32));
+ return cond;
+ }
+ Condition testBoolean(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_BOOLEAN));
+ return cond;
+ }
+ Condition testNull(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_NULL));
+ return cond;
+ }
+ Condition testString(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_STRING));
+ return cond;
+ }
+ Condition testSymbol(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_SYMBOL));
+ return cond;
+ }
+ Condition testBigInt(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_BIGINT));
+ return cond;
+ }
+ Condition testObject(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_OBJECT));
+ return cond;
+ }
+ Condition testDouble(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_MAX_DOUBLE));
+ // Requires unsigned comparison due to cmpTag internals.
+ return (cond == Equal) ? BelowOrEqual : Above;
+ }
+ Condition testNumber(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JS::detail::ValueUpperInclNumberTag));
+ // Requires unsigned comparison due to cmpTag internals.
+ return (cond == Equal) ? BelowOrEqual : Above;
+ }
+ Condition testGCThing(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JS::detail::ValueLowerInclGCThingTag));
+ // Requires unsigned comparison due to cmpTag internals.
+ return (cond == Equal) ? AboveOrEqual : Below;
+ }
+ Condition testMagic(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_MAGIC));
+ return cond;
+ }
+ Condition testPrimitive(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JS::detail::ValueUpperExclPrimitiveTag));
+ // Requires unsigned comparison due to cmpTag internals.
+ return (cond == Equal) ? Below : AboveOrEqual;
+ }
+ Condition testError(Condition cond, Register tag) {
+ return testMagic(cond, tag);
+ }
+
+ // ValueOperand-based tests.
+ Condition testInt32(Condition cond, const ValueOperand& value) {
+ // The incoming ValueOperand may use scratch registers.
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != value.valueReg());
+
+ splitSignExtTag(value, scratch);
+ return testInt32(cond, scratch);
+ }
+ Condition testBoolean(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testBoolean(cond, scratch);
+ }
+ Condition testDouble(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testDouble(cond, scratch);
+ }
+ Condition testNull(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testNull(cond, scratch);
+ }
+ Condition testUndefined(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testUndefined(cond, scratch);
+ }
+ Condition testString(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testString(cond, scratch);
+ }
+ Condition testSymbol(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testSymbol(cond, scratch);
+ }
+ Condition testBigInt(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testBigInt(cond, scratch);
+ }
+ Condition testObject(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testObject(cond, scratch);
+ }
+ Condition testNumber(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testNumber(cond, scratch);
+ }
+ Condition testPrimitive(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testPrimitive(cond, scratch);
+ }
+ Condition testMagic(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testMagic(cond, scratch);
+ }
+ Condition testGCThing(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testGCThing(cond, scratch);
+ }
+ Condition testError(Condition cond, const ValueOperand& value) {
+ return testMagic(cond, value);
+ }
+
+ // Address-based tests.
+ Condition testGCThing(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testGCThing(cond, scratch);
+ }
+ Condition testMagic(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testMagic(cond, scratch);
+ }
+ Condition testInt32(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testInt32(cond, scratch);
+ }
+ Condition testDouble(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testDouble(cond, scratch);
+ }
+ Condition testBoolean(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testBoolean(cond, scratch);
+ }
+ Condition testNull(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testNull(cond, scratch);
+ }
+ Condition testUndefined(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testUndefined(cond, scratch);
+ }
+ Condition testString(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testString(cond, scratch);
+ }
+ Condition testSymbol(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testSymbol(cond, scratch);
+ }
+ Condition testBigInt(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testBigInt(cond, scratch);
+ }
+ Condition testObject(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testObject(cond, scratch);
+ }
+ Condition testNumber(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testNumber(cond, scratch);
+ }
+
+ // BaseIndex-based tests.
+ Condition testUndefined(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testUndefined(cond, scratch);
+ }
+ Condition testNull(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testNull(cond, scratch);
+ }
+ Condition testBoolean(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testBoolean(cond, scratch);
+ }
+ Condition testString(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testString(cond, scratch);
+ }
+ Condition testSymbol(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testSymbol(cond, scratch);
+ }
+ Condition testBigInt(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testBigInt(cond, scratch);
+ }
+ Condition testInt32(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testInt32(cond, scratch);
+ }
+ Condition testObject(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testObject(cond, scratch);
+ }
+ Condition testDouble(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testDouble(cond, scratch);
+ }
+ Condition testMagic(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testMagic(cond, scratch);
+ }
+ Condition testGCThing(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testGCThing(cond, scratch);
+ }
+
+ Condition testInt32Truthy(bool truthy, const ValueOperand& operand) {
+ ARMRegister payload32(operand.valueReg(), 32);
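+    // The int32 payload occupies the low 32 bits of the value register, so a
+    // 32-bit TST of the register against itself sets Z exactly when the
+    // payload is zero.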
+ Tst(payload32, payload32);
+ return truthy ? NonZero : Zero;
+ }
+
+ Condition testBooleanTruthy(bool truthy, const ValueOperand& operand) {
+ ARMRegister payload32(operand.valueReg(), 32);
+ Tst(payload32, payload32);
+ return truthy ? NonZero : Zero;
+ }
+
+ Condition testBigIntTruthy(bool truthy, const ValueOperand& value);
+ Condition testStringTruthy(bool truthy, const ValueOperand& value);
+
+ void int32OrDouble(Register src, ARMFPRegister dest) {
+ Label isInt32;
+ Label join;
+ testInt32(Equal, ValueOperand(src));
+ B(&isInt32, Equal);
+    // It's a double: move the bits as-is.
+ Fmov(dest, ARMRegister(src, 64));
+ B(&join);
+ bind(&isInt32);
+    // It's an int32: convert while moving.
+ Scvtf(dest, ARMRegister(src, 32));
+ bind(&join);
+ }
+ void loadUnboxedValue(Address address, MIRType type, AnyRegister dest) {
+ if (dest.isFloat()) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != address.base);
+ Ldr(scratch64, toMemOperand(address));
+ int32OrDouble(scratch64.asUnsized(), ARMFPRegister(dest.fpu(), 64));
+ } else {
+ unboxNonDouble(address, dest.gpr(), ValueTypeFromMIRType(type));
+ }
+ }
+
+ void loadUnboxedValue(BaseIndex address, MIRType type, AnyRegister dest) {
+ if (dest.isFloat()) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != address.base);
+ MOZ_ASSERT(scratch64.asUnsized() != address.index);
+ doBaseIndex(scratch64, address, vixl::LDR_x);
+ int32OrDouble(scratch64.asUnsized(), ARMFPRegister(dest.fpu(), 64));
+ } else {
+ unboxNonDouble(address, dest.gpr(), ValueTypeFromMIRType(type));
+ }
+ }
+
+ // Emit a B that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp().
+ CodeOffset toggledJump(Label* label) {
+ BufferOffset offset = b(label, Always);
+ CodeOffset ret(offset.getOffset());
+ return ret;
+ }
+
+ // load: offset to the load instruction obtained by movePatchablePtr().
+ void writeDataRelocation(ImmGCPtr ptr, BufferOffset load) {
+ // Raw GC pointer relocations and Value relocations both end up in
+ // Assembler::TraceDataRelocations.
+ if (ptr.value) {
+ if (gc::IsInsideNursery(ptr.value)) {
+ embedsNurseryPointers_ = true;
+ }
+ dataRelocations_.writeUnsigned(load.getOffset());
+ }
+ }
+ void writeDataRelocation(const Value& val, BufferOffset load) {
+ // Raw GC pointer relocations and Value relocations both end up in
+ // Assembler::TraceDataRelocations.
+ if (val.isGCThing()) {
+ gc::Cell* cell = val.toGCThing();
+ if (cell && gc::IsInsideNursery(cell)) {
+ embedsNurseryPointers_ = true;
+ }
+ dataRelocations_.writeUnsigned(load.getOffset());
+ }
+ }
+
+ void computeEffectiveAddress(const Address& address, Register dest) {
+ Add(ARMRegister(dest, 64), toARMRegister(address.base, 64),
+ Operand(address.offset));
+ }
+ void computeEffectiveAddress(const Address& address, RegisterOrSP dest) {
+ Add(toARMRegister(dest, 64), toARMRegister(address.base, 64),
+ Operand(address.offset));
+ }
+ void computeEffectiveAddress(const BaseIndex& address, Register dest) {
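+    // Computes dest = base + (index << scale) [+ offset].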
+ ARMRegister dest64(dest, 64);
+ ARMRegister base64 = toARMRegister(address.base, 64);
+ ARMRegister index64(address.index, 64);
+
+ Add(dest64, base64, Operand(index64, vixl::LSL, address.scale));
+ if (address.offset) {
+ Add(dest64, dest64, Operand(address.offset));
+ }
+ }
+
+ public:
+ void handleFailureWithHandlerTail(Label* profilerExitTail,
+ Label* bailoutTail);
+
+ void profilerEnterFrame(Register framePtr, Register scratch);
+ void profilerExitFrame();
+
+ void wasmLoadImpl(const wasm::MemoryAccessDesc& access, Register memoryBase,
+ Register ptr, AnyRegister outany, Register64 out64);
+ void wasmLoadImpl(const wasm::MemoryAccessDesc& access, MemOperand srcAddr,
+ AnyRegister outany, Register64 out64);
+ void wasmStoreImpl(const wasm::MemoryAccessDesc& access, AnyRegister valany,
+ Register64 val64, Register memoryBase, Register ptr);
+ void wasmStoreImpl(const wasm::MemoryAccessDesc& access, MemOperand destAddr,
+ AnyRegister valany, Register64 val64);
+ // The complete address is in `address`, and `access` is used for its type
+ // attributes only; its `offset` is ignored.
+ void wasmLoadAbsolute(const wasm::MemoryAccessDesc& access,
+ Register memoryBase, uint64_t address, AnyRegister out,
+ Register64 out64);
+ void wasmStoreAbsolute(const wasm::MemoryAccessDesc& access,
+ AnyRegister value, Register64 value64,
+ Register memoryBase, uint64_t address);
+
+ // Emit a BLR or NOP instruction. ToggleCall can be used to patch
+ // this instruction.
+ CodeOffset toggledCall(JitCode* target, bool enabled) {
+ // The returned offset must be to the first instruction generated,
+ // for the debugger to match offset with Baseline's pcMappingEntries_.
+ BufferOffset offset = nextOffset();
+
+ // It is unclear why this sync is necessary:
+ // * PSP and SP have been observed to be different in testcase
+ // tests/cacheir/bug1448136.js
+    //  * Removing the sync causes no failures in any of the jit-tests.
+ syncStackPtr();
+
+ BufferOffset loadOffset;
+ {
+ vixl::UseScratchRegisterScope temps(this);
+
+ // The register used for the load is hardcoded, so that ToggleCall
+ // can patch in the branch instruction easily. This could be changed,
+ // but then ToggleCall must read the target register from the load.
+ MOZ_ASSERT(temps.IsAvailable(ScratchReg2_64));
+ temps.Exclude(ScratchReg2_64);
+
+ loadOffset = immPool64(ScratchReg2_64, uint64_t(target->raw()));
+
+ if (enabled) {
+ blr(ScratchReg2_64);
+ } else {
+ nop();
+ }
+ }
+
+ addPendingJump(loadOffset, ImmPtr(target->raw()), RelocationKind::JITCODE);
+ CodeOffset ret(offset.getOffset());
+ return ret;
+ }
+
+ static size_t ToggledCallSize(uint8_t* code) {
+ // The call site is a sequence of two or three instructions:
+ //
+ // syncStack (optional)
+ // ldr/adr
+ // nop/blr
+ //
+ // Flushed constant pools can appear before any of the instructions.
+
+ const Instruction* cur = (const Instruction*)code;
+ cur = cur->skipPool();
+    if (cur->IsStackPtrSync()) {
+      cur = cur->NextInstruction();
+    }
+ cur = cur->skipPool();
+ cur = cur->NextInstruction(); // LDR/ADR
+ cur = cur->skipPool();
+ cur = cur->NextInstruction(); // NOP/BLR
+ return (uint8_t*)cur - code;
+ }
+
+ void checkARMRegAlignment(const ARMRegister& reg) {
+#ifdef DEBUG
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT_IF(!reg.IsSP(), scratch64.asUnsized() != reg.asUnsized());
+ Label aligned;
+ Mov(scratch64, reg);
+ Tst(scratch64, Operand(StackAlignment - 1));
+ B(Zero, &aligned);
+ breakpoint();
+ bind(&aligned);
+ Mov(scratch64, vixl::xzr); // Clear the scratch register for sanity.
+#endif
+ }
+
+ void checkStackAlignment() {
+#ifdef DEBUG
+ checkARMRegAlignment(GetStackPointer64());
+
+ // If another register is being used to track pushes, check sp explicitly.
+ if (!GetStackPointer64().Is(vixl::sp)) {
+ checkARMRegAlignment(vixl::sp);
+ }
+#endif
+ }
+
+ void abiret() {
+ syncStackPtr(); // SP is always used to transmit the stack between calls.
+ vixl::MacroAssembler::Ret(vixl::lr);
+ }
+
+ void incrementInt32Value(const Address& addr) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != addr.base);
+
+ load32(addr, scratch32.asUnsized());
+ Add(scratch32, scratch32, Operand(1));
+ store32(scratch32.asUnsized(), addr);
+ }
+
+ void breakpoint();
+
+ // Emits a simulator directive to save the current sp on an internal stack.
+ void simulatorMarkSP() {
+#ifdef JS_SIMULATOR_ARM64
+ svc(vixl::kMarkStackPointer);
+#endif
+ }
+
+ // Emits a simulator directive to pop from its internal stack
+ // and assert that the value is equal to the current sp.
+ void simulatorCheckSP() {
+#ifdef JS_SIMULATOR_ARM64
+ svc(vixl::kCheckStackPointer);
+#endif
+ }
+
+ protected:
+ bool buildOOLFakeExitFrame(void* fakeReturnAddr);
+};
+
+// See documentation for ScratchTagScope and ScratchTagScopeRelease in
+// MacroAssembler-x64.h.
+
+class ScratchTagScope {
+ vixl::UseScratchRegisterScope temps_;
+ ARMRegister scratch64_;
+ bool owned_;
+ mozilla::DebugOnly<bool> released_;
+
+ public:
+ ScratchTagScope(MacroAssemblerCompat& masm, const ValueOperand&)
+ : temps_(&masm), owned_(true), released_(false) {
+ scratch64_ = temps_.AcquireX();
+ }
+
+ operator Register() {
+ MOZ_ASSERT(!released_);
+ return scratch64_.asUnsized();
+ }
+
+ void release() {
+ MOZ_ASSERT(!released_);
+ released_ = true;
+ if (owned_) {
+ temps_.Release(scratch64_);
+ owned_ = false;
+ }
+ }
+
+ void reacquire() {
+ MOZ_ASSERT(released_);
+ released_ = false;
+ }
+};
+
+class ScratchTagScopeRelease {
+ ScratchTagScope* ts_;
+
+ public:
+ explicit ScratchTagScopeRelease(ScratchTagScope* ts) : ts_(ts) {
+ ts_->release();
+ }
+ ~ScratchTagScopeRelease() { ts_->reacquire(); }
+};
+
+inline void MacroAssemblerCompat::splitTagForTest(const ValueOperand& value,
+ ScratchTagScope& tag) {
+ splitSignExtTag(value, tag);
+}
+
+typedef MacroAssemblerCompat MacroAssemblerSpecific;
+
+} // namespace jit
+} // namespace js
+
+#endif // jit_arm64_MacroAssembler_arm64_h
diff --git a/js/src/jit/arm64/MoveEmitter-arm64.cpp b/js/src/jit/arm64/MoveEmitter-arm64.cpp
new file mode 100644
index 0000000000..fa1bb1209e
--- /dev/null
+++ b/js/src/jit/arm64/MoveEmitter-arm64.cpp
@@ -0,0 +1,329 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm64/MoveEmitter-arm64.h"
+#include "jit/MacroAssembler-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+MemOperand MoveEmitterARM64::toMemOperand(const MoveOperand& operand) const {
+ MOZ_ASSERT(operand.isMemory());
+ ARMRegister base(operand.base(), 64);
+ if (operand.base() == masm.getStackPointer()) {
+ return MemOperand(base,
+ operand.disp() + (masm.framePushed() - pushedAtStart_));
+ }
+ return MemOperand(base, operand.disp());
+}
+
+void MoveEmitterARM64::emit(const MoveResolver& moves) {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+  // We have two scratch general registers, so use one as temporary storage
+  // for breaking cycles and leave the other available for memory-to-memory
+  // moves.
+  //
+  // This register is used when breaking GENERAL, INT32, FLOAT32, and DOUBLE
+  // move cycles. For FLOAT32/DOUBLE this involves an fmov between float and
+  // general registers. We could avoid that if we had an extra scratch float
+  // register; as it is, the scratch float register is needed for
+  // memory-to-memory moves that may occur within the cycle. We cannot use the
+  // scratch general register for SIMD128 cycles because it is not large
+  // enough.
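+  //
+  // For example, in a two-element cycle (x0 -> x1, x1 -> x0), the original
+  // value of x1 is parked in this scratch register when the cycle begins and
+  // written to x0 when the cycle completes, while the x0 -> x1 move proceeds
+  // as usual.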
+ cycleGeneralReg_ = temps.AcquireX();
+
+ for (size_t i = 0; i < moves.numMoves(); i++) {
+ emitMove(moves.getMove(i));
+ }
+
+ cycleGeneralReg_ = ARMRegister();
+}
+
+void MoveEmitterARM64::finish() {
+ assertDone();
+ masm.freeStack(masm.framePushed() - pushedAtStart_);
+ MOZ_ASSERT(masm.framePushed() == pushedAtStart_);
+}
+
+void MoveEmitterARM64::emitMove(const MoveOp& move) {
+ const MoveOperand& from = move.from();
+ const MoveOperand& to = move.to();
+
+ if (move.isCycleBegin()) {
+ MOZ_ASSERT(!inCycle_ && !move.isCycleEnd());
+ breakCycle(from, to, move.endCycleType());
+ inCycle_ = true;
+ } else if (move.isCycleEnd()) {
+ MOZ_ASSERT(inCycle_);
+ completeCycle(from, to, move.type());
+ inCycle_ = false;
+ return;
+ }
+
+ switch (move.type()) {
+ case MoveOp::FLOAT32:
+ emitFloat32Move(from, to);
+ break;
+ case MoveOp::DOUBLE:
+ emitDoubleMove(from, to);
+ break;
+ case MoveOp::SIMD128:
+ emitSimd128Move(from, to);
+ break;
+ case MoveOp::INT32:
+ emitInt32Move(from, to);
+ break;
+ case MoveOp::GENERAL:
+ emitGeneralMove(from, to);
+ break;
+ default:
+ MOZ_CRASH("Unexpected move type");
+ }
+}
+
+void MoveEmitterARM64::emitFloat32Move(const MoveOperand& from,
+ const MoveOperand& to) {
+ if (from.isFloatReg()) {
+ if (to.isFloatReg()) {
+ masm.Fmov(toFPReg(to, MoveOp::FLOAT32), toFPReg(from, MoveOp::FLOAT32));
+ } else {
+ masm.Str(toFPReg(from, MoveOp::FLOAT32), toMemOperand(to));
+ }
+ return;
+ }
+
+ if (to.isFloatReg()) {
+ masm.Ldr(toFPReg(to, MoveOp::FLOAT32), toMemOperand(from));
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMFPRegister scratch32 = temps.AcquireS();
+ masm.Ldr(scratch32, toMemOperand(from));
+ masm.Str(scratch32, toMemOperand(to));
+}
+
+void MoveEmitterARM64::emitDoubleMove(const MoveOperand& from,
+ const MoveOperand& to) {
+ if (from.isFloatReg()) {
+ if (to.isFloatReg()) {
+ masm.Fmov(toFPReg(to, MoveOp::DOUBLE), toFPReg(from, MoveOp::DOUBLE));
+ } else {
+ masm.Str(toFPReg(from, MoveOp::DOUBLE), toMemOperand(to));
+ }
+ return;
+ }
+
+ if (to.isFloatReg()) {
+ masm.Ldr(toFPReg(to, MoveOp::DOUBLE), toMemOperand(from));
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMFPRegister scratch = temps.AcquireD();
+ masm.Ldr(scratch, toMemOperand(from));
+ masm.Str(scratch, toMemOperand(to));
+}
+
+void MoveEmitterARM64::emitSimd128Move(const MoveOperand& from,
+ const MoveOperand& to) {
+ if (from.isFloatReg()) {
+ if (to.isFloatReg()) {
+ masm.Mov(toFPReg(to, MoveOp::SIMD128), toFPReg(from, MoveOp::SIMD128));
+ } else {
+ masm.Str(toFPReg(from, MoveOp::SIMD128), toMemOperand(to));
+ }
+ return;
+ }
+
+ if (to.isFloatReg()) {
+ masm.Ldr(toFPReg(to, MoveOp::SIMD128), toMemOperand(from));
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMFPRegister scratch = temps.AcquireQ();
+ masm.Ldr(scratch, toMemOperand(from));
+ masm.Str(scratch, toMemOperand(to));
+}
+
+void MoveEmitterARM64::emitInt32Move(const MoveOperand& from,
+ const MoveOperand& to) {
+ if (from.isGeneralReg()) {
+ if (to.isGeneralReg()) {
+ masm.Mov(toARMReg32(to), toARMReg32(from));
+ } else {
+ masm.Str(toARMReg32(from), toMemOperand(to));
+ }
+ return;
+ }
+
+ if (to.isGeneralReg()) {
+ masm.Ldr(toARMReg32(to), toMemOperand(from));
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMRegister scratch32 = temps.AcquireW();
+ masm.Ldr(scratch32, toMemOperand(from));
+ masm.Str(scratch32, toMemOperand(to));
+}
+
+void MoveEmitterARM64::emitGeneralMove(const MoveOperand& from,
+ const MoveOperand& to) {
+ if (from.isGeneralReg()) {
+ MOZ_ASSERT(to.isGeneralReg() || to.isMemory());
+ if (to.isGeneralReg()) {
+ masm.Mov(toARMReg64(to), toARMReg64(from));
+ } else {
+ masm.Str(toARMReg64(from), toMemOperand(to));
+ }
+ return;
+ }
+
+ // {Memory OR EffectiveAddress} -> Register move.
+ if (to.isGeneralReg()) {
+ MOZ_ASSERT(from.isMemoryOrEffectiveAddress());
+ if (from.isMemory()) {
+ masm.Ldr(toARMReg64(to), toMemOperand(from));
+ } else {
+ masm.Add(toARMReg64(to), toARMReg64(from), Operand(from.disp()));
+ }
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMRegister scratch64 = temps.AcquireX();
+
+ // Memory -> Memory move.
+ if (from.isMemory()) {
+ MOZ_ASSERT(to.isMemory());
+ masm.Ldr(scratch64, toMemOperand(from));
+ masm.Str(scratch64, toMemOperand(to));
+ return;
+ }
+
+ // EffectiveAddress -> Memory move.
+ MOZ_ASSERT(from.isEffectiveAddress());
+ MOZ_ASSERT(to.isMemory());
+ masm.Add(scratch64, toARMReg64(from), Operand(from.disp()));
+ masm.Str(scratch64, toMemOperand(to));
+}
+
+MemOperand MoveEmitterARM64::cycleSlot() {
+ // Using SP as stack pointer requires alignment preservation below.
+ MOZ_ASSERT(!masm.GetStackPointer64().Is(sp));
+
+  // Allocate a slot for breaking cycles if we have not already done so.
+ if (pushedAtCycle_ == -1) {
+ static_assert(SpillSlotSize == 16);
+ masm.reserveStack(SpillSlotSize);
+ pushedAtCycle_ = masm.framePushed();
+ }
+
+ return MemOperand(masm.GetStackPointer64(),
+ masm.framePushed() - pushedAtCycle_);
+}
+
+void MoveEmitterARM64::breakCycle(const MoveOperand& from,
+ const MoveOperand& to, MoveOp::Type type) {
+ switch (type) {
+ case MoveOp::FLOAT32:
+ if (to.isMemory()) {
+ masm.Ldr(cycleGeneralReg_.W(), toMemOperand(to));
+ } else {
+ masm.Fmov(cycleGeneralReg_.W(), toFPReg(to, type));
+ }
+ break;
+
+ case MoveOp::DOUBLE:
+ if (to.isMemory()) {
+ masm.Ldr(cycleGeneralReg_.X(), toMemOperand(to));
+ } else {
+ masm.Fmov(cycleGeneralReg_.X(), toFPReg(to, type));
+ }
+ break;
+
+ case MoveOp::SIMD128:
+ if (to.isMemory()) {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMFPRegister scratch128 = temps.AcquireQ();
+ masm.Ldr(scratch128, toMemOperand(to));
+ masm.Str(scratch128, cycleSlot());
+ } else {
+ masm.Str(toFPReg(to, type), cycleSlot());
+ }
+ break;
+
+ case MoveOp::INT32:
+ if (to.isMemory()) {
+ masm.Ldr(cycleGeneralReg_.W(), toMemOperand(to));
+ } else {
+ masm.Mov(cycleGeneralReg_.W(), toARMReg32(to));
+ }
+ break;
+
+ case MoveOp::GENERAL:
+ if (to.isMemory()) {
+ masm.Ldr(cycleGeneralReg_.X(), toMemOperand(to));
+ } else {
+ masm.Mov(cycleGeneralReg_.X(), toARMReg64(to));
+ }
+ break;
+
+ default:
+ MOZ_CRASH("Unexpected move type");
+ }
+}
+
+void MoveEmitterARM64::completeCycle(const MoveOperand& from,
+ const MoveOperand& to, MoveOp::Type type) {
+ switch (type) {
+ case MoveOp::FLOAT32:
+ if (to.isMemory()) {
+ masm.Str(cycleGeneralReg_.W(), toMemOperand(to));
+ } else {
+ masm.Fmov(toFPReg(to, type), cycleGeneralReg_.W());
+ }
+ break;
+
+ case MoveOp::DOUBLE:
+ if (to.isMemory()) {
+ masm.Str(cycleGeneralReg_.X(), toMemOperand(to));
+ } else {
+ masm.Fmov(toFPReg(to, type), cycleGeneralReg_.X());
+ }
+ break;
+
+ case MoveOp::SIMD128:
+ if (to.isMemory()) {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMFPRegister scratch = temps.AcquireQ();
+ masm.Ldr(scratch, cycleSlot());
+ masm.Str(scratch, toMemOperand(to));
+ } else {
+ masm.Ldr(toFPReg(to, type), cycleSlot());
+ }
+ break;
+
+ case MoveOp::INT32:
+ if (to.isMemory()) {
+ masm.Str(cycleGeneralReg_.W(), toMemOperand(to));
+ } else {
+ masm.Mov(toARMReg32(to), cycleGeneralReg_.W());
+ }
+ break;
+
+ case MoveOp::GENERAL:
+ if (to.isMemory()) {
+ masm.Str(cycleGeneralReg_.X(), toMemOperand(to));
+ } else {
+ masm.Mov(toARMReg64(to), cycleGeneralReg_.X());
+ }
+ break;
+
+ default:
+ MOZ_CRASH("Unexpected move type");
+ }
+}
diff --git a/js/src/jit/arm64/MoveEmitter-arm64.h b/js/src/jit/arm64/MoveEmitter-arm64.h
new file mode 100644
index 0000000000..fec2e3e012
--- /dev/null
+++ b/js/src/jit/arm64/MoveEmitter-arm64.h
@@ -0,0 +1,99 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_MoveEmitter_arm64_h
+#define jit_arm64_MoveEmitter_arm64_h
+
+#include "mozilla/Assertions.h"
+
+#include <stdint.h>
+
+#include "jit/arm64/Assembler-arm64.h"
+#include "jit/MacroAssembler.h"
+#include "jit/MoveResolver.h"
+#include "jit/Registers.h"
+
+namespace js {
+namespace jit {
+
+class CodeGenerator;
+
+class MoveEmitterARM64 {
+ bool inCycle_;
+ MacroAssembler& masm;
+
+ // A scratch general register used to break cycles.
+ ARMRegister cycleGeneralReg_;
+
+ // Original stack push value.
+ uint32_t pushedAtStart_;
+
+ // This stores a stack offset to a spill location, snapshotting
+ // codegen->framePushed_ at the time it was allocated. It is -1 if no
+ // stack space has been allocated for that particular spill.
+ int32_t pushedAtCycle_;
+
+ void assertDone() { MOZ_ASSERT(!inCycle_); }
+
+ MemOperand cycleSlot();
+ MemOperand toMemOperand(const MoveOperand& operand) const;
+ ARMRegister toARMReg32(const MoveOperand& operand) const {
+ MOZ_ASSERT(operand.isGeneralReg());
+ return ARMRegister(operand.reg(), 32);
+ }
+ ARMRegister toARMReg64(const MoveOperand& operand) const {
+ if (operand.isGeneralReg()) {
+ return ARMRegister(operand.reg(), 64);
+ } else {
+ return ARMRegister(operand.base(), 64);
+ }
+ }
+ ARMFPRegister toFPReg(const MoveOperand& operand, MoveOp::Type t) const {
+ MOZ_ASSERT(operand.isFloatReg());
+ switch (t) {
+ case MoveOp::FLOAT32:
+ return ARMFPRegister(operand.floatReg().encoding(), 32);
+ case MoveOp::DOUBLE:
+ return ARMFPRegister(operand.floatReg().encoding(), 64);
+ case MoveOp::SIMD128:
+ return ARMFPRegister(operand.floatReg().encoding(), 128);
+ default:
+ MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("Bad register type");
+ }
+ }
+
+ void emitFloat32Move(const MoveOperand& from, const MoveOperand& to);
+ void emitDoubleMove(const MoveOperand& from, const MoveOperand& to);
+ void emitSimd128Move(const MoveOperand& from, const MoveOperand& to);
+ void emitInt32Move(const MoveOperand& from, const MoveOperand& to);
+ void emitGeneralMove(const MoveOperand& from, const MoveOperand& to);
+
+ void emitMove(const MoveOp& move);
+ void breakCycle(const MoveOperand& from, const MoveOperand& to,
+ MoveOp::Type type);
+ void completeCycle(const MoveOperand& from, const MoveOperand& to,
+ MoveOp::Type type);
+
+ public:
+ explicit MoveEmitterARM64(MacroAssembler& masm)
+ : inCycle_(false),
+ masm(masm),
+ pushedAtStart_(masm.framePushed()),
+ pushedAtCycle_(-1) {}
+
+ ~MoveEmitterARM64() { assertDone(); }
+
+ void emit(const MoveResolver& moves);
+ void finish();
+ void setScratchRegister(Register reg) {}
+};
+
+typedef MoveEmitterARM64 MoveEmitter;
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm64_MoveEmitter_arm64_h */
diff --git a/js/src/jit/arm64/SharedICHelpers-arm64-inl.h b/js/src/jit/arm64/SharedICHelpers-arm64-inl.h
new file mode 100644
index 0000000000..8261a8b94f
--- /dev/null
+++ b/js/src/jit/arm64/SharedICHelpers-arm64-inl.h
@@ -0,0 +1,79 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_SharedICHelpers_arm64_inl_h
+#define jit_arm64_SharedICHelpers_arm64_inl_h
+
+#include "jit/BaselineFrame.h"
+#include "jit/SharedICHelpers.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+namespace js {
+namespace jit {
+
+inline void EmitBaselineTailCallVM(TrampolinePtr target, MacroAssembler& masm,
+ uint32_t argSize) {
+#ifdef DEBUG
+ // We assume that R0 has been pushed, and R2 is unused.
+ static_assert(R2 == ValueOperand(r0));
+
+ // Store frame size without VMFunction arguments for debug assertions.
+ masm.Sub(x0, FramePointer64, masm.GetStackPointer64());
+ masm.Sub(w0, w0, Operand(argSize));
+ Address frameSizeAddr(FramePointer,
+ BaselineFrame::reverseOffsetOfDebugFrameSize());
+ masm.store32(w0.asUnsized(), frameSizeAddr);
+#endif
+
+ // Push frame descriptor (minus the return address) and perform the tail call.
+ static_assert(ICTailCallReg == lr);
+ masm.pushFrameDescriptor(FrameType::BaselineJS);
+
+ // The return address will be pushed by the VM wrapper, for compatibility
+ // with direct calls. Refer to the top of generateVMWrapper().
+ // ICTailCallReg (lr) already contains the return address (as we keep
+ // it there through the stub calls).
+
+ masm.jump(target);
+}
+
+inline void EmitBaselineCallVM(TrampolinePtr target, MacroAssembler& masm) {
+ masm.pushFrameDescriptor(FrameType::BaselineStub);
+ masm.call(target);
+}
+
+inline void EmitBaselineEnterStubFrame(MacroAssembler& masm, Register scratch) {
+ MOZ_ASSERT(scratch != ICTailCallReg);
+
+#ifdef DEBUG
+ // Compute frame size.
+ masm.Sub(ARMRegister(scratch, 64), FramePointer64, masm.GetStackPointer64());
+
+ Address frameSizeAddr(FramePointer,
+ BaselineFrame::reverseOffsetOfDebugFrameSize());
+ masm.store32(scratch, frameSizeAddr);
+#endif
+
+ // Push frame descriptor and return address.
+ // Save old frame pointer, stack pointer, and stub reg.
+ masm.PushFrameDescriptor(FrameType::BaselineJS);
+ masm.Push(ICTailCallReg);
+ masm.Push(FramePointer);
+
+ // Update the frame register.
+ masm.Mov(FramePointer64, masm.GetStackPointer64());
+
+ masm.Push(ICStubReg);
+
+ // Stack should remain 16-byte aligned.
+ masm.checkStackAlignment();
+}
+
+} // namespace jit
+} // namespace js
+
+#endif // jit_arm64_SharedICHelpers_arm64_inl_h
diff --git a/js/src/jit/arm64/SharedICHelpers-arm64.h b/js/src/jit/arm64/SharedICHelpers-arm64.h
new file mode 100644
index 0000000000..2ea45c80fb
--- /dev/null
+++ b/js/src/jit/arm64/SharedICHelpers-arm64.h
@@ -0,0 +1,82 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_SharedICHelpers_arm64_h
+#define jit_arm64_SharedICHelpers_arm64_h
+
+#include "jit/BaselineIC.h"
+#include "jit/JitFrames.h"
+#include "jit/MacroAssembler.h"
+#include "jit/SharedICRegisters.h"
+
+namespace js {
+namespace jit {
+
+// Distance from sp to the top Value inside an IC stub (no return address on
+// the stack on ARM64).
+static const size_t ICStackValueOffset = 0;
+
+inline void EmitRestoreTailCallReg(MacroAssembler& masm) {
+  // No-op on ARM64 because the link register always holds the return address.
+}
+
+inline void EmitRepushTailCallReg(MacroAssembler& masm) {
+  // No-op on ARM64 because the link register always holds the return address.
+}
+
+inline void EmitCallIC(MacroAssembler& masm, CodeOffset* callOffset) {
+ // The stub pointer must already be in ICStubReg.
+ // Load stubcode pointer from the ICStub.
+ // R2 won't be active when we call ICs, so we can use r0.
+ static_assert(R2 == ValueOperand(r0));
+ masm.loadPtr(Address(ICStubReg, ICStub::offsetOfStubCode()), r0);
+
+ // Call the stubcode via a direct branch-and-link.
+ masm.Blr(x0);
+ *callOffset = CodeOffset(masm.currentOffset());
+}
+
+inline void EmitReturnFromIC(MacroAssembler& masm) {
+ masm.abiret(); // Defaults to lr.
+}
+
+inline void EmitBaselineLeaveStubFrame(MacroAssembler& masm) {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMRegister scratch64 = temps.AcquireX();
+
+ Address stubAddr(FramePointer, BaselineStubFrameLayout::ICStubOffsetFromFP);
+ masm.loadPtr(stubAddr, ICStubReg);
+
+ masm.moveToStackPtr(FramePointer);
+
+ // Pop values, discarding the frame descriptor.
+ masm.pop(FramePointer, ICTailCallReg, scratch64.asUnsized());
+
+ // Stack should remain 16-byte aligned.
+ masm.checkStackAlignment();
+}
+
+template <typename AddrType>
+inline void EmitPreBarrier(MacroAssembler& masm, const AddrType& addr,
+ MIRType type) {
+ // On AArch64, lr is clobbered by guardedCallPreBarrier. Save it first.
+ masm.push(lr);
+ masm.guardedCallPreBarrier(addr, type);
+ masm.pop(lr);
+}
+
+inline void EmitStubGuardFailure(MacroAssembler& masm) {
+ // Load next stub into ICStubReg.
+ masm.loadPtr(Address(ICStubReg, ICCacheIRStub::offsetOfNext()), ICStubReg);
+
+  // The return address is already loaded; just jump to the next stubcode.
+ masm.jump(Address(ICStubReg, ICStub::offsetOfStubCode()));
+}
+
+} // namespace jit
+} // namespace js
+
+#endif // jit_arm64_SharedICHelpers_arm64_h
diff --git a/js/src/jit/arm64/SharedICRegisters-arm64.h b/js/src/jit/arm64/SharedICRegisters-arm64.h
new file mode 100644
index 0000000000..1aa49d651c
--- /dev/null
+++ b/js/src/jit/arm64/SharedICRegisters-arm64.h
@@ -0,0 +1,51 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_SharedICRegisters_arm64_h
+#define jit_arm64_SharedICRegisters_arm64_h
+
+#include "jit/arm64/Assembler-arm64.h"
+#include "jit/Registers.h"
+#include "jit/RegisterSets.h"
+
+namespace js {
+namespace jit {
+
+// ValueOperands R0, R1, and R2.
+// R0 == JSReturnReg, and R2 uses registers not preserved across calls.
+// R1 value should be preserved across calls.
+static constexpr Register R0_ = r2;
+static constexpr Register R1_ = r19;
+static constexpr Register R2_ = r0;
+
+static constexpr ValueOperand R0(R0_);
+static constexpr ValueOperand R1(R1_);
+static constexpr ValueOperand R2(R2_);
+
+// ICTailCallReg and ICStubReg use registers that are not preserved across
+// calls.
+static constexpr Register ICTailCallReg = r30;
+static constexpr Register ICStubReg = r9;
+
+// R7 - R9 are generally available for use within stubcode.
+
+// Note that BaselineTailCallReg is actually just the link
+// register. In ARM code emission, we do not clobber BaselineTailCallReg
+// since we keep the return address for calls there.
+
+static constexpr FloatRegister FloatReg0 = {FloatRegisters::d0,
+ FloatRegisters::Double};
+static constexpr FloatRegister FloatReg1 = {FloatRegisters::d1,
+ FloatRegisters::Double};
+static constexpr FloatRegister FloatReg2 = {FloatRegisters::d2,
+ FloatRegisters::Double};
+static constexpr FloatRegister FloatReg3 = {FloatRegisters::d3,
+ FloatRegisters::Double};
+
+} // namespace jit
+} // namespace js
+
+#endif // jit_arm64_SharedICRegisters_arm64_h
diff --git a/js/src/jit/arm64/Trampoline-arm64.cpp b/js/src/jit/arm64/Trampoline-arm64.cpp
new file mode 100644
index 0000000000..36f7f24d02
--- /dev/null
+++ b/js/src/jit/arm64/Trampoline-arm64.cpp
@@ -0,0 +1,840 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm64/SharedICHelpers-arm64.h"
+#include "jit/Bailouts.h"
+#include "jit/BaselineFrame.h"
+#include "jit/CalleeToken.h"
+#include "jit/JitFrames.h"
+#include "jit/JitRuntime.h"
+#include "jit/PerfSpewer.h"
+#include "jit/VMFunctions.h"
+#include "vm/JitActivation.h" // js::jit::JitActivation
+#include "vm/JSContext.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+/* This method generates a trampoline on ARM64 for a C++ function with
+ * the following signature:
+ * bool blah(void* code, int argc, Value* argv,
+ * JSObject* scopeChain, Value* vp)
+ *   ...using the standard AArch64 calling convention.
+ */
+void JitRuntime::generateEnterJIT(JSContext* cx, MacroAssembler& masm) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateEnterJIT");
+
+ enterJITOffset_ = startTrampolineCode(masm);
+
+ const Register reg_code = IntArgReg0; // EnterJitData::jitcode.
+ const Register reg_argc = IntArgReg1; // EnterJitData::maxArgc.
+ const Register reg_argv = IntArgReg2; // EnterJitData::maxArgv.
+ const Register reg_osrFrame = IntArgReg3; // EnterJitData::osrFrame.
+ const Register reg_callee = IntArgReg4; // EnterJitData::calleeToken.
+ const Register reg_scope = IntArgReg5; // EnterJitData::scopeChain.
+ const Register reg_osrNStack =
+ IntArgReg6; // EnterJitData::osrNumStackValues.
+ const Register reg_vp = IntArgReg7; // Address of EnterJitData::result.
+
+ static_assert(OsrFrameReg == IntArgReg3);
+
+ // During the pushes below, use the normal stack pointer.
+ masm.SetStackPointer64(sp);
+
+ // Save return address and old frame pointer; set new frame pointer.
+ masm.push(r30, r29);
+ masm.moveStackPtrTo(r29);
+
+ // Save callee-save integer registers.
+ // Also save x7 (reg_vp) and x30 (lr), for use later.
+ masm.push(r19, r20, r21, r22);
+ masm.push(r23, r24, r25, r26);
+ masm.push(r27, r28, r7, r30);
+
+ // Save callee-save floating-point registers.
+ // AArch64 ABI specifies that only the lower 64 bits must be saved.
+ masm.push(d8, d9, d10, d11);
+ masm.push(d12, d13, d14, d15);
+
+#ifdef DEBUG
+ // Emit stack canaries.
+ masm.movePtr(ImmWord(0xdeadd00d), r23);
+ masm.movePtr(ImmWord(0xdeadd11d), r24);
+ masm.push(r23, r24);
+#endif
+
+ // Common code below attempts to push single registers at a time,
+ // which breaks the stack pointer's 16-byte alignment requirement.
+ // Note that movePtr() is invalid because StackPointer is treated as xzr.
+ //
+ // FIXME: After testing, this entire function should be rewritten to not
+ // use the PseudoStackPointer: since the amount of data pushed is
+ // precalculated, we can just allocate the whole frame header at once and
+ // index off sp. This will save a significant number of instructions where
+ // Push() updates sp.
+ masm.Mov(PseudoStackPointer64, sp);
+ masm.SetStackPointer64(PseudoStackPointer64);
+
+ // Remember stack depth without padding and arguments.
+ masm.moveStackPtrTo(r19);
+
+ // If constructing, include newTarget in argument vector.
+ {
+ Label noNewTarget;
+ Imm32 constructingToken(CalleeToken_FunctionConstructing);
+ masm.branchTest32(Assembler::Zero, reg_callee, constructingToken,
+ &noNewTarget);
+ masm.add32(Imm32(1), reg_argc);
+ masm.bind(&noNewTarget);
+ }
+
+ // JitFrameLayout is as follows (higher is higher in memory):
+ // N*8 - [ JS argument vector ] (base 16-byte aligned)
+ // 8 - calleeToken
+ // 8 - frameDescriptor (16-byte aligned)
+ // 8 - returnAddress
+ // 8 - frame pointer (16-byte aligned, pushed by callee)
+
+ // Touch frame incrementally (a requirement for Windows).
+ //
+ // Use already saved callee-save registers r20 and r21 as temps.
+ //
+ // This has to be done outside the ScratchRegisterScope, as the temps are
+ // under demand inside the touchFrameValues call.
+
+ // Give sp 16-byte alignment and sync stack pointers.
+ masm.andToStackPtr(Imm32(~0xf));
+ // We needn't worry about the Gecko Profiler mark because touchFrameValues
+ // touches in large increments.
+ masm.touchFrameValues(reg_argc, r20, r21);
+ // Restore stack pointer, preserved above.
+ masm.moveToStackPtr(r19);
+
+ // Push the argument vector onto the stack.
+ // WARNING: destructively modifies reg_argv
+ {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+
+ const ARMRegister tmp_argc = temps.AcquireX();
+ const ARMRegister tmp_sp = temps.AcquireX();
+
+ Label noArguments;
+ Label loopHead;
+
+ masm.movePtr(reg_argc, tmp_argc.asUnsized());
+
+ // sp -= 8
+ // Since we're using PostIndex Str below, this is necessary to avoid
+ // overwriting the Gecko Profiler mark pushed above.
+ masm.subFromStackPtr(Imm32(8));
+
+ // sp -= 8 * argc
+ masm.Sub(PseudoStackPointer64, PseudoStackPointer64,
+ Operand(tmp_argc, vixl::SXTX, 3));
+
+ // Give sp 16-byte alignment and sync stack pointers.
+ masm.andToStackPtr(Imm32(~0xf));
+ masm.moveStackPtrTo(tmp_sp.asUnsized());
+
+ masm.branchTestPtr(Assembler::Zero, reg_argc, reg_argc, &noArguments);
+
+ // Begin argument-pushing loop.
+ // This could be optimized using Ldp and Stp.
+ {
+ masm.bind(&loopHead);
+
+ // Load an argument from argv, then increment argv by 8.
+ masm.Ldr(x24, MemOperand(ARMRegister(reg_argv, 64), Operand(8),
+ vixl::PostIndex));
+
+ // Store the argument to tmp_sp, then increment tmp_sp by 8.
+ masm.Str(x24, MemOperand(tmp_sp, Operand(8), vixl::PostIndex));
+
+ // Decrement tmp_argc and set the condition codes for the new value.
+ masm.Subs(tmp_argc, tmp_argc, Operand(1));
+
+ // Branch if arguments remain.
+ masm.B(&loopHead, vixl::Condition::NonZero);
+ }
+
+ masm.bind(&noArguments);
+ }
+ masm.checkStackAlignment();
+
+ // Push the calleeToken and the frame descriptor.
+ // The result address is used to store the actual number of arguments
+ // without adding an argument to EnterJIT.
+ {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ MOZ_ASSERT(temps.IsAvailable(ScratchReg64)); // ip0
+ temps.Exclude(ScratchReg64);
+ Register scratch = ScratchReg64.asUnsized();
+ masm.push(reg_callee);
+
+ // Push the descriptor.
+ masm.unboxInt32(Address(reg_vp, 0x0), scratch);
+ masm.PushFrameDescriptorForJitCall(FrameType::CppToJSJit, scratch, scratch);
+ }
+ masm.checkStackAlignment();
+
+ Label osrReturnPoint;
+ {
+ // Check for Interpreter -> Baseline OSR.
+
+ AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
+ MOZ_ASSERT(!regs.has(FramePointer));
+ regs.take(OsrFrameReg);
+ regs.take(reg_code);
+ regs.take(reg_osrNStack);
+ MOZ_ASSERT(!regs.has(ReturnReg), "ReturnReg matches reg_code");
+
+ Label notOsr;
+ masm.branchTestPtr(Assembler::Zero, OsrFrameReg, OsrFrameReg, &notOsr);
+
+ Register scratch = regs.takeAny();
+
+ // Frame prologue.
+ masm.Adr(ARMRegister(scratch, 64), &osrReturnPoint);
+ masm.push(scratch, FramePointer);
+ masm.moveStackPtrTo(FramePointer);
+
+ // Reserve frame.
+ masm.subFromStackPtr(Imm32(BaselineFrame::Size()));
+
+ Register framePtrScratch = regs.takeAny();
+ masm.touchFrameValues(reg_osrNStack, scratch, framePtrScratch);
+ masm.moveStackPtrTo(framePtrScratch);
+
+ // Reserve space for locals and stack values.
+ // scratch = num_stack_values * sizeof(Value).
+ masm.Lsl(ARMRegister(scratch, 32), ARMRegister(reg_osrNStack, 32), 3);
+ masm.subFromStackPtr(scratch);
+
+ // Enter exit frame.
+ masm.pushFrameDescriptor(FrameType::BaselineJS);
+ masm.push(xzr); // Push xzr for a fake return address.
+ masm.push(FramePointer);
+ // No GC things to mark: push a bare token.
+ masm.loadJSContext(scratch);
+ masm.enterFakeExitFrame(scratch, scratch, ExitFrameType::Bare);
+
+ masm.push(reg_code);
+
+ // Initialize the frame, including filling in the slots.
+ using Fn = bool (*)(BaselineFrame * frame, InterpreterFrame * interpFrame,
+ uint32_t numStackValues);
+ masm.setupUnalignedABICall(r19);
+ masm.passABIArg(framePtrScratch); // BaselineFrame.
+ masm.passABIArg(reg_osrFrame); // InterpreterFrame.
+ masm.passABIArg(reg_osrNStack);
+ masm.callWithABI<Fn, jit::InitBaselineFrameForOsr>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckHasExitFrame);
+
+ masm.pop(scratch);
+ MOZ_ASSERT(scratch != ReturnReg);
+
+ masm.addToStackPtr(Imm32(ExitFrameLayout::SizeWithFooter()));
+
+ Label error;
+ masm.branchIfFalseBool(ReturnReg, &error);
+
+ // If OSR-ing, then emit instrumentation for setting lastProfilerFrame
+ // if profiler instrumentation is enabled.
+ {
+ Label skipProfilingInstrumentation;
+ AbsoluteAddress addressOfEnabled(
+ cx->runtime()->geckoProfiler().addressOfEnabled());
+ masm.branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
+ &skipProfilingInstrumentation);
+ masm.profilerEnterFrame(FramePointer, regs.getAny());
+ masm.bind(&skipProfilingInstrumentation);
+ }
+
+ masm.jump(scratch);
+
+ // OOM: frame epilogue, load error value, discard return address and return.
+ masm.bind(&error);
+ masm.moveToStackPtr(FramePointer);
+ masm.pop(FramePointer);
+ masm.addToStackPtr(Imm32(sizeof(uintptr_t))); // Return address.
+ masm.syncStackPtr();
+ masm.moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
+ masm.B(&osrReturnPoint);
+
+ masm.bind(&notOsr);
+ masm.movePtr(reg_scope, R1_);
+ }
+
+ // The callee will push the return address and frame pointer on the stack,
+ // thus we check that the stack would be aligned once the call is complete.
+ masm.assertStackAlignment(JitStackAlignment, 2 * sizeof(uintptr_t));
+
+ // Call function.
+ // Since AArch64 doesn't have the pc register available, the callee must push
+ // lr.
+ masm.callJitNoProfiler(reg_code);
+
+ // Interpreter -> Baseline OSR will return here.
+ masm.bind(&osrReturnPoint);
+
+ // Discard arguments and padding. Set sp to the address of the saved
+ // registers. In debug builds we have to include the two stack canaries
+ // checked below.
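+  //
+  // The saved area covers the 12 integer and 8 floating-point registers
+  // pushed in the prologue above, plus the two canary words in debug builds.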
+#ifdef DEBUG
+ static constexpr size_t SavedRegSize = 22 * sizeof(void*);
+#else
+ static constexpr size_t SavedRegSize = 20 * sizeof(void*);
+#endif
+ masm.computeEffectiveAddress(Address(FramePointer, -int32_t(SavedRegSize)),
+ masm.getStackPointer());
+
+ masm.syncStackPtr();
+ masm.SetStackPointer64(sp);
+
+#ifdef DEBUG
+ // Check that canaries placed on function entry are still present.
+ masm.pop(r24, r23);
+ Label x23OK, x24OK;
+
+ masm.branchPtr(Assembler::Equal, r23, ImmWord(0xdeadd00d), &x23OK);
+ masm.breakpoint();
+ masm.bind(&x23OK);
+
+ masm.branchPtr(Assembler::Equal, r24, ImmWord(0xdeadd11d), &x24OK);
+ masm.breakpoint();
+ masm.bind(&x24OK);
+#endif
+
+ // Restore callee-save floating-point registers.
+ masm.pop(d15, d14, d13, d12);
+ masm.pop(d11, d10, d9, d8);
+
+ // Restore callee-save integer registers.
+ // Also restore x7 (reg_vp) and x30 (lr).
+ masm.pop(r30, r7, r28, r27);
+ masm.pop(r26, r25, r24, r23);
+ masm.pop(r22, r21, r20, r19);
+
+ // Store return value (in JSReturnReg = x2 to just-popped reg_vp).
+ masm.storeValue(JSReturnOperand, Address(reg_vp, 0));
+
+ // Restore old frame pointer.
+ masm.pop(r29, r30);
+
+ // Return using the value popped into x30.
+ masm.abiret();
+
+ // Reset stack pointer.
+ masm.SetStackPointer64(PseudoStackPointer64);
+}
+
+// static
+mozilla::Maybe<::JS::ProfilingFrameIterator::RegisterState>
+JitRuntime::getCppEntryRegisters(JitFrameLayout* frameStackAddress) {
+ // Not supported, or not implemented yet.
+ // TODO: Implement along with the corresponding stack-walker changes, in
+ // coordination with the Gecko Profiler, see bug 1635987 and follow-ups.
+ return mozilla::Nothing{};
+}
+
+static void PushRegisterDump(MacroAssembler& masm) {
+ const LiveRegisterSet First28GeneralRegisters = LiveRegisterSet(
+ GeneralRegisterSet(Registers::AllMask &
+ ~(1 << 31 | 1 << 30 | 1 << 29 | 1 << 28)),
+ FloatRegisterSet(FloatRegisters::NoneMask));
+
+ const LiveRegisterSet AllFloatRegisters =
+ LiveRegisterSet(GeneralRegisterSet(Registers::NoneMask),
+ FloatRegisterSet(FloatRegisters::AllMask));
+
+ // Push all general-purpose registers.
+ //
+ // The ARM64 ABI does not treat SP as a normal register that can
+ // be pushed. So pushing happens in two phases.
+ //
+  // Registers are pushed in reverse order of their register codes.
+ //
+ // See block comment in MacroAssembler.h for further required invariants.
+
+ // First, push the last four registers, passing zero for sp.
+ // Zero is pushed for x28 and x31: the pseudo-SP and SP, respectively.
+ masm.asVIXL().Push(xzr, x30, x29, xzr);
+
+ // Second, push the first 28 registers that serve no special purpose.
+ masm.PushRegsInMask(First28GeneralRegisters);
+
+ // Finally, push all floating-point registers, completing the RegisterDump.
+ masm.PushRegsInMask(AllFloatRegisters);
+}
+
+void JitRuntime::generateInvalidator(MacroAssembler& masm, Label* bailoutTail) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateInvalidator");
+
+ invalidatorOffset_ = startTrampolineCode(masm);
+
+ // The InvalidationBailoutStack saved in r0 must be:
+ // - osiPointReturnAddress_
+ // - ionScript_ (pushed by CodeGeneratorARM64::generateInvalidateEpilogue())
+ // - regs_ (pushed here)
+ // - fpregs_ (pushed here) [=r0]
+ PushRegisterDump(masm);
+ masm.moveStackPtrTo(r0);
+
+ // Reserve space for InvalidationBailout's bailoutInfo outparam.
+ masm.Sub(x1, masm.GetStackPointer64(), Operand(sizeof(void*)));
+ masm.moveToStackPtr(r1);
+
+ using Fn =
+ bool (*)(InvalidationBailoutStack * sp, BaselineBailoutInfo * *info);
+ masm.setupUnalignedABICall(r10);
+ masm.passABIArg(r0);
+ masm.passABIArg(r1);
+
+ masm.callWithABI<Fn, InvalidationBailout>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther);
+
+ masm.pop(r2); // Get the bailoutInfo outparam.
+
+ // Pop the machine state and the dead frame.
+ masm.moveToStackPtr(FramePointer);
+
+ // Jump to shared bailout tail. The BailoutInfo pointer has to be in r2.
+ masm.jump(bailoutTail);
+}
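+
+// For reference, the call made by the invalidator above amounts to the
+// following (a sketch; |stackTop| and |info| are illustrative names for the
+// values held in r0 and the reserved outparam slot):
+//
+//   BaselineBailoutInfo* info = nullptr;
+//   InvalidationBailout(reinterpret_cast<InvalidationBailoutStack*>(stackTop),
+//                       &info);
+//
+// with the resulting |info| pointer handed to the shared bailout tail in r2.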
+
+void JitRuntime::generateArgumentsRectifier(MacroAssembler& masm,
+ ArgumentsRectifierKind kind) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateArgumentsRectifier");
+
+ switch (kind) {
+ case ArgumentsRectifierKind::Normal:
+ argumentsRectifierOffset_ = startTrampolineCode(masm);
+ break;
+ case ArgumentsRectifierKind::TrialInlining:
+ trialInliningArgumentsRectifierOffset_ = startTrampolineCode(masm);
+ break;
+ }
+
+ // Save the return address for later.
+ masm.push(lr);
+
+ // Frame prologue.
+ //
+ // NOTE: if this changes, fix the Baseline bailout code too!
+ // See BaselineStackBuilder::calculatePrevFramePtr and
+ // BaselineStackBuilder::buildRectifierFrame (in BaselineBailouts.cpp).
+ masm.push(FramePointer);
+ masm.moveStackPtrTo(FramePointer);
+
+ // Load the information that the rectifier needs from the stack.
+ masm.loadNumActualArgs(FramePointer, r0);
+ masm.loadPtr(
+ Address(FramePointer, RectifierFrameLayout::offsetOfCalleeToken()), r1);
+
+  // Extract a JSFunction pointer from the callee token and keep the
+  // intermediate value around to avoid recomputing it later.
+ masm.And(x5, x1, Operand(CalleeTokenMask));
+
+ // Get the arguments from the function object.
+ masm.loadFunctionArgCount(x5.asUnsized(), x6.asUnsized());
+
+ static_assert(CalleeToken_FunctionConstructing == 0x1,
+ "Constructing must be low-order bit");
+ masm.And(x4, x1, Operand(CalleeToken_FunctionConstructing));
+ masm.Add(x7, x6, x4);
+
+ // Copy the number of actual arguments into r8.
+ masm.mov(r0, r8);
+
+ // Calculate the position that our arguments are at before sp gets modified.
+ masm.Add(x3, masm.GetStackPointer64(), Operand(x8, vixl::LSL, 3));
+ masm.Add(x3, x3, Operand(sizeof(RectifierFrameLayout)));
+
+ // If the number of Values without |this| is even, push 8 padding bytes to
+ // ensure the stack is 16-byte aligned.
+ Label noPadding;
+ masm.Tbnz(x7, 0, &noPadding);
+ masm.asVIXL().Push(xzr);
+ masm.bind(&noPadding);
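+  // For example, with 4 formal arguments and a non-constructing call, x7 is 4
+  // (even): 4 formals plus |this| make 5 Values (40 bytes), so the extra 8
+  // bytes keep the total 16-byte aligned before the callee token and the
+  // descriptor (two more words) are pushed below.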
+
+ {
+ Label notConstructing;
+ masm.Cbz(x4, &notConstructing);
+
+    // new.target lives at the end of the pushed args.
+    // NB: x3 points at the beginning of the last argument, so add one Value
+    // to reach argv[argc].
+ masm.loadPtr(Address(r3, sizeof(Value)), r4);
+ masm.Push(r4);
+
+ masm.bind(&notConstructing);
+ }
+
+ // Calculate the number of undefineds that need to be pushed.
+ masm.Sub(w2, w6, w8);
+
+ // Put an undefined in a register so it can be pushed.
+ masm.moveValue(UndefinedValue(), ValueOperand(r4));
+
+ // Push undefined N times.
+ {
+ Label undefLoopTop;
+ masm.bind(&undefLoopTop);
+ masm.Push(r4);
+ masm.Subs(w2, w2, Operand(1));
+ masm.B(&undefLoopTop, Assembler::NonZero);
+ }
+
+ // Arguments copy loop. Copy for x8 >= 0 to include |this|.
+ {
+ Label copyLoopTop;
+ masm.bind(&copyLoopTop);
+ masm.Ldr(x4, MemOperand(x3, -sizeof(Value), vixl::PostIndex));
+ masm.Push(r4);
+ masm.Subs(x8, x8, Operand(1));
+ masm.B(&copyLoopTop, Assembler::NotSigned);
+ }
+
+ masm.push(r1); // Callee token.
+ masm.pushFrameDescriptorForJitCall(FrameType::Rectifier, r0, r0);
+
+ // Call the target function.
+ switch (kind) {
+ case ArgumentsRectifierKind::Normal:
+ masm.loadJitCodeRaw(r5, r3);
+ argumentsRectifierReturnOffset_ = masm.callJitNoProfiler(r3);
+ break;
+ case ArgumentsRectifierKind::TrialInlining:
+ Label noBaselineScript, done;
+ masm.loadBaselineJitCodeRaw(r5, r3, &noBaselineScript);
+ masm.callJitNoProfiler(r3);
+ masm.jump(&done);
+
+ // See BaselineCacheIRCompiler::emitCallInlinedFunction.
+ masm.bind(&noBaselineScript);
+ masm.loadJitCodeRaw(r5, r3);
+ masm.callJitNoProfiler(r3);
+ masm.bind(&done);
+ break;
+ }
+
+ masm.moveToStackPtr(FramePointer);
+ masm.pop(FramePointer);
+ masm.ret();
+}
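+
+// Taken together, the rectifier above leaves a frame of this rough shape for
+// the callee (highest addresses first; inferred from the pushes, not an
+// authoritative layout):
+//
+//   [ caller's RectifierFrameLayout ]
+//   [ optional 8-byte alignment padding ]
+//   [ new.target, if constructing ]
+//   [ undefined padding for missing formals ]
+//   [ actual arguments, then |this| ]
+//   [ callee token ]
+//   [ frame descriptor (FrameType::Rectifier) ]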
+
+static void PushBailoutFrame(MacroAssembler& masm, Register spArg) {
+ // This assumes no SIMD registers, as JS does not support SIMD.
+
+ // The stack saved in spArg must be (higher entries have higher memory
+ // addresses):
+ // - snapshotOffset_
+ // - frameSize_
+ // - regs_
+ // - fpregs_ (spArg + 0)
+ PushRegisterDump(masm);
+ masm.moveStackPtrTo(spArg);
+}
+
+static void GenerateBailoutThunk(MacroAssembler& masm, Label* bailoutTail) {
+ PushBailoutFrame(masm, r0);
+
+  // Call the bailout function, giving a pointer to the structure we just
+  // blitted onto the stack.
+ // Make space for the BaselineBailoutInfo* outparam.
+ masm.reserveStack(sizeof(void*));
+ masm.moveStackPtrTo(r1);
+
+ using Fn = bool (*)(BailoutStack * sp, BaselineBailoutInfo * *info);
+ masm.setupUnalignedABICall(r2);
+ masm.passABIArg(r0);
+ masm.passABIArg(r1);
+ masm.callWithABI<Fn, Bailout>(MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckOther);
+
+ // Get the bailoutInfo outparam.
+ masm.pop(r2);
+
+ // Remove both the bailout frame and the topmost Ion frame's stack.
+ masm.moveToStackPtr(FramePointer);
+
+ // Jump to shared bailout tail. The BailoutInfo pointer has to be in r2.
+ masm.jump(bailoutTail);
+}
+
+void JitRuntime::generateBailoutHandler(MacroAssembler& masm,
+ Label* bailoutTail) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateBailoutHandler");
+
+ bailoutHandlerOffset_ = startTrampolineCode(masm);
+
+ GenerateBailoutThunk(masm, bailoutTail);
+}
+
+bool JitRuntime::generateVMWrapper(JSContext* cx, MacroAssembler& masm,
+ const VMFunctionData& f, DynFn nativeFun,
+ uint32_t* wrapperOffset) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateVMWrapper");
+
+ *wrapperOffset = startTrampolineCode(masm);
+
+ // Avoid conflicts with argument registers while discarding the result after
+ // the function call.
+ AllocatableGeneralRegisterSet regs(Register::Codes::WrapperMask);
+
+ static_assert(
+ (Register::Codes::VolatileMask & ~Register::Codes::WrapperMask) == 0,
+ "Wrapper register set must be a superset of the Volatile register set.");
+
+ // Unlike on other platforms, it is the responsibility of the VM *callee* to
+ // push the return address, while the caller must ensure that the address
+ // is stored in lr on entry. This allows the VM wrapper to work with both
+ // direct calls and tail calls.
+ masm.push(lr);
+
+ // First argument is the JSContext.
+ Register reg_cx = IntArgReg0;
+ regs.take(reg_cx);
+
+ // Stack is:
+ // ... frame ...
+  //  +16 [args]
+ // +8 descriptor
+ // +0 returnAddress (pushed by this function, caller sets as lr)
+ //
+ // Push the frame pointer to finish the exit frame, then link it up.
+ masm.Push(FramePointer);
+ masm.moveStackPtrTo(FramePointer);
+ masm.loadJSContext(reg_cx);
+ masm.enterExitFrame(reg_cx, regs.getAny(), &f);
+
+ // Save the current stack pointer as the base for copying arguments.
+ Register argsBase = InvalidReg;
+ if (f.explicitArgs) {
+ // argsBase can't be an argument register. Bad things would happen if
+ // the MoveResolver didn't throw an assertion failure first.
+ argsBase = r8;
+ regs.take(argsBase);
+ masm.Add(ARMRegister(argsBase, 64), masm.GetStackPointer64(),
+ Operand(ExitFrameLayout::SizeWithFooter()));
+ }
+
+ // Reserve space for any outparameter.
+ Register outReg = InvalidReg;
+ switch (f.outParam) {
+ case Type_Value:
+ outReg = regs.takeAny();
+ masm.reserveStack(sizeof(Value));
+ masm.moveStackPtrTo(outReg);
+ break;
+
+ case Type_Handle:
+ outReg = regs.takeAny();
+ masm.PushEmptyRooted(f.outParamRootType);
+ masm.moveStackPtrTo(outReg);
+ break;
+
+ case Type_Int32:
+ case Type_Bool:
+ outReg = regs.takeAny();
+ masm.reserveStack(sizeof(int64_t));
+ masm.moveStackPtrTo(outReg);
+ break;
+
+ case Type_Double:
+ outReg = regs.takeAny();
+ masm.reserveStack(sizeof(double));
+ masm.moveStackPtrTo(outReg);
+ break;
+
+ case Type_Pointer:
+ outReg = regs.takeAny();
+ masm.reserveStack(sizeof(uintptr_t));
+ masm.moveStackPtrTo(outReg);
+ break;
+
+ default:
+ MOZ_ASSERT(f.outParam == Type_Void);
+ break;
+ }
+
+ masm.setupUnalignedABICall(regs.getAny());
+ masm.passABIArg(reg_cx);
+
+ size_t argDisp = 0;
+
+ // Copy arguments.
+ for (uint32_t explicitArg = 0; explicitArg < f.explicitArgs; explicitArg++) {
+ switch (f.argProperties(explicitArg)) {
+ case VMFunctionData::WordByValue:
+ masm.passABIArg(MoveOperand(argsBase, argDisp),
+ (f.argPassedInFloatReg(explicitArg) ? MoveOp::DOUBLE
+ : MoveOp::GENERAL));
+ argDisp += sizeof(void*);
+ break;
+
+ case VMFunctionData::WordByRef:
+ masm.passABIArg(
+ MoveOperand(argsBase, argDisp, MoveOperand::Kind::EffectiveAddress),
+ MoveOp::GENERAL);
+ argDisp += sizeof(void*);
+ break;
+
+ case VMFunctionData::DoubleByValue:
+ case VMFunctionData::DoubleByRef:
+ MOZ_CRASH("NYI: AArch64 callVM should not be used with 128bit values.");
+ }
+ }
+
+ // Copy the semi-implicit outparam, if any.
+ // It is not a C++-abi outparam, which would get passed in the
+ // outparam register, but a real parameter to the function, which
+ // was stack-allocated above.
+ if (outReg != InvalidReg) {
+ masm.passABIArg(outReg);
+ }
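+  // Illustration only: for a VM function taking word arguments and producing,
+  // say, a Type_Int32 outparam, the native call made below has the effective
+  // shape (argument names assumed)
+  //
+  //   bool native(JSContext* cx, uintptr_t arg0, ..., int32_t* out);
+  //
+  // where |out| points at the stack space reserved in the switch above.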
+
+ masm.callWithABI(nativeFun, MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckHasExitFrame);
+
+  // The real SP was used to transfer the stack across the call, so
+  // re-initialize the pseudo stack pointer from it.
+ masm.initPseudoStackPtr();
+
+ // Test for failure.
+ switch (f.failType()) {
+ case Type_Cell:
+ masm.branchTestPtr(Assembler::Zero, r0, r0, masm.failureLabel());
+ break;
+ case Type_Bool:
+ masm.branchIfFalseBool(r0, masm.failureLabel());
+ break;
+ case Type_Void:
+ break;
+ default:
+ MOZ_CRASH("unknown failure kind");
+ }
+
+ // Load the outparam and free any allocated stack.
+ switch (f.outParam) {
+ case Type_Value:
+ masm.Ldr(ARMRegister(JSReturnReg, 64),
+ MemOperand(masm.GetStackPointer64()));
+ masm.freeStack(sizeof(Value));
+ break;
+
+ case Type_Handle:
+ masm.popRooted(f.outParamRootType, ReturnReg, JSReturnOperand);
+ break;
+
+ case Type_Int32:
+ masm.Ldr(ARMRegister(ReturnReg, 32),
+ MemOperand(masm.GetStackPointer64()));
+ masm.freeStack(sizeof(int64_t));
+ break;
+
+ case Type_Bool:
+ masm.Ldrb(ARMRegister(ReturnReg, 32),
+ MemOperand(masm.GetStackPointer64()));
+ masm.freeStack(sizeof(int64_t));
+ break;
+
+ case Type_Double:
+ masm.Ldr(ARMFPRegister(ReturnDoubleReg, 64),
+ MemOperand(masm.GetStackPointer64()));
+ masm.freeStack(sizeof(double));
+ break;
+
+ case Type_Pointer:
+ masm.Ldr(ARMRegister(ReturnReg, 64),
+ MemOperand(masm.GetStackPointer64()));
+ masm.freeStack(sizeof(uintptr_t));
+ break;
+
+ default:
+ MOZ_ASSERT(f.outParam == Type_Void);
+ break;
+ }
+
+ // Until C++ code is instrumented against Spectre, prevent speculative
+ // execution from returning any private data.
+ if (f.returnsData() && JitOptions.spectreJitToCxxCalls) {
+ masm.speculationBarrier();
+ }
+
+ // Pop ExitFooterFrame and the frame pointer.
+ masm.leaveExitFrame(0);
+ masm.pop(FramePointer);
+
+ // Return. Subtract sizeof(void*) for the frame pointer.
+ masm.retn(Imm32(sizeof(ExitFrameLayout) - sizeof(void*) +
+ f.explicitStackSlots() * sizeof(void*) +
+ f.extraValuesToPop * sizeof(Value)));
+
+ return true;
+}
+
+uint32_t JitRuntime::generatePreBarrier(JSContext* cx, MacroAssembler& masm,
+ MIRType type) {
+ AutoCreatedBy acb(masm, "JitRuntime::generatePreBarrier");
+
+ uint32_t offset = startTrampolineCode(masm);
+
+ static_assert(PreBarrierReg == r1);
+ Register temp1 = r2;
+ Register temp2 = r3;
+ Register temp3 = r4;
+ masm.push(temp1);
+ masm.push(temp2);
+ masm.push(temp3);
+
+ Label noBarrier;
+ masm.emitPreBarrierFastPath(cx->runtime(), type, temp1, temp2, temp3,
+ &noBarrier);
+
+ // Call into C++ to mark this GC thing.
+ masm.pop(temp3);
+ masm.pop(temp2);
+ masm.pop(temp1);
+
+ LiveRegisterSet regs =
+ LiveRegisterSet(GeneralRegisterSet(Registers::VolatileMask),
+ FloatRegisterSet(FloatRegisters::VolatileMask));
+
+ // Also preserve the return address.
+ regs.add(lr);
+
+ masm.PushRegsInMask(regs);
+
+ masm.movePtr(ImmPtr(cx->runtime()), r3);
+
+ masm.setupUnalignedABICall(r0);
+ masm.passABIArg(r3);
+ masm.passABIArg(PreBarrierReg);
+ masm.callWithABI(JitPreWriteBarrier(type));
+
+ // Pop the volatile regs and restore LR.
+ masm.PopRegsInMask(regs);
+ masm.abiret();
+
+ masm.bind(&noBarrier);
+ masm.pop(temp3);
+ masm.pop(temp2);
+ masm.pop(temp1);
+ masm.abiret();
+
+ return offset;
+}
+
+void JitRuntime::generateBailoutTailStub(MacroAssembler& masm,
+ Label* bailoutTail) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateBailoutTailStub");
+
+ masm.bind(bailoutTail);
+ masm.generateBailoutTail(r1, r2);
+}
diff --git a/js/src/jit/arm64/vixl/.clang-format b/js/src/jit/arm64/vixl/.clang-format
new file mode 100644
index 0000000000..122a79540d
--- /dev/null
+++ b/js/src/jit/arm64/vixl/.clang-format
@@ -0,0 +1,4 @@
+BasedOnStyle: Chromium
+
+# Ignore all comments because they aren't reflowed properly.
+CommentPragmas: "^"
diff --git a/js/src/jit/arm64/vixl/AUTHORS b/js/src/jit/arm64/vixl/AUTHORS
new file mode 100644
index 0000000000..257ec9d32b
--- /dev/null
+++ b/js/src/jit/arm64/vixl/AUTHORS
@@ -0,0 +1,8 @@
+# Below is a list of people and organisations that have contributed to the VIXL
+# project. Entries should be added to the list as:
+#
+# Name/Organization <email address>
+
+ARM Ltd. <*@arm.com>
+Google Inc. <*@google.com>
+Linaro <*@linaro.org>
diff --git a/js/src/jit/arm64/vixl/Assembler-vixl.cpp b/js/src/jit/arm64/vixl/Assembler-vixl.cpp
new file mode 100644
index 0000000000..6ed31cef78
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Assembler-vixl.cpp
@@ -0,0 +1,5318 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Assembler-vixl.h"
+
+#include <cmath>
+
+#include "jit/arm64/vixl/MacroAssembler-vixl.h"
+
+namespace vixl {
+
+// CPURegList utilities.
+CPURegister CPURegList::PopLowestIndex() {
+ if (IsEmpty()) {
+ return NoCPUReg;
+ }
+ int index = CountTrailingZeros(list_);
+ VIXL_ASSERT((1ULL << index) & list_);
+ Remove(index);
+ return CPURegister(index, size_, type_);
+}
+
+
+CPURegister CPURegList::PopHighestIndex() {
+ VIXL_ASSERT(IsValid());
+ if (IsEmpty()) {
+ return NoCPUReg;
+ }
+ int index = CountLeadingZeros(list_);
+ index = kRegListSizeInBits - 1 - index;
+ VIXL_ASSERT((1ULL << index) & list_);
+ Remove(index);
+ return CPURegister(index, size_, type_);
+}
+
+
+bool CPURegList::IsValid() const {
+ if ((type_ == CPURegister::kRegister) ||
+ (type_ == CPURegister::kVRegister)) {
+ bool is_valid = true;
+ // Try to create a CPURegister for each element in the list.
+ for (int i = 0; i < kRegListSizeInBits; i++) {
+ if (((list_ >> i) & 1) != 0) {
+ is_valid &= CPURegister(i, size_, type_).IsValid();
+ }
+ }
+ return is_valid;
+ } else if (type_ == CPURegister::kNoRegister) {
+ // We can't use IsEmpty here because that asserts IsValid().
+ return list_ == 0;
+ } else {
+ return false;
+ }
+}
+
+
+void CPURegList::RemoveCalleeSaved() {
+ if (type() == CPURegister::kRegister) {
+ Remove(GetCalleeSaved(RegisterSizeInBits()));
+ } else if (type() == CPURegister::kVRegister) {
+ Remove(GetCalleeSavedV(RegisterSizeInBits()));
+ } else {
+ VIXL_ASSERT(type() == CPURegister::kNoRegister);
+ VIXL_ASSERT(IsEmpty());
+ // The list must already be empty, so do nothing.
+ }
+}
+
+
+CPURegList CPURegList::Union(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3) {
+ return Union(list_1, Union(list_2, list_3));
+}
+
+
+CPURegList CPURegList::Union(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3,
+ const CPURegList& list_4) {
+ return Union(Union(list_1, list_2), Union(list_3, list_4));
+}
+
+
+CPURegList CPURegList::Intersection(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3) {
+ return Intersection(list_1, Intersection(list_2, list_3));
+}
+
+
+CPURegList CPURegList::Intersection(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3,
+ const CPURegList& list_4) {
+ return Intersection(Intersection(list_1, list_2),
+ Intersection(list_3, list_4));
+}
+
+
+CPURegList CPURegList::GetCalleeSaved(unsigned size) {
+ return CPURegList(CPURegister::kRegister, size, 19, 29);
+}
+
+
+CPURegList CPURegList::GetCalleeSavedV(unsigned size) {
+ return CPURegList(CPURegister::kVRegister, size, 8, 15);
+}
+
+
+CPURegList CPURegList::GetCallerSaved(unsigned size) {
+ // Registers x0-x18 and lr (x30) are caller-saved.
+ CPURegList list = CPURegList(CPURegister::kRegister, size, 0, 18);
+ // Do not use lr directly to avoid initialisation order fiasco bugs for users.
+ list.Combine(Register(30, kXRegSize));
+ return list;
+}
+
+
+CPURegList CPURegList::GetCallerSavedV(unsigned size) {
+ // Registers d0-d7 and d16-d31 are caller-saved.
+ CPURegList list = CPURegList(CPURegister::kVRegister, size, 0, 7);
+ list.Combine(CPURegList(CPURegister::kVRegister, size, 16, 31));
+ return list;
+}
+
+
+const CPURegList kCalleeSaved = CPURegList::GetCalleeSaved();
+const CPURegList kCalleeSavedV = CPURegList::GetCalleeSavedV();
+const CPURegList kCallerSaved = CPURegList::GetCallerSaved();
+const CPURegList kCallerSavedV = CPURegList::GetCallerSavedV();
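+
+// Example of how these lists are typically consumed (a sketch using only the
+// operations shown above; kCalleeSaved covers x19-x29, kCalleeSavedV d8-d15):
+//
+//   CPURegList to_save = kCalleeSaved;
+//   while (!to_save.IsEmpty()) {
+//     CPURegister reg = to_save.PopLowestIndex();
+//     // ... save or restore |reg| ...
+//   }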
+
+
+// Registers.
+#define WREG(n) w##n,
+const Register Register::wregisters[] = {
+REGISTER_CODE_LIST(WREG)
+};
+#undef WREG
+
+#define XREG(n) x##n,
+const Register Register::xregisters[] = {
+REGISTER_CODE_LIST(XREG)
+};
+#undef XREG
+
+#define BREG(n) b##n,
+const VRegister VRegister::bregisters[] = {
+REGISTER_CODE_LIST(BREG)
+};
+#undef BREG
+
+#define HREG(n) h##n,
+const VRegister VRegister::hregisters[] = {
+REGISTER_CODE_LIST(HREG)
+};
+#undef HREG
+
+#define SREG(n) s##n,
+const VRegister VRegister::sregisters[] = {
+REGISTER_CODE_LIST(SREG)
+};
+#undef SREG
+
+#define DREG(n) d##n,
+const VRegister VRegister::dregisters[] = {
+REGISTER_CODE_LIST(DREG)
+};
+#undef DREG
+
+#define QREG(n) q##n,
+const VRegister VRegister::qregisters[] = {
+REGISTER_CODE_LIST(QREG)
+};
+#undef QREG
+
+#define VREG(n) v##n,
+const VRegister VRegister::vregisters[] = {
+REGISTER_CODE_LIST(VREG)
+};
+#undef VREG
+
+
+const Register& Register::WRegFromCode(unsigned code) {
+ if (code == kSPRegInternalCode) {
+ return wsp;
+ } else {
+ VIXL_ASSERT(code < kNumberOfRegisters);
+ return wregisters[code];
+ }
+}
+
+
+const Register& Register::XRegFromCode(unsigned code) {
+ if (code == kSPRegInternalCode) {
+ return sp;
+ } else {
+ VIXL_ASSERT(code < kNumberOfRegisters);
+ return xregisters[code];
+ }
+}
+
+
+const VRegister& VRegister::BRegFromCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ return bregisters[code];
+}
+
+
+const VRegister& VRegister::HRegFromCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ return hregisters[code];
+}
+
+
+const VRegister& VRegister::SRegFromCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ return sregisters[code];
+}
+
+
+const VRegister& VRegister::DRegFromCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ return dregisters[code];
+}
+
+
+const VRegister& VRegister::QRegFromCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ return qregisters[code];
+}
+
+
+const VRegister& VRegister::VRegFromCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ return vregisters[code];
+}
+
+
+const Register& CPURegister::W() const {
+ VIXL_ASSERT(IsValidRegister());
+ return Register::WRegFromCode(code_);
+}
+
+
+const Register& CPURegister::X() const {
+ VIXL_ASSERT(IsValidRegister());
+ return Register::XRegFromCode(code_);
+}
+
+
+const VRegister& CPURegister::B() const {
+ VIXL_ASSERT(IsValidVRegister());
+ return VRegister::BRegFromCode(code_);
+}
+
+
+const VRegister& CPURegister::H() const {
+ VIXL_ASSERT(IsValidVRegister());
+ return VRegister::HRegFromCode(code_);
+}
+
+
+const VRegister& CPURegister::S() const {
+ VIXL_ASSERT(IsValidVRegister());
+ return VRegister::SRegFromCode(code_);
+}
+
+
+const VRegister& CPURegister::D() const {
+ VIXL_ASSERT(IsValidVRegister());
+ return VRegister::DRegFromCode(code_);
+}
+
+
+const VRegister& CPURegister::Q() const {
+ VIXL_ASSERT(IsValidVRegister());
+ return VRegister::QRegFromCode(code_);
+}
+
+
+const VRegister& CPURegister::V() const {
+ VIXL_ASSERT(IsValidVRegister());
+ return VRegister::VRegFromCode(code_);
+}
+
+
+// Operand.
+Operand::Operand(int64_t immediate)
+ : immediate_(immediate),
+ reg_(NoReg),
+ shift_(NO_SHIFT),
+ extend_(NO_EXTEND),
+ shift_amount_(0) {}
+
+
+Operand::Operand(Register reg, Shift shift, unsigned shift_amount)
+ : reg_(reg),
+ shift_(shift),
+ extend_(NO_EXTEND),
+ shift_amount_(shift_amount) {
+ VIXL_ASSERT(shift != MSL);
+ VIXL_ASSERT(reg.Is64Bits() || (shift_amount < kWRegSize));
+ VIXL_ASSERT(reg.Is32Bits() || (shift_amount < kXRegSize));
+ VIXL_ASSERT(!reg.IsSP());
+}
+
+
+Operand::Operand(Register reg, Extend extend, unsigned shift_amount)
+ : reg_(reg),
+ shift_(NO_SHIFT),
+ extend_(extend),
+ shift_amount_(shift_amount) {
+ VIXL_ASSERT(reg.IsValid());
+ VIXL_ASSERT(shift_amount <= 4);
+ VIXL_ASSERT(!reg.IsSP());
+
+ // Extend modes SXTX and UXTX require a 64-bit register.
+ VIXL_ASSERT(reg.Is64Bits() || ((extend != SXTX) && (extend != UXTX)));
+}
+
+
+bool Operand::IsImmediate() const {
+ return reg_.Is(NoReg);
+}
+
+
+bool Operand::IsShiftedRegister() const {
+ return reg_.IsValid() && (shift_ != NO_SHIFT);
+}
+
+
+bool Operand::IsExtendedRegister() const {
+ return reg_.IsValid() && (extend_ != NO_EXTEND);
+}
+
+
+bool Operand::IsZero() const {
+ if (IsImmediate()) {
+ return immediate() == 0;
+ } else {
+ return reg().IsZero();
+ }
+}
+
+
+Operand Operand::ToExtendedRegister() const {
+ VIXL_ASSERT(IsShiftedRegister());
+ VIXL_ASSERT((shift_ == LSL) && (shift_amount_ <= 4));
+ return Operand(reg_, reg_.Is64Bits() ? UXTX : UXTW, shift_amount_);
+}
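+
+// The three operand flavours exercised by the constructors above, with
+// illustrative values:
+//
+//   Operand(42);             // immediate
+//   Operand(x1, LSL, 3);     // shifted register: x1 << 3
+//   Operand(w1, UXTW, 2);    // extended register: zero-extend w1, then << 2
+//
+// ToExtendedRegister() rewrites an LSL-by-at-most-4 shifted register into the
+// equivalent extended form, as the assertions above require.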
+
+
+// MemOperand
+MemOperand::MemOperand(Register base, int64_t offset, AddrMode addrmode)
+ : base_(base), regoffset_(NoReg), offset_(offset), addrmode_(addrmode) {
+ VIXL_ASSERT(base.Is64Bits() && !base.IsZero());
+}
+
+
+MemOperand::MemOperand(Register base,
+ Register regoffset,
+ Extend extend,
+ unsigned shift_amount)
+ : base_(base), regoffset_(regoffset), offset_(0), addrmode_(Offset),
+ shift_(NO_SHIFT), extend_(extend), shift_amount_(shift_amount) {
+ VIXL_ASSERT(base.Is64Bits() && !base.IsZero());
+ VIXL_ASSERT(!regoffset.IsSP());
+ VIXL_ASSERT((extend == UXTW) || (extend == SXTW) || (extend == SXTX));
+
+ // SXTX extend mode requires a 64-bit offset register.
+ VIXL_ASSERT(regoffset.Is64Bits() || (extend != SXTX));
+}
+
+
+MemOperand::MemOperand(Register base,
+ Register regoffset,
+ Shift shift,
+ unsigned shift_amount)
+ : base_(base), regoffset_(regoffset), offset_(0), addrmode_(Offset),
+ shift_(shift), extend_(NO_EXTEND), shift_amount_(shift_amount) {
+ VIXL_ASSERT(base.Is64Bits() && !base.IsZero());
+ VIXL_ASSERT(regoffset.Is64Bits() && !regoffset.IsSP());
+ VIXL_ASSERT(shift == LSL);
+}
+
+
+MemOperand::MemOperand(Register base, const Operand& offset, AddrMode addrmode)
+ : base_(base), regoffset_(NoReg), addrmode_(addrmode) {
+ VIXL_ASSERT(base.Is64Bits() && !base.IsZero());
+
+ if (offset.IsImmediate()) {
+ offset_ = offset.immediate();
+ } else if (offset.IsShiftedRegister()) {
+ VIXL_ASSERT((addrmode == Offset) || (addrmode == PostIndex));
+
+ regoffset_ = offset.reg();
+ shift_ = offset.shift();
+ shift_amount_ = offset.shift_amount();
+
+ extend_ = NO_EXTEND;
+ offset_ = 0;
+
+ // These assertions match those in the shifted-register constructor.
+ VIXL_ASSERT(regoffset_.Is64Bits() && !regoffset_.IsSP());
+ VIXL_ASSERT(shift_ == LSL);
+ } else {
+ VIXL_ASSERT(offset.IsExtendedRegister());
+ VIXL_ASSERT(addrmode == Offset);
+
+ regoffset_ = offset.reg();
+ extend_ = offset.extend();
+ shift_amount_ = offset.shift_amount();
+
+ shift_ = NO_SHIFT;
+ offset_ = 0;
+
+ // These assertions match those in the extended-register constructor.
+ VIXL_ASSERT(!regoffset_.IsSP());
+ VIXL_ASSERT((extend_ == UXTW) || (extend_ == SXTW) || (extend_ == SXTX));
+ VIXL_ASSERT((regoffset_.Is64Bits() || (extend_ != SXTX)));
+ }
+}
+
+
+bool MemOperand::IsImmediateOffset() const {
+ return (addrmode_ == Offset) && regoffset_.Is(NoReg);
+}
+
+
+bool MemOperand::IsRegisterOffset() const {
+ return (addrmode_ == Offset) && !regoffset_.Is(NoReg);
+}
+
+
+bool MemOperand::IsPreIndex() const {
+ return addrmode_ == PreIndex;
+}
+
+
+bool MemOperand::IsPostIndex() const {
+ return addrmode_ == PostIndex;
+}
+
+
+void MemOperand::AddOffset(int64_t offset) {
+ VIXL_ASSERT(IsImmediateOffset());
+ offset_ += offset;
+}
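+
+// Addressing modes covered by the constructors above, with illustrative
+// operands:
+//
+//   MemOperand(x0, 16);             // [x0 + 16]         immediate offset
+//   MemOperand(x0, x1, LSL, 3);     // [x0 + (x1 << 3)]  register offset
+//   MemOperand(x0, w1, SXTW, 0);    // [x0 + sxtw(w1)]   extended register
+//   MemOperand(x0, 16, PreIndex);   // [x0 + 16]!        pre-index
+//   MemOperand(x0, 16, PostIndex);  // [x0], 16          post-index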
+
+
+// Assembler
+Assembler::Assembler(PositionIndependentCodeOption pic)
+ : pic_(pic),
+ cpu_features_(CPUFeatures::AArch64LegacyBaseline())
+{
+ // Mozilla change: always use maximally-present features.
+ cpu_features_.Combine(CPUFeatures::InferFromOS());
+
+ // Mozilla change: Compile time hard-coded value from js-config.mozbuild.
+#ifndef MOZ_AARCH64_JSCVT
+# error "MOZ_AARCH64_JSCVT must be defined."
+#elif MOZ_AARCH64_JSCVT >= 1
+  // Note: the vixl backend implements the JSCVT flag as a boolean, even though
+  // the ARMv8 documentation reserves 3 extra bits for forward compatibility.
+ cpu_features_.Combine(CPUFeatures::kJSCVT);
+#endif
+}
+
+
+// Code generation.
+void Assembler::br(const Register& xn) {
+ VIXL_ASSERT(xn.Is64Bits());
+ Emit(BR | Rn(xn));
+}
+
+
+void Assembler::blr(const Register& xn) {
+ VIXL_ASSERT(xn.Is64Bits());
+ Emit(BLR | Rn(xn));
+}
+
+
+void Assembler::ret(const Register& xn) {
+ VIXL_ASSERT(xn.Is64Bits());
+ Emit(RET | Rn(xn));
+}
+
+
+void Assembler::NEONTable(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEONTableOp op) {
+ VIXL_ASSERT(vd.Is16B() || vd.Is8B());
+ VIXL_ASSERT(vn.Is16B());
+ VIXL_ASSERT(AreSameFormat(vd, vm));
+ Emit(op | (vd.IsQ() ? NEON_Q : 0) | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONTable(vd, vn, vm, NEON_TBL_1v);
+}
+
+
+void Assembler::tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vm) {
+ USE(vn2);
+ VIXL_ASSERT(AreSameFormat(vn, vn2));
+ VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters));
+
+ NEONTable(vd, vn, vm, NEON_TBL_2v);
+}
+
+
+void Assembler::tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vm) {
+ USE(vn2, vn3);
+ VIXL_ASSERT(AreSameFormat(vn, vn2, vn3));
+ VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters));
+ VIXL_ASSERT(vn3.code() == ((vn.code() + 2) % kNumberOfVRegisters));
+
+ NEONTable(vd, vn, vm, NEON_TBL_3v);
+}
+
+
+void Assembler::tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vn4,
+ const VRegister& vm) {
+ USE(vn2, vn3, vn4);
+ VIXL_ASSERT(AreSameFormat(vn, vn2, vn3, vn4));
+ VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters));
+ VIXL_ASSERT(vn3.code() == ((vn.code() + 2) % kNumberOfVRegisters));
+ VIXL_ASSERT(vn4.code() == ((vn.code() + 3) % kNumberOfVRegisters));
+
+ NEONTable(vd, vn, vm, NEON_TBL_4v);
+}
+
+
+void Assembler::tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONTable(vd, vn, vm, NEON_TBX_1v);
+}
+
+
+void Assembler::tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vm) {
+ USE(vn2);
+ VIXL_ASSERT(AreSameFormat(vn, vn2));
+ VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters));
+
+ NEONTable(vd, vn, vm, NEON_TBX_2v);
+}
+
+
+void Assembler::tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vm) {
+ USE(vn2, vn3);
+ VIXL_ASSERT(AreSameFormat(vn, vn2, vn3));
+ VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters));
+ VIXL_ASSERT(vn3.code() == ((vn.code() + 2) % kNumberOfVRegisters));
+
+ NEONTable(vd, vn, vm, NEON_TBX_3v);
+}
+
+
+void Assembler::tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vn4,
+ const VRegister& vm) {
+ USE(vn2, vn3, vn4);
+ VIXL_ASSERT(AreSameFormat(vn, vn2, vn3, vn4));
+ VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters));
+ VIXL_ASSERT(vn3.code() == ((vn.code() + 2) % kNumberOfVRegisters));
+ VIXL_ASSERT(vn4.code() == ((vn.code() + 3) % kNumberOfVRegisters));
+
+ NEONTable(vd, vn, vm, NEON_TBX_4v);
+}
+
+
+void Assembler::add(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSub(rd, rn, operand, LeaveFlags, ADD);
+}
+
+
+void Assembler::adds(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSub(rd, rn, operand, SetFlags, ADD);
+}
+
+
+void Assembler::cmn(const Register& rn,
+ const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rn);
+ adds(zr, rn, operand);
+}
+
+
+void Assembler::sub(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSub(rd, rn, operand, LeaveFlags, SUB);
+}
+
+
+void Assembler::subs(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSub(rd, rn, operand, SetFlags, SUB);
+}
+
+
+void Assembler::cmp(const Register& rn, const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rn);
+ subs(zr, rn, operand);
+}
+
+
+void Assembler::neg(const Register& rd, const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rd);
+ sub(rd, zr, operand);
+}
+
+
+void Assembler::negs(const Register& rd, const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rd);
+ subs(rd, zr, operand);
+}
+
+
+void Assembler::adc(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarry(rd, rn, operand, LeaveFlags, ADC);
+}
+
+
+void Assembler::adcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarry(rd, rn, operand, SetFlags, ADC);
+}
+
+
+void Assembler::sbc(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarry(rd, rn, operand, LeaveFlags, SBC);
+}
+
+
+void Assembler::sbcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarry(rd, rn, operand, SetFlags, SBC);
+}
+
+
+void Assembler::ngc(const Register& rd, const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rd);
+ sbc(rd, zr, operand);
+}
+
+
+void Assembler::ngcs(const Register& rd, const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rd);
+ sbcs(rd, zr, operand);
+}
+
+
+// Logical instructions.
+void Assembler::and_(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Logical(rd, rn, operand, AND);
+}
+
+
+void Assembler::bic(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Logical(rd, rn, operand, BIC);
+}
+
+
+void Assembler::bics(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Logical(rd, rn, operand, BICS);
+}
+
+
+void Assembler::orr(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Logical(rd, rn, operand, ORR);
+}
+
+
+void Assembler::orn(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Logical(rd, rn, operand, ORN);
+}
+
+
+void Assembler::eor(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Logical(rd, rn, operand, EOR);
+}
+
+
+void Assembler::eon(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Logical(rd, rn, operand, EON);
+}
+
+
+void Assembler::lslv(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Emit(SF(rd) | LSLV | Rm(rm) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::lsrv(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Emit(SF(rd) | LSRV | Rm(rm) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::asrv(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Emit(SF(rd) | ASRV | Rm(rm) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::rorv(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Emit(SF(rd) | RORV | Rm(rm) | Rn(rn) | Rd(rd));
+}
+
+
+// Bitfield operations.
+void Assembler::bfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
+ Emit(SF(rd) | BFM | N |
+ ImmR(immr, rd.size()) | ImmS(imms, rn.size()) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::sbfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms) {
+ VIXL_ASSERT(rd.Is64Bits() || rn.Is32Bits());
+ Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
+ Emit(SF(rd) | SBFM | N |
+ ImmR(immr, rd.size()) | ImmS(imms, rn.size()) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::ubfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
+ Emit(SF(rd) | UBFM | N |
+ ImmR(immr, rd.size()) | ImmS(imms, rn.size()) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::extr(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ unsigned lsb) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
+ Emit(SF(rd) | EXTR | N | Rm(rm) | ImmS(lsb, rn.size()) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::csel(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond) {
+ ConditionalSelect(rd, rn, rm, cond, CSEL);
+}
+
+
+void Assembler::csinc(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond) {
+ ConditionalSelect(rd, rn, rm, cond, CSINC);
+}
+
+
+void Assembler::csinv(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond) {
+ ConditionalSelect(rd, rn, rm, cond, CSINV);
+}
+
+
+void Assembler::csneg(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond) {
+ ConditionalSelect(rd, rn, rm, cond, CSNEG);
+}
+
+
+void Assembler::cset(const Register &rd, Condition cond) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ Register zr = AppropriateZeroRegFor(rd);
+ csinc(rd, zr, zr, InvertCondition(cond));
+}
+
+
+void Assembler::csetm(const Register &rd, Condition cond) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ Register zr = AppropriateZeroRegFor(rd);
+ csinv(rd, zr, zr, InvertCondition(cond));
+}
+
+
+void Assembler::cinc(const Register &rd, const Register &rn, Condition cond) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ csinc(rd, rn, rn, InvertCondition(cond));
+}
+
+
+void Assembler::cinv(const Register &rd, const Register &rn, Condition cond) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ csinv(rd, rn, rn, InvertCondition(cond));
+}
+
+
+void Assembler::cneg(const Register &rd, const Register &rn, Condition cond) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ csneg(rd, rn, rn, InvertCondition(cond));
+}
+
+
+void Assembler::ConditionalSelect(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond,
+ ConditionalSelectOp op) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Emit(SF(rd) | op | Rm(rm) | Cond(cond) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::ccmn(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond) {
+ ConditionalCompare(rn, operand, nzcv, cond, CCMN);
+}
+
+
+void Assembler::ccmp(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond) {
+ ConditionalCompare(rn, operand, nzcv, cond, CCMP);
+}
+
+
+void Assembler::DataProcessing3Source(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra,
+ DataProcessing3SourceOp op) {
+ Emit(SF(rd) | op | Rm(rm) | Ra(ra) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32b(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32B | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32h(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32H | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32w(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32W | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32x(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is64Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32X | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32cb(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32CB | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32ch(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32CH | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32cw(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32CW | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32cx(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is64Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32CX | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::mul(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(AreSameSizeAndType(rd, rn, rm));
+ DataProcessing3Source(rd, rn, rm, AppropriateZeroRegFor(rd), MADD);
+}
+
+
+void Assembler::madd(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ DataProcessing3Source(rd, rn, rm, ra, MADD);
+}
+
+
+void Assembler::mneg(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(AreSameSizeAndType(rd, rn, rm));
+ DataProcessing3Source(rd, rn, rm, AppropriateZeroRegFor(rd), MSUB);
+}
+
+
+void Assembler::msub(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ DataProcessing3Source(rd, rn, rm, ra, MSUB);
+}
+
+
+void Assembler::umaddl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(rd.Is64Bits() && ra.Is64Bits());
+ VIXL_ASSERT(rn.Is32Bits() && rm.Is32Bits());
+ DataProcessing3Source(rd, rn, rm, ra, UMADDL_x);
+}
+
+
+void Assembler::smaddl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(rd.Is64Bits() && ra.Is64Bits());
+ VIXL_ASSERT(rn.Is32Bits() && rm.Is32Bits());
+ DataProcessing3Source(rd, rn, rm, ra, SMADDL_x);
+}
+
+
+void Assembler::umsubl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(rd.Is64Bits() && ra.Is64Bits());
+ VIXL_ASSERT(rn.Is32Bits() && rm.Is32Bits());
+ DataProcessing3Source(rd, rn, rm, ra, UMSUBL_x);
+}
+
+
+void Assembler::smsubl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(rd.Is64Bits() && ra.Is64Bits());
+ VIXL_ASSERT(rn.Is32Bits() && rm.Is32Bits());
+ DataProcessing3Source(rd, rn, rm, ra, SMSUBL_x);
+}
+
+
+void Assembler::smull(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is64Bits());
+ VIXL_ASSERT(rn.Is32Bits() && rm.Is32Bits());
+ DataProcessing3Source(rd, rn, rm, xzr, SMADDL_x);
+}
+
+
+void Assembler::sdiv(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Emit(SF(rd) | SDIV | Rm(rm) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::smulh(const Register& xd,
+ const Register& xn,
+ const Register& xm) {
+ VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits() && xm.Is64Bits());
+ DataProcessing3Source(xd, xn, xm, xzr, SMULH_x);
+}
+
+
+void Assembler::umulh(const Register& xd,
+ const Register& xn,
+ const Register& xm) {
+ VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits() && xm.Is64Bits());
+ DataProcessing3Source(xd, xn, xm, xzr, UMULH_x);
+}
+
+
+void Assembler::udiv(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Emit(SF(rd) | UDIV | Rm(rm) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::rbit(const Register& rd,
+ const Register& rn) {
+ DataProcessing1Source(rd, rn, RBIT);
+}
+
+
+void Assembler::rev16(const Register& rd,
+ const Register& rn) {
+ DataProcessing1Source(rd, rn, REV16);
+}
+
+
+void Assembler::rev32(const Register& rd,
+ const Register& rn) {
+ VIXL_ASSERT(rd.Is64Bits());
+ DataProcessing1Source(rd, rn, REV);
+}
+
+
+void Assembler::rev(const Register& rd,
+ const Register& rn) {
+ DataProcessing1Source(rd, rn, rd.Is64Bits() ? REV_x : REV_w);
+}
+
+
+void Assembler::clz(const Register& rd,
+ const Register& rn) {
+ DataProcessing1Source(rd, rn, CLZ);
+}
+
+
+void Assembler::cls(const Register& rd,
+ const Register& rn) {
+ DataProcessing1Source(rd, rn, CLS);
+}
+
+
+void Assembler::ldp(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& src) {
+ LoadStorePair(rt, rt2, src, LoadPairOpFor(rt, rt2));
+}
+
+
+void Assembler::stp(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& dst) {
+ LoadStorePair(rt, rt2, dst, StorePairOpFor(rt, rt2));
+}
+
+
+void Assembler::ldpsw(const Register& rt,
+ const Register& rt2,
+ const MemOperand& src) {
+ VIXL_ASSERT(rt.Is64Bits());
+ LoadStorePair(rt, rt2, src, LDPSW_x);
+}
+
+
+void Assembler::LoadStorePair(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& addr,
+ LoadStorePairOp op) {
+ // 'rt' and 'rt2' can only be aliased for stores.
+ VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || !rt.Is(rt2));
+ VIXL_ASSERT(AreSameSizeAndType(rt, rt2));
+ VIXL_ASSERT(IsImmLSPair(addr.offset(), CalcLSPairDataSize(op)));
+
+ int offset = static_cast<int>(addr.offset());
+ Instr memop = op | Rt(rt) | Rt2(rt2) | RnSP(addr.base()) |
+ ImmLSPair(offset, CalcLSPairDataSize(op));
+
+ Instr addrmodeop;
+ if (addr.IsImmediateOffset()) {
+ addrmodeop = LoadStorePairOffsetFixed;
+ } else {
+ VIXL_ASSERT(addr.offset() != 0);
+ if (addr.IsPreIndex()) {
+ addrmodeop = LoadStorePairPreIndexFixed;
+ } else {
+ VIXL_ASSERT(addr.IsPostIndex());
+ addrmodeop = LoadStorePairPostIndexFixed;
+ }
+ }
+ Emit(addrmodeop | memop);
+}
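+
+// Typical pair accesses built on the encoder above (receiver name assumed;
+// the immediate must satisfy IsImmLSPair for the access size, as asserted):
+//
+//   assm.stp(x19, x20, MemOperand(sp, -16, PreIndex));  // push a pair
+//   assm.ldp(x19, x20, MemOperand(sp, 16, PostIndex));  // pop it back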
+
+
+void Assembler::ldnp(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& src) {
+ LoadStorePairNonTemporal(rt, rt2, src,
+ LoadPairNonTemporalOpFor(rt, rt2));
+}
+
+
+void Assembler::stnp(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& dst) {
+ LoadStorePairNonTemporal(rt, rt2, dst,
+ StorePairNonTemporalOpFor(rt, rt2));
+}
+
+
+void Assembler::LoadStorePairNonTemporal(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& addr,
+ LoadStorePairNonTemporalOp op) {
+ VIXL_ASSERT(!rt.Is(rt2));
+ VIXL_ASSERT(AreSameSizeAndType(rt, rt2));
+ VIXL_ASSERT(addr.IsImmediateOffset());
+
+ unsigned size = CalcLSPairDataSize(
+ static_cast<LoadStorePairOp>(op & LoadStorePairMask));
+ VIXL_ASSERT(IsImmLSPair(addr.offset(), size));
+ int offset = static_cast<int>(addr.offset());
+ Emit(op | Rt(rt) | Rt2(rt2) | RnSP(addr.base()) | ImmLSPair(offset, size));
+}
+
+
+// Memory instructions.
+void Assembler::ldrb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, src, LDRB_w, option);
+}
+
+
+void Assembler::strb(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, dst, STRB_w, option);
+}
+
+
+void Assembler::ldrsb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, src, rt.Is64Bits() ? LDRSB_x : LDRSB_w, option);
+}
+
+
+void Assembler::ldrh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, src, LDRH_w, option);
+}
+
+
+void Assembler::strh(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, dst, STRH_w, option);
+}
+
+
+void Assembler::ldrsh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, src, rt.Is64Bits() ? LDRSH_x : LDRSH_w, option);
+}
+
+
+void Assembler::ldr(const CPURegister& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, src, LoadOpFor(rt), option);
+}
+
+
+void Assembler::str(const CPURegister& rt, const MemOperand& dst,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, dst, StoreOpFor(rt), option);
+}
+
+
+void Assembler::ldrsw(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(rt.Is64Bits());
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, src, LDRSW_x, option);
+}
+
+
+void Assembler::ldurb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, src, LDRB_w, option);
+}
+
+
+void Assembler::sturb(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, dst, STRB_w, option);
+}
+
+
+void Assembler::ldursb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, src, rt.Is64Bits() ? LDRSB_x : LDRSB_w, option);
+}
+
+
+void Assembler::ldurh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, src, LDRH_w, option);
+}
+
+
+void Assembler::sturh(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, dst, STRH_w, option);
+}
+
+
+void Assembler::ldursh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, src, rt.Is64Bits() ? LDRSH_x : LDRSH_w, option);
+}
+
+
+void Assembler::ldur(const CPURegister& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, src, LoadOpFor(rt), option);
+}
+
+
+void Assembler::stur(const CPURegister& rt, const MemOperand& dst,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, dst, StoreOpFor(rt), option);
+}
+
+
+void Assembler::ldursw(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(rt.Is64Bits());
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, src, LDRSW_x, option);
+}
+
+
+void Assembler::ldrsw(const Register& rt, int imm19) {
+ Emit(LDRSW_x_lit | ImmLLiteral(imm19) | Rt(rt));
+}
+
+
+void Assembler::ldr(const CPURegister& rt, int imm19) {
+ LoadLiteralOp op = LoadLiteralOpFor(rt);
+ Emit(op | ImmLLiteral(imm19) | Rt(rt));
+}
+
+// clang-format off
+#define COMPARE_AND_SWAP_W_X_LIST(V) \
+ V(cas, CAS) \
+ V(casa, CASA) \
+ V(casl, CASL) \
+ V(casal, CASAL)
+// clang-format on
+
+#define DEFINE_ASM_FUNC(FN, OP) \
+ void Assembler::FN(const Register& rs, const Register& rt, \
+ const MemOperand& src) { \
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); \
+ LoadStoreExclusive op = rt.Is64Bits() ? OP##_x : OP##_w; \
+ Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.base())); \
+ }
+COMPARE_AND_SWAP_W_X_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+// clang-format off
+#define COMPARE_AND_SWAP_W_LIST(V) \
+ V(casb, CASB) \
+ V(casab, CASAB) \
+ V(caslb, CASLB) \
+ V(casalb, CASALB) \
+ V(cash, CASH) \
+ V(casah, CASAH) \
+ V(caslh, CASLH) \
+ V(casalh, CASALH)
+// clang-format on
+
+#define DEFINE_ASM_FUNC(FN, OP) \
+ void Assembler::FN(const Register& rs, const Register& rt, \
+ const MemOperand& src) { \
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); \
+ Emit(OP | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.base())); \
+ }
+COMPARE_AND_SWAP_W_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+// clang-format off
+#define COMPARE_AND_SWAP_PAIR_LIST(V) \
+ V(casp, CASP) \
+ V(caspa, CASPA) \
+ V(caspl, CASPL) \
+ V(caspal, CASPAL)
+// clang-format on
+
+#define DEFINE_ASM_FUNC(FN, OP) \
+ void Assembler::FN(const Register& rs, const Register& rs1, \
+ const Register& rt, const Register& rt1, \
+ const MemOperand& src) { \
+ USE(rs1, rt1); \
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); \
+ VIXL_ASSERT(AreEven(rs, rt)); \
+ VIXL_ASSERT(AreConsecutive(rs, rs1)); \
+ VIXL_ASSERT(AreConsecutive(rt, rt1)); \
+ LoadStoreExclusive op = rt.Is64Bits() ? OP##_x : OP##_w; \
+ Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.base())); \
+ }
+COMPARE_AND_SWAP_PAIR_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+void Assembler::prfm(PrefetchOperation op, int imm19) {
+ Emit(PRFM_lit | ImmPrefetchOperation(op) | ImmLLiteral(imm19));
+}
+
+
+// Exclusive-access instructions.
+void Assembler::stxrb(const Register& rs,
+ const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ Emit(STXRB_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::stxrh(const Register& rs,
+ const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ Emit(STXRH_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::stxr(const Register& rs,
+ const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? STXR_x : STXR_w;
+ Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::ldxrb(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ Emit(LDXRB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::ldxrh(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ Emit(LDXRH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::ldxr(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? LDXR_x : LDXR_w;
+ Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::stxp(const Register& rs,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& dst) {
+ VIXL_ASSERT(rt.size() == rt2.size());
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? STXP_x : STXP_w;
+ Emit(op | Rs(rs) | Rt(rt) | Rt2(rt2) | RnSP(dst.base()));
+}
+
+
+void Assembler::ldxp(const Register& rt,
+ const Register& rt2,
+ const MemOperand& src) {
+ VIXL_ASSERT(rt.size() == rt2.size());
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? LDXP_x : LDXP_w;
+ Emit(op | Rs_mask | Rt(rt) | Rt2(rt2) | RnSP(src.base()));
+}
+
+
+void Assembler::stlxrb(const Register& rs,
+ const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ Emit(STLXRB_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::stlxrh(const Register& rs,
+ const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ Emit(STLXRH_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::stlxr(const Register& rs,
+ const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? STLXR_x : STLXR_w;
+ Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::ldaxrb(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ Emit(LDAXRB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::ldaxrh(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ Emit(LDAXRH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::ldaxr(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? LDAXR_x : LDAXR_w;
+ Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::stlxp(const Register& rs,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& dst) {
+ VIXL_ASSERT(rt.size() == rt2.size());
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? STLXP_x : STLXP_w;
+ Emit(op | Rs(rs) | Rt(rt) | Rt2(rt2) | RnSP(dst.base()));
+}
+
+
+void Assembler::ldaxp(const Register& rt,
+ const Register& rt2,
+ const MemOperand& src) {
+ VIXL_ASSERT(rt.size() == rt2.size());
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? LDAXP_x : LDAXP_w;
+ Emit(op | Rs_mask | Rt(rt) | Rt2(rt2) | RnSP(src.base()));
+}
+
+
+void Assembler::stlrb(const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ Emit(STLRB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::stlrh(const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ Emit(STLRH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::stlr(const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? STLR_x : STLR_w;
+ Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::ldarb(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ Emit(LDARB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::ldarh(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ Emit(LDARH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::ldar(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? LDAR_x : LDAR_w;
+ Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+// These macros generate all the variations of the atomic memory operations,
+// e.g. ldadd, ldadda, ldaddb, staddl, etc.
+// For a full list of the methods with comments, see the assembler header file.
+
+// clang-format off
+#define ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(V, DEF) \
+ V(DEF, add, LDADD) \
+ V(DEF, clr, LDCLR) \
+ V(DEF, eor, LDEOR) \
+ V(DEF, set, LDSET) \
+ V(DEF, smax, LDSMAX) \
+ V(DEF, smin, LDSMIN) \
+ V(DEF, umax, LDUMAX) \
+ V(DEF, umin, LDUMIN)
+
+#define ATOMIC_MEMORY_STORE_MODES(V, NAME, OP) \
+ V(NAME, OP##_x, OP##_w) \
+ V(NAME##l, OP##L_x, OP##L_w) \
+ V(NAME##b, OP##B, OP##B) \
+ V(NAME##lb, OP##LB, OP##LB) \
+ V(NAME##h, OP##H, OP##H) \
+ V(NAME##lh, OP##LH, OP##LH)
+
+#define ATOMIC_MEMORY_LOAD_MODES(V, NAME, OP) \
+ ATOMIC_MEMORY_STORE_MODES(V, NAME, OP) \
+ V(NAME##a, OP##A_x, OP##A_w) \
+ V(NAME##al, OP##AL_x, OP##AL_w) \
+ V(NAME##ab, OP##AB, OP##AB) \
+ V(NAME##alb, OP##ALB, OP##ALB) \
+ V(NAME##ah, OP##AH, OP##AH) \
+ V(NAME##alh, OP##ALH, OP##ALH)
+// clang-format on
+
+#define DEFINE_ASM_LOAD_FUNC(FN, OP_X, OP_W) \
+ void Assembler::ld##FN(const Register& rs, const Register& rt, \
+ const MemOperand& src) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); \
+ AtomicMemoryOp op = rt.Is64Bits() ? OP_X : OP_W; \
+ Emit(op | Rs(rs) | Rt(rt) | RnSP(src.base())); \
+ }
+#define DEFINE_ASM_STORE_FUNC(FN, OP_X, OP_W) \
+ void Assembler::st##FN(const Register& rs, const MemOperand& src) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \
+ ld##FN(rs, AppropriateZeroRegFor(rs), src); \
+ }
+
+ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(ATOMIC_MEMORY_LOAD_MODES,
+ DEFINE_ASM_LOAD_FUNC)
+ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(ATOMIC_MEMORY_STORE_MODES,
+ DEFINE_ASM_STORE_FUNC)
+
+#define DEFINE_ASM_SWP_FUNC(FN, OP_X, OP_W) \
+ void Assembler::FN(const Register& rs, const Register& rt, \
+ const MemOperand& src) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); \
+ AtomicMemoryOp op = rt.Is64Bits() ? OP_X : OP_W; \
+ Emit(op | Rs(rs) | Rt(rt) | RnSP(src.base())); \
+ }
+
+ATOMIC_MEMORY_LOAD_MODES(DEFINE_ASM_SWP_FUNC, swp, SWP)
+
+#undef DEFINE_ASM_LOAD_FUNC
+#undef DEFINE_ASM_STORE_FUNC
+#undef DEFINE_ASM_SWP_FUNC
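+
+// Illustrative expansion (not part of the generated code): for the "add"
+// entry above, the acquire-release load mode goes through
+// DEFINE_ASM_LOAD_FUNC(addal, LDADDAL_x, LDADDAL_w) and yields a method
+// equivalent to:
+//
+//   void Assembler::ldaddal(const Register& rs, const Register& rt,
+//                           const MemOperand& src) {
+//     VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics));
+//     VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+//     AtomicMemoryOp op = rt.Is64Bits() ? LDADDAL_x : LDADDAL_w;
+//     Emit(op | Rs(rs) | Rt(rt) | RnSP(src.base()));
+//   }
+//
+// The store forms forward to the matching load with the zero register as rt,
+// e.g. staddl(rs, dst) calls ldaddl(rs, AppropriateZeroRegFor(rs), dst).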
+
+void Assembler::prfm(PrefetchOperation op, const MemOperand& address,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ Prefetch(op, address, option);
+}
+
+
+void Assembler::prfum(PrefetchOperation op, const MemOperand& address,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ Prefetch(op, address, option);
+}
+
+
+void Assembler::sys(int op1, int crn, int crm, int op2, const Register& rt) {
+ Emit(SYS | ImmSysOp1(op1) | CRn(crn) | CRm(crm) | ImmSysOp2(op2) | Rt(rt));
+}
+
+
+void Assembler::sys(int op, const Register& rt) {
+ Emit(SYS | SysOp(op) | Rt(rt));
+}
+
+
+void Assembler::dc(DataCacheOp op, const Register& rt) {
+ VIXL_ASSERT((op == CVAC) || (op == CVAU) || (op == CIVAC) || (op == ZVA));
+ sys(op, rt);
+}
+
+
+void Assembler::ic(InstructionCacheOp op, const Register& rt) {
+ VIXL_ASSERT(op == IVAU);
+ sys(op, rt);
+}
+
+
+// NEON structure loads and stores.
+Instr Assembler::LoadStoreStructAddrModeField(const MemOperand& addr) {
+ Instr addr_field = RnSP(addr.base());
+
+ if (addr.IsPostIndex()) {
+ VIXL_STATIC_ASSERT(NEONLoadStoreMultiStructPostIndex ==
+ static_cast<NEONLoadStoreMultiStructPostIndexOp>(
+ NEONLoadStoreSingleStructPostIndex));
+
+ addr_field |= NEONLoadStoreMultiStructPostIndex;
+ if (addr.offset() == 0) {
+ addr_field |= RmNot31(addr.regoffset());
+ } else {
+ // The immediate post index addressing mode is indicated by rm = 31.
+ // The immediate is implied by the number of vector registers used.
+ addr_field |= (0x1f << Rm_offset);
+ }
+ } else {
+ VIXL_ASSERT(addr.IsImmediateOffset() && (addr.offset() == 0));
+ }
+ return addr_field;
+}
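+
+// Illustrative callers (assuming an initialized Assembler and scratch
+// registers x0/v0): the addressing modes accepted above correspond to
+// MemOperands such as
+//   ld1(v0.V16B(), MemOperand(x0));                 // plain offset, must be 0
+//   ld1(v0.V16B(), MemOperand(x0, 16, PostIndex));  // immediate post-index,
+//                                                   // encoded with rm = 31
+// where the post-index immediate must equal the number of bytes transferred.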
+
+void Assembler::LoadStoreStructVerify(const VRegister& vt,
+ const MemOperand& addr,
+ Instr op) {
+#ifdef DEBUG
+ // Assert that addressing mode is either offset (with immediate 0), post
+ // index by immediate of the size of the register list, or post index by a
+ // value in a core register.
+ if (addr.IsImmediateOffset()) {
+ VIXL_ASSERT(addr.offset() == 0);
+ } else {
+ int offset = vt.SizeInBytes();
+ switch (op) {
+ case NEON_LD1_1v:
+ case NEON_ST1_1v:
+ offset *= 1; break;
+ case NEONLoadStoreSingleStructLoad1:
+ case NEONLoadStoreSingleStructStore1:
+ case NEON_LD1R:
+ offset = (offset / vt.lanes()) * 1; break;
+
+ case NEON_LD1_2v:
+ case NEON_ST1_2v:
+ case NEON_LD2:
+ case NEON_ST2:
+ offset *= 2; break;
+ case NEONLoadStoreSingleStructLoad2:
+ case NEONLoadStoreSingleStructStore2:
+ case NEON_LD2R:
+ offset = (offset / vt.lanes()) * 2; break;
+
+ case NEON_LD1_3v:
+ case NEON_ST1_3v:
+ case NEON_LD3:
+ case NEON_ST3:
+ offset *= 3; break;
+ case NEONLoadStoreSingleStructLoad3:
+ case NEONLoadStoreSingleStructStore3:
+ case NEON_LD3R:
+ offset = (offset / vt.lanes()) * 3; break;
+
+ case NEON_LD1_4v:
+ case NEON_ST1_4v:
+ case NEON_LD4:
+ case NEON_ST4:
+ offset *= 4; break;
+ case NEONLoadStoreSingleStructLoad4:
+ case NEONLoadStoreSingleStructStore4:
+ case NEON_LD4R:
+ offset = (offset / vt.lanes()) * 4; break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ VIXL_ASSERT(!addr.regoffset().Is(NoReg) ||
+ addr.offset() == offset);
+ }
+#else
+ USE(vt, addr, op);
+#endif
+}
+
+void Assembler::LoadStoreStruct(const VRegister& vt,
+ const MemOperand& addr,
+ NEONLoadStoreMultiStructOp op) {
+ LoadStoreStructVerify(vt, addr, op);
+ VIXL_ASSERT(vt.IsVector() || vt.Is1D());
+ Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt));
+}
+
+
+void Assembler::LoadStoreStructSingleAllLanes(const VRegister& vt,
+ const MemOperand& addr,
+ NEONLoadStoreSingleStructOp op) {
+ LoadStoreStructVerify(vt, addr, op);
+ Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt));
+}
+
+
+void Assembler::ld1(const VRegister& vt,
+ const MemOperand& src) {
+ LoadStoreStruct(vt, src, NEON_LD1_1v);
+}
+
+
+void Assembler::ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src) {
+ USE(vt2);
+ VIXL_ASSERT(AreSameFormat(vt, vt2));
+ VIXL_ASSERT(AreConsecutive(vt, vt2));
+ LoadStoreStruct(vt, src, NEON_LD1_2v);
+}
+
+
+void Assembler::ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src) {
+ USE(vt2, vt3);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStruct(vt, src, NEON_LD1_3v);
+}
+
+
+void Assembler::ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src) {
+ USE(vt2, vt3, vt4);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStruct(vt, src, NEON_LD1_4v);
+}
+
+
+void Assembler::ld2(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src) {
+ USE(vt2);
+ VIXL_ASSERT(AreSameFormat(vt, vt2));
+ VIXL_ASSERT(AreConsecutive(vt, vt2));
+ LoadStoreStruct(vt, src, NEON_LD2);
+}
+
+
+void Assembler::ld2(const VRegister& vt,
+ const VRegister& vt2,
+ int lane,
+ const MemOperand& src) {
+ USE(vt2);
+ VIXL_ASSERT(AreSameFormat(vt, vt2));
+ VIXL_ASSERT(AreConsecutive(vt, vt2));
+ LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad2);
+}
+
+
+void Assembler::ld2r(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src) {
+ USE(vt2);
+ VIXL_ASSERT(AreSameFormat(vt, vt2));
+ VIXL_ASSERT(AreConsecutive(vt, vt2));
+ LoadStoreStructSingleAllLanes(vt, src, NEON_LD2R);
+}
+
+
+void Assembler::ld3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src) {
+ USE(vt2, vt3);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStruct(vt, src, NEON_LD3);
+}
+
+
+void Assembler::ld3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ int lane,
+ const MemOperand& src) {
+ USE(vt2, vt3);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad3);
+}
+
+
+void Assembler::ld3r(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src) {
+ USE(vt2, vt3);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStructSingleAllLanes(vt, src, NEON_LD3R);
+}
+
+
+void Assembler::ld4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src) {
+ USE(vt2, vt3, vt4);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStruct(vt, src, NEON_LD4);
+}
+
+
+void Assembler::ld4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ int lane,
+ const MemOperand& src) {
+ USE(vt2, vt3, vt4);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad4);
+}
+
+
+void Assembler::ld4r(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src) {
+ USE(vt2, vt3, vt4);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStructSingleAllLanes(vt, src, NEON_LD4R);
+}
+
+
+void Assembler::st1(const VRegister& vt,
+ const MemOperand& src) {
+ LoadStoreStruct(vt, src, NEON_ST1_1v);
+}
+
+
+void Assembler::st1(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src) {
+ USE(vt2);
+ VIXL_ASSERT(AreSameFormat(vt, vt2));
+ VIXL_ASSERT(AreConsecutive(vt, vt2));
+ LoadStoreStruct(vt, src, NEON_ST1_2v);
+}
+
+
+void Assembler::st1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src) {
+ USE(vt2, vt3);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStruct(vt, src, NEON_ST1_3v);
+}
+
+
+void Assembler::st1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src) {
+ USE(vt2, vt3, vt4);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStruct(vt, src, NEON_ST1_4v);
+}
+
+
+void Assembler::st2(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& dst) {
+ USE(vt2);
+ VIXL_ASSERT(AreSameFormat(vt, vt2));
+ VIXL_ASSERT(AreConsecutive(vt, vt2));
+ LoadStoreStruct(vt, dst, NEON_ST2);
+}
+
+
+void Assembler::st2(const VRegister& vt,
+ const VRegister& vt2,
+ int lane,
+ const MemOperand& dst) {
+ USE(vt2);
+ VIXL_ASSERT(AreSameFormat(vt, vt2));
+ VIXL_ASSERT(AreConsecutive(vt, vt2));
+ LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore2);
+}
+
+
+void Assembler::st3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& dst) {
+ USE(vt2, vt3);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStruct(vt, dst, NEON_ST3);
+}
+
+
+void Assembler::st3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ int lane,
+ const MemOperand& dst) {
+ USE(vt2, vt3);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore3);
+}
+
+
+void Assembler::st4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& dst) {
+ USE(vt2, vt3, vt4);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStruct(vt, dst, NEON_ST4);
+}
+
+
+void Assembler::st4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ int lane,
+ const MemOperand& dst) {
+ USE(vt2, vt3, vt4);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore4);
+}
+
+
+void Assembler::LoadStoreStructSingle(const VRegister& vt,
+ uint32_t lane,
+ const MemOperand& addr,
+ NEONLoadStoreSingleStructOp op) {
+ LoadStoreStructVerify(vt, addr, op);
+
+ // We support vt arguments of the form vt.VxT() or vt.T(), where x is the
+ // number of lanes, and T is b, h, s or d.
+ unsigned lane_size = vt.LaneSizeInBytes();
+ VIXL_ASSERT(lane < (kQRegSizeInBytes / lane_size));
+
+ // Lane size is encoded in the opcode field. Lane index is encoded in the Q,
+ // S and size fields.
+ lane *= lane_size;
+ if (lane_size == 8) lane++;
+
+ Instr size = (lane << NEONLSSize_offset) & NEONLSSize_mask;
+ Instr s = (lane << (NEONS_offset - 2)) & NEONS_mask;
+ Instr q = (lane << (NEONQ_offset - 3)) & NEONQ_mask;
+
+ Instr instr = op;
+ switch (lane_size) {
+ case 1: instr |= NEONLoadStoreSingle_b; break;
+ case 2: instr |= NEONLoadStoreSingle_h; break;
+ case 4: instr |= NEONLoadStoreSingle_s; break;
+ default:
+ VIXL_ASSERT(lane_size == 8);
+ instr |= NEONLoadStoreSingle_d;
+ }
+
+ Emit(instr | LoadStoreStructAddrModeField(addr) | q | size | s | Rt(vt));
+}
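+
+// Worked example (illustrative): accessing lane 3 of a .S register gives
+// lane_size == 4, so lane becomes 3 * 4 = 12 (0b1100). The low two bits (00)
+// fall into the size field, bit 2 (1) into S and bit 3 (1) into Q, matching
+// the architectural Q:S:size index encoding for a 32-bit element at index 3.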
+
+
+void Assembler::ld1(const VRegister& vt,
+ int lane,
+ const MemOperand& src) {
+ LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad1);
+}
+
+
+void Assembler::ld1r(const VRegister& vt,
+ const MemOperand& src) {
+ LoadStoreStructSingleAllLanes(vt, src, NEON_LD1R);
+}
+
+
+void Assembler::st1(const VRegister& vt,
+ int lane,
+ const MemOperand& dst) {
+ LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore1);
+}
+
+
+void Assembler::NEON3DifferentL(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3DifferentOp vop) {
+ VIXL_ASSERT(AreSameFormat(vn, vm));
+ VIXL_ASSERT((vn.Is1H() && vd.Is1S()) ||
+ (vn.Is1S() && vd.Is1D()) ||
+ (vn.Is8B() && vd.Is8H()) ||
+ (vn.Is4H() && vd.Is4S()) ||
+ (vn.Is2S() && vd.Is2D()) ||
+ (vn.Is16B() && vd.Is8H()) ||
+ (vn.Is8H() && vd.Is4S()) ||
+ (vn.Is4S() && vd.Is2D()));
+ Instr format, op = vop;
+ if (vd.IsScalar()) {
+ op |= NEON_Q | NEONScalar;
+ format = SFormat(vn);
+ } else {
+ format = VFormat(vn);
+ }
+ Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEON3DifferentW(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3DifferentOp vop) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT((vm.Is8B() && vd.Is8H()) ||
+ (vm.Is4H() && vd.Is4S()) ||
+ (vm.Is2S() && vd.Is2D()) ||
+ (vm.Is16B() && vd.Is8H()) ||
+ (vm.Is8H() && vd.Is4S()) ||
+ (vm.Is4S() && vd.Is2D()));
+ Emit(VFormat(vm) | vop | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEON3DifferentHN(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3DifferentOp vop) {
+ VIXL_ASSERT(AreSameFormat(vm, vn));
+ VIXL_ASSERT((vd.Is8B() && vn.Is8H()) ||
+ (vd.Is4H() && vn.Is4S()) ||
+ (vd.Is2S() && vn.Is2D()) ||
+ (vd.Is16B() && vn.Is8H()) ||
+ (vd.Is8H() && vn.Is4S()) ||
+ (vd.Is4S() && vn.Is2D()));
+ Emit(VFormat(vd) | vop | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+#define NEON_3DIFF_LONG_LIST(V) \
+ V(pmull, NEON_PMULL, vn.IsVector() && vn.Is8B()) \
+ V(pmull2, NEON_PMULL2, vn.IsVector() && vn.Is16B()) \
+ V(saddl, NEON_SADDL, vn.IsVector() && vn.IsD()) \
+ V(saddl2, NEON_SADDL2, vn.IsVector() && vn.IsQ()) \
+ V(sabal, NEON_SABAL, vn.IsVector() && vn.IsD()) \
+ V(sabal2, NEON_SABAL2, vn.IsVector() && vn.IsQ()) \
+ V(uabal, NEON_UABAL, vn.IsVector() && vn.IsD()) \
+ V(uabal2, NEON_UABAL2, vn.IsVector() && vn.IsQ()) \
+ V(sabdl, NEON_SABDL, vn.IsVector() && vn.IsD()) \
+ V(sabdl2, NEON_SABDL2, vn.IsVector() && vn.IsQ()) \
+ V(uabdl, NEON_UABDL, vn.IsVector() && vn.IsD()) \
+ V(uabdl2, NEON_UABDL2, vn.IsVector() && vn.IsQ()) \
+ V(smlal, NEON_SMLAL, vn.IsVector() && vn.IsD()) \
+ V(smlal2, NEON_SMLAL2, vn.IsVector() && vn.IsQ()) \
+ V(umlal, NEON_UMLAL, vn.IsVector() && vn.IsD()) \
+ V(umlal2, NEON_UMLAL2, vn.IsVector() && vn.IsQ()) \
+ V(smlsl, NEON_SMLSL, vn.IsVector() && vn.IsD()) \
+ V(smlsl2, NEON_SMLSL2, vn.IsVector() && vn.IsQ()) \
+ V(umlsl, NEON_UMLSL, vn.IsVector() && vn.IsD()) \
+ V(umlsl2, NEON_UMLSL2, vn.IsVector() && vn.IsQ()) \
+ V(smull, NEON_SMULL, vn.IsVector() && vn.IsD()) \
+ V(smull2, NEON_SMULL2, vn.IsVector() && vn.IsQ()) \
+ V(umull, NEON_UMULL, vn.IsVector() && vn.IsD()) \
+ V(umull2, NEON_UMULL2, vn.IsVector() && vn.IsQ()) \
+ V(ssubl, NEON_SSUBL, vn.IsVector() && vn.IsD()) \
+ V(ssubl2, NEON_SSUBL2, vn.IsVector() && vn.IsQ()) \
+ V(uaddl, NEON_UADDL, vn.IsVector() && vn.IsD()) \
+ V(uaddl2, NEON_UADDL2, vn.IsVector() && vn.IsQ()) \
+ V(usubl, NEON_USUBL, vn.IsVector() && vn.IsD()) \
+ V(usubl2, NEON_USUBL2, vn.IsVector() && vn.IsQ()) \
+ V(sqdmlal, NEON_SQDMLAL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
+ V(sqdmlal2, NEON_SQDMLAL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
+ V(sqdmlsl, NEON_SQDMLSL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
+ V(sqdmlsl2, NEON_SQDMLSL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
+ V(sqdmull, NEON_SQDMULL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
+ V(sqdmull2, NEON_SQDMULL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
+
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm) { \
+ VIXL_ASSERT(AS); \
+ NEON3DifferentL(vd, vn, vm, OP); \
+}
+NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+#define NEON_3DIFF_HN_LIST(V) \
+ V(addhn, NEON_ADDHN, vd.IsD()) \
+ V(addhn2, NEON_ADDHN2, vd.IsQ()) \
+ V(raddhn, NEON_RADDHN, vd.IsD()) \
+ V(raddhn2, NEON_RADDHN2, vd.IsQ()) \
+ V(subhn, NEON_SUBHN, vd.IsD()) \
+ V(subhn2, NEON_SUBHN2, vd.IsQ()) \
+ V(rsubhn, NEON_RSUBHN, vd.IsD()) \
+ V(rsubhn2, NEON_RSUBHN2, vd.IsQ())
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm) { \
+ VIXL_ASSERT(AS); \
+ NEON3DifferentHN(vd, vn, vm, OP); \
+}
+NEON_3DIFF_HN_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+void Assembler::uaddw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsD());
+ NEON3DifferentW(vd, vn, vm, NEON_UADDW);
+}
+
+
+void Assembler::uaddw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsQ());
+ NEON3DifferentW(vd, vn, vm, NEON_UADDW2);
+}
+
+
+void Assembler::saddw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsD());
+ NEON3DifferentW(vd, vn, vm, NEON_SADDW);
+}
+
+
+void Assembler::saddw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsQ());
+ NEON3DifferentW(vd, vn, vm, NEON_SADDW2);
+}
+
+
+void Assembler::usubw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsD());
+ NEON3DifferentW(vd, vn, vm, NEON_USUBW);
+}
+
+
+void Assembler::usubw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsQ());
+ NEON3DifferentW(vd, vn, vm, NEON_USUBW2);
+}
+
+
+void Assembler::ssubw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsD());
+ NEON3DifferentW(vd, vn, vm, NEON_SSUBW);
+}
+
+
+void Assembler::ssubw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsQ());
+ NEON3DifferentW(vd, vn, vm, NEON_SSUBW2);
+}
+
+
+void Assembler::mov(const Register& rd, const Register& rm) {
+ // Moves involving the stack pointer are encoded as add immediate with
+ // second operand of zero. Otherwise, orr with first operand zr is
+ // used.
+ if (rd.IsSP() || rm.IsSP()) {
+ add(rd, rm, 0);
+ } else {
+ orr(rd, AppropriateZeroRegFor(rd), rm);
+ }
+}
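+
+// Illustrative encodings (register names are only examples): mov(x0, x1)
+// assembles as "orr x0, xzr, x1", whereas mov(sp, x1) or mov(x0, sp)
+// assembles as an add with a zero immediate, because the logical ORR
+// encoding treats register 31 as the zero register rather than the stack
+// pointer.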
+
+
+void Assembler::mvn(const Register& rd, const Operand& operand) {
+ orn(rd, AppropriateZeroRegFor(rd), operand);
+}
+
+
+void Assembler::mrs(const Register& rt, SystemRegister sysreg) {
+ VIXL_ASSERT(rt.Is64Bits());
+ Emit(MRS | ImmSystemRegister(sysreg) | Rt(rt));
+}
+
+
+void Assembler::msr(SystemRegister sysreg, const Register& rt) {
+ VIXL_ASSERT(rt.Is64Bits());
+ Emit(MSR | Rt(rt) | ImmSystemRegister(sysreg));
+}
+
+
+void Assembler::clrex(int imm4) {
+ Emit(CLREX | CRm(imm4));
+}
+
+
+void Assembler::dmb(BarrierDomain domain, BarrierType type) {
+ Emit(DMB | ImmBarrierDomain(domain) | ImmBarrierType(type));
+}
+
+
+void Assembler::dsb(BarrierDomain domain, BarrierType type) {
+ Emit(DSB | ImmBarrierDomain(domain) | ImmBarrierType(type));
+}
+
+
+void Assembler::isb() {
+ Emit(ISB | ImmBarrierDomain(FullSystem) | ImmBarrierType(BarrierAll));
+}
+
+
+void Assembler::fmov(const VRegister& vd, double imm) {
+ if (vd.IsScalar()) {
+ VIXL_ASSERT(vd.Is1D());
+ Emit(FMOV_d_imm | Rd(vd) | ImmFP64(imm));
+ } else {
+ VIXL_ASSERT(vd.Is2D());
+ Instr op = NEONModifiedImmediate_MOVI | NEONModifiedImmediateOpBit;
+ Instr q = NEON_Q;
+ uint32_t encoded_imm = FP64ToImm8(imm);
+ Emit(q | op | ImmNEONabcdefgh(encoded_imm) | NEONCmode(0xf) | Rd(vd));
+ }
+}
+
+
+void Assembler::fmov(const VRegister& vd, float imm) {
+ if (vd.IsScalar()) {
+ VIXL_ASSERT(vd.Is1S());
+ Emit(FMOV_s_imm | Rd(vd) | ImmFP32(imm));
+ } else {
+ VIXL_ASSERT(vd.Is2S() || vd.Is4S());
+ Instr op = NEONModifiedImmediate_MOVI;
+ Instr q = vd.Is4S() ? NEON_Q : 0;
+ uint32_t encoded_imm = FP32ToImm8(imm);
+ Emit(q | op | ImmNEONabcdefgh(encoded_imm) | NEONCmode(0xf) | Rd(vd));
+ }
+}
+
+
+void Assembler::fmov(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ VIXL_ASSERT(rd.size() == vn.size());
+ FPIntegerConvertOp op = rd.Is32Bits() ? FMOV_ws : FMOV_xd;
+ Emit(op | Rd(rd) | Rn(vn));
+}
+
+
+void Assembler::fmov(const VRegister& vd, const Register& rn) {
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ VIXL_ASSERT(vd.size() == rn.size());
+ FPIntegerConvertOp op = vd.Is32Bits() ? FMOV_sw : FMOV_dx;
+ Emit(op | Rd(vd) | Rn(rn));
+}
+
+
+void Assembler::fmov(const VRegister& vd, const VRegister& vn) {
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ VIXL_ASSERT(vd.IsSameFormat(vn));
+ Emit(FPType(vd) | FMOV | Rd(vd) | Rn(vn));
+}
+
+
+void Assembler::fmov(const VRegister& vd, int index, const Register& rn) {
+ VIXL_ASSERT((index == 1) && vd.Is1D() && rn.IsX());
+ USE(index);
+ Emit(FMOV_d1_x | Rd(vd) | Rn(rn));
+}
+
+
+void Assembler::fmov(const Register& rd, const VRegister& vn, int index) {
+ VIXL_ASSERT((index == 1) && vn.Is1D() && rd.IsX());
+ USE(index);
+ Emit(FMOV_x_d1 | Rd(rd) | Rn(vn));
+}
+
+
+void Assembler::fmadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FMADD_s : FMADD_d);
+}
+
+
+void Assembler::fmsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FMSUB_s : FMSUB_d);
+}
+
+
+void Assembler::fnmadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FNMADD_s : FNMADD_d);
+}
+
+
+void Assembler::fnmsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FNMSUB_s : FNMSUB_d);
+}
+
+
+void Assembler::fnmul(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(AreSameSizeAndType(vd, vn, vm));
+ Instr op = vd.Is1S() ? FNMUL_s : FNMUL_d;
+ Emit(FPType(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::FPCompareMacro(const VRegister& vn,
+ double value,
+ FPTrapFlags trap) {
+ USE(value);
+ // Although the fcmp{e} instructions can strictly only take an immediate
+ // value of +0.0, we don't need to check for -0.0 because the sign of 0.0
+ // doesn't affect the result of the comparison.
+ VIXL_ASSERT(value == 0.0);
+ VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ Instr op = (trap == EnableTrap) ? FCMPE_zero : FCMP_zero;
+ Emit(FPType(vn) | op | Rn(vn));
+}
+
+
+void Assembler::FPCompareMacro(const VRegister& vn,
+ const VRegister& vm,
+ FPTrapFlags trap) {
+ VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ VIXL_ASSERT(vn.IsSameSizeAndType(vm));
+ Instr op = (trap == EnableTrap) ? FCMPE : FCMP;
+ Emit(FPType(vn) | op | Rm(vm) | Rn(vn));
+}
+
+
+void Assembler::fcmp(const VRegister& vn,
+ const VRegister& vm) {
+ FPCompareMacro(vn, vm, DisableTrap);
+}
+
+
+void Assembler::fcmpe(const VRegister& vn,
+ const VRegister& vm) {
+ FPCompareMacro(vn, vm, EnableTrap);
+}
+
+
+void Assembler::fcmp(const VRegister& vn,
+ double value) {
+ FPCompareMacro(vn, value, DisableTrap);
+}
+
+
+void Assembler::fcmpe(const VRegister& vn,
+ double value) {
+ FPCompareMacro(vn, value, EnableTrap);
+}
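+
+// Usage note (illustrative): fcmp(d0, 0.0) and fcmpe(d0, 0.0) reach the
+// zero-comparison forms above; any other literal operand would trip the
+// assertion in FPCompareMacro, so callers must first load the value into a
+// register and use the register-register forms instead.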
+
+
+void Assembler::FPCCompareMacro(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond,
+ FPTrapFlags trap) {
+ VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ VIXL_ASSERT(vn.IsSameSizeAndType(vm));
+ Instr op = (trap == EnableTrap) ? FCCMPE : FCCMP;
+ Emit(FPType(vn) | op | Rm(vm) | Cond(cond) | Rn(vn) | Nzcv(nzcv));
+}
+
+void Assembler::fccmp(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond) {
+ FPCCompareMacro(vn, vm, nzcv, cond, DisableTrap);
+}
+
+
+void Assembler::fccmpe(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond) {
+ FPCCompareMacro(vn, vm, nzcv, cond, EnableTrap);
+}
+
+
+void Assembler::fcsel(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ Condition cond) {
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ VIXL_ASSERT(AreSameFormat(vd, vn, vm));
+ Emit(FPType(vd) | FCSEL | Rm(vm) | Cond(cond) | Rn(vn) | Rd(vd));
+}
+
+void Assembler::fjcvtzs(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kJSCVT));
+ VIXL_ASSERT(rd.IsW() && vn.Is1D());
+ Emit(FJCVTZS | Rn(vn) | Rd(rd));
+}
+
+
+void Assembler::NEONFPConvertToInt(const Register& rd,
+ const VRegister& vn,
+ Instr op) {
+ Emit(SF(rd) | FPType(vn) | op | Rn(vn) | Rd(rd));
+}
+
+
+void Assembler::NEONFPConvertToInt(const VRegister& vd,
+ const VRegister& vn,
+ Instr op) {
+ if (vn.IsScalar()) {
+ VIXL_ASSERT((vd.Is1S() && vn.Is1S()) || (vd.Is1D() && vn.Is1D()));
+ op |= NEON_Q | NEONScalar;
+ }
+ Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fcvt(const VRegister& vd,
+ const VRegister& vn) {
+ FPDataProcessing1SourceOp op;
+ if (vd.Is1D()) {
+ VIXL_ASSERT(vn.Is1S() || vn.Is1H());
+ op = vn.Is1S() ? FCVT_ds : FCVT_dh;
+ } else if (vd.Is1S()) {
+ VIXL_ASSERT(vn.Is1D() || vn.Is1H());
+ op = vn.Is1D() ? FCVT_sd : FCVT_sh;
+ } else {
+ VIXL_ASSERT(vd.Is1H());
+ VIXL_ASSERT(vn.Is1D() || vn.Is1S());
+ op = vn.Is1D() ? FCVT_hd : FCVT_hs;
+ }
+ FPDataProcessing1Source(vd, vn, op);
+}
+
+
+void Assembler::fcvtl(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is4S() && vn.Is4H()) ||
+ (vd.Is2D() && vn.Is2S()));
+ Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
+ Emit(format | NEON_FCVTL | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fcvtl2(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is4S() && vn.Is8H()) ||
+ (vd.Is2D() && vn.Is4S()));
+ Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
+ Emit(NEON_Q | format | NEON_FCVTL | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fcvtn(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vn.Is4S() && vd.Is4H()) ||
+ (vn.Is2D() && vd.Is2S()));
+ Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
+ Emit(format | NEON_FCVTN | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fcvtn2(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vn.Is4S() && vd.Is8H()) ||
+ (vn.Is2D() && vd.Is4S()));
+ Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
+ Emit(NEON_Q | format | NEON_FCVTN | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fcvtxn(const VRegister& vd,
+ const VRegister& vn) {
+ Instr format = 1 << NEONSize_offset;
+ if (vd.IsScalar()) {
+ VIXL_ASSERT(vd.Is1S() && vn.Is1D());
+ Emit(format | NEON_FCVTXN_scalar | Rn(vn) | Rd(vd));
+ } else {
+ VIXL_ASSERT(vd.Is2S() && vn.Is2D());
+ Emit(format | NEON_FCVTXN | Rn(vn) | Rd(vd));
+ }
+}
+
+
+void Assembler::fcvtxn2(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.Is4S() && vn.Is2D());
+ Instr format = 1 << NEONSize_offset;
+ Emit(NEON_Q | format | NEON_FCVTXN | Rn(vn) | Rd(vd));
+}
+
+
+#define NEON_FP2REGMISC_FCVT_LIST(V) \
+ V(fcvtnu, NEON_FCVTNU, FCVTNU) \
+ V(fcvtns, NEON_FCVTNS, FCVTNS) \
+ V(fcvtpu, NEON_FCVTPU, FCVTPU) \
+ V(fcvtps, NEON_FCVTPS, FCVTPS) \
+ V(fcvtmu, NEON_FCVTMU, FCVTMU) \
+ V(fcvtms, NEON_FCVTMS, FCVTMS) \
+ V(fcvtau, NEON_FCVTAU, FCVTAU) \
+ V(fcvtas, NEON_FCVTAS, FCVTAS)
+
+#define DEFINE_ASM_FUNCS(FN, VEC_OP, SCA_OP) \
+void Assembler::FN(const Register& rd, \
+ const VRegister& vn) { \
+ NEONFPConvertToInt(rd, vn, SCA_OP); \
+} \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn) { \
+ NEONFPConvertToInt(vd, vn, VEC_OP); \
+}
+NEON_FP2REGMISC_FCVT_LIST(DEFINE_ASM_FUNCS)
+#undef DEFINE_ASM_FUNCS
+
+
+void Assembler::fcvtzs(const Register& rd,
+ const VRegister& vn,
+ int fbits) {
+ VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ VIXL_ASSERT((fbits >= 0) && (fbits <= rd.SizeInBits()));
+ if (fbits == 0) {
+ Emit(SF(rd) | FPType(vn) | FCVTZS | Rn(vn) | Rd(rd));
+ } else {
+ Emit(SF(rd) | FPType(vn) | FCVTZS_fixed | FPScale(64 - fbits) | Rn(vn) |
+ Rd(rd));
+ }
+}
+
+
+void Assembler::fcvtzs(const VRegister& vd,
+ const VRegister& vn,
+ int fbits) {
+ VIXL_ASSERT(fbits >= 0);
+ if (fbits == 0) {
+ NEONFP2RegMisc(vd, vn, NEON_FCVTZS);
+ } else {
+ VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZS_imm);
+ }
+}
+
+
+void Assembler::fcvtzu(const Register& rd,
+ const VRegister& vn,
+ int fbits) {
+ VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ VIXL_ASSERT((fbits >= 0) && (fbits <= rd.SizeInBits()));
+ if (fbits == 0) {
+ Emit(SF(rd) | FPType(vn) | FCVTZU | Rn(vn) | Rd(rd));
+ } else {
+ Emit(SF(rd) | FPType(vn) | FCVTZU_fixed | FPScale(64 - fbits) | Rn(vn) |
+ Rd(rd));
+ }
+}
+
+
+void Assembler::fcvtzu(const VRegister& vd,
+ const VRegister& vn,
+ int fbits) {
+ VIXL_ASSERT(fbits >= 0);
+ if (fbits == 0) {
+ NEONFP2RegMisc(vd, vn, NEON_FCVTZU);
+ } else {
+ VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZU_imm);
+ }
+}
+
+void Assembler::ucvtf(const VRegister& vd,
+ const VRegister& vn,
+ int fbits) {
+ VIXL_ASSERT(fbits >= 0);
+ if (fbits == 0) {
+ NEONFP2RegMisc(vd, vn, NEON_UCVTF);
+ } else {
+ VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ NEONShiftRightImmediate(vd, vn, fbits, NEON_UCVTF_imm);
+ }
+}
+
+void Assembler::scvtf(const VRegister& vd,
+ const VRegister& vn,
+ int fbits) {
+ VIXL_ASSERT(fbits >= 0);
+ if (fbits == 0) {
+ NEONFP2RegMisc(vd, vn, NEON_SCVTF);
+ } else {
+ VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ NEONShiftRightImmediate(vd, vn, fbits, NEON_SCVTF_imm);
+ }
+}
+
+
+void Assembler::scvtf(const VRegister& vd,
+ const Register& rn,
+ int fbits) {
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ VIXL_ASSERT(fbits >= 0);
+ if (fbits == 0) {
+ Emit(SF(rn) | FPType(vd) | SCVTF | Rn(rn) | Rd(vd));
+ } else {
+ Emit(SF(rn) | FPType(vd) | SCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
+ Rd(vd));
+ }
+}
+
+
+void Assembler::ucvtf(const VRegister& vd,
+ const Register& rn,
+ int fbits) {
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ VIXL_ASSERT(fbits >= 0);
+ if (fbits == 0) {
+ Emit(SF(rn) | FPType(vd) | UCVTF | Rn(rn) | Rd(vd));
+ } else {
+ Emit(SF(rn) | FPType(vd) | UCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
+ Rd(vd));
+ }
+}
+
+
+void Assembler::NEON3Same(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3SameOp vop) {
+ VIXL_ASSERT(AreSameFormat(vd, vn, vm));
+ VIXL_ASSERT(vd.IsVector() || !vd.IsQ());
+
+ Instr format, op = vop;
+ if (vd.IsScalar()) {
+ op |= NEON_Q | NEONScalar;
+ format = SFormat(vd);
+ } else {
+ format = VFormat(vd);
+ }
+
+ Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEONFP3Same(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ Instr op) {
+ VIXL_ASSERT(AreSameFormat(vd, vn, vm));
+ Emit(FPFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+#define NEON_FP2REGMISC_LIST(V) \
+ V(fabs, NEON_FABS, FABS) \
+ V(fneg, NEON_FNEG, FNEG) \
+ V(fsqrt, NEON_FSQRT, FSQRT) \
+ V(frintn, NEON_FRINTN, FRINTN) \
+ V(frinta, NEON_FRINTA, FRINTA) \
+ V(frintp, NEON_FRINTP, FRINTP) \
+ V(frintm, NEON_FRINTM, FRINTM) \
+ V(frintx, NEON_FRINTX, FRINTX) \
+ V(frintz, NEON_FRINTZ, FRINTZ) \
+ V(frinti, NEON_FRINTI, FRINTI) \
+ V(frsqrte, NEON_FRSQRTE, NEON_FRSQRTE_scalar) \
+ V(frecpe, NEON_FRECPE, NEON_FRECPE_scalar)
+
+
+#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn) { \
+ Instr op; \
+ if (vd.IsScalar()) { \
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D()); \
+ op = SCA_OP; \
+ } else { \
+ VIXL_ASSERT(vd.Is2S() || vd.Is2D() || vd.Is4S()); \
+ op = VEC_OP; \
+ } \
+ NEONFP2RegMisc(vd, vn, op); \
+}
+NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+
+void Assembler::NEONFP2RegMisc(const VRegister& vd,
+ const VRegister& vn,
+ Instr op) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEON2RegMisc(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp vop,
+ int value) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(value == 0);
+ USE(value);
+
+ Instr format, op = vop;
+ if (vd.IsScalar()) {
+ op |= NEON_Q | NEONScalar;
+ format = SFormat(vd);
+ } else {
+ format = VFormat(vd);
+ }
+
+ Emit(format | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::cmeq(const VRegister& vd,
+ const VRegister& vn,
+ int value) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_CMEQ_zero, value);
+}
+
+
+void Assembler::cmge(const VRegister& vd,
+ const VRegister& vn,
+ int value) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_CMGE_zero, value);
+}
+
+
+void Assembler::cmgt(const VRegister& vd,
+ const VRegister& vn,
+ int value) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_CMGT_zero, value);
+}
+
+
+void Assembler::cmle(const VRegister& vd,
+ const VRegister& vn,
+ int value) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_CMLE_zero, value);
+}
+
+
+void Assembler::cmlt(const VRegister& vd,
+ const VRegister& vn,
+ int value) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_CMLT_zero, value);
+}
+
+
+void Assembler::shll(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT((vd.Is8H() && vn.Is8B() && shift == 8) ||
+ (vd.Is4S() && vn.Is4H() && shift == 16) ||
+ (vd.Is2D() && vn.Is2S() && shift == 32));
+ USE(shift);
+ Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::shll2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ USE(shift);
+ VIXL_ASSERT((vd.Is8H() && vn.Is16B() && shift == 8) ||
+ (vd.Is4S() && vn.Is8H() && shift == 16) ||
+ (vd.Is2D() && vn.Is4S() && shift == 32));
+ Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEONFP2RegMisc(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp vop,
+ double value) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(value == 0.0);
+ USE(value);
+
+ Instr op = vop;
+ if (vd.IsScalar()) {
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ op |= NEON_Q | NEONScalar;
+ } else {
+ VIXL_ASSERT(vd.Is2S() || vd.Is2D() || vd.Is4S());
+ }
+
+ Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fcmeq(const VRegister& vd,
+ const VRegister& vn,
+ double value) {
+ NEONFP2RegMisc(vd, vn, NEON_FCMEQ_zero, value);
+}
+
+
+void Assembler::fcmge(const VRegister& vd,
+ const VRegister& vn,
+ double value) {
+ NEONFP2RegMisc(vd, vn, NEON_FCMGE_zero, value);
+}
+
+
+void Assembler::fcmgt(const VRegister& vd,
+ const VRegister& vn,
+ double value) {
+ NEONFP2RegMisc(vd, vn, NEON_FCMGT_zero, value);
+}
+
+
+void Assembler::fcmle(const VRegister& vd,
+ const VRegister& vn,
+ double value) {
+ NEONFP2RegMisc(vd, vn, NEON_FCMLE_zero, value);
+}
+
+
+void Assembler::fcmlt(const VRegister& vd,
+ const VRegister& vn,
+ double value) {
+ NEONFP2RegMisc(vd, vn, NEON_FCMLT_zero, value);
+}
+
+
+void Assembler::frecpx(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsScalar());
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ Emit(FPFormat(vd) | NEON_FRECPX_scalar | Rn(vn) | Rd(vd));
+}
+
+
+#define NEON_3SAME_LIST(V) \
+ V(add, NEON_ADD, vd.IsVector() || vd.Is1D()) \
+ V(addp, NEON_ADDP, vd.IsVector() || vd.Is1D()) \
+ V(sub, NEON_SUB, vd.IsVector() || vd.Is1D()) \
+ V(cmeq, NEON_CMEQ, vd.IsVector() || vd.Is1D()) \
+ V(cmge, NEON_CMGE, vd.IsVector() || vd.Is1D()) \
+ V(cmgt, NEON_CMGT, vd.IsVector() || vd.Is1D()) \
+ V(cmhi, NEON_CMHI, vd.IsVector() || vd.Is1D()) \
+ V(cmhs, NEON_CMHS, vd.IsVector() || vd.Is1D()) \
+ V(cmtst, NEON_CMTST, vd.IsVector() || vd.Is1D()) \
+ V(sshl, NEON_SSHL, vd.IsVector() || vd.Is1D()) \
+ V(ushl, NEON_USHL, vd.IsVector() || vd.Is1D()) \
+ V(srshl, NEON_SRSHL, vd.IsVector() || vd.Is1D()) \
+ V(urshl, NEON_URSHL, vd.IsVector() || vd.Is1D()) \
+ V(sqdmulh, NEON_SQDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \
+ V(sqrdmulh, NEON_SQRDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \
+ V(shadd, NEON_SHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(uhadd, NEON_UHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(srhadd, NEON_SRHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(urhadd, NEON_URHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(shsub, NEON_SHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(uhsub, NEON_UHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(smax, NEON_SMAX, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(smaxp, NEON_SMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(smin, NEON_SMIN, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(sminp, NEON_SMINP, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(umax, NEON_UMAX, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(umaxp, NEON_UMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(umin, NEON_UMIN, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(uminp, NEON_UMINP, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(saba, NEON_SABA, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(sabd, NEON_SABD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(uaba, NEON_UABA, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(uabd, NEON_UABD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(mla, NEON_MLA, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(mls, NEON_MLS, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(mul, NEON_MUL, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(and_, NEON_AND, vd.Is8B() || vd.Is16B()) \
+ V(orr, NEON_ORR, vd.Is8B() || vd.Is16B()) \
+ V(orn, NEON_ORN, vd.Is8B() || vd.Is16B()) \
+ V(eor, NEON_EOR, vd.Is8B() || vd.Is16B()) \
+ V(bic, NEON_BIC, vd.Is8B() || vd.Is16B()) \
+ V(bit, NEON_BIT, vd.Is8B() || vd.Is16B()) \
+ V(bif, NEON_BIF, vd.Is8B() || vd.Is16B()) \
+ V(bsl, NEON_BSL, vd.Is8B() || vd.Is16B()) \
+ V(pmul, NEON_PMUL, vd.Is8B() || vd.Is16B()) \
+ V(uqadd, NEON_UQADD, true) \
+ V(sqadd, NEON_SQADD, true) \
+ V(uqsub, NEON_UQSUB, true) \
+ V(sqsub, NEON_SQSUB, true) \
+ V(sqshl, NEON_SQSHL, true) \
+ V(uqshl, NEON_UQSHL, true) \
+ V(sqrshl, NEON_SQRSHL, true) \
+ V(uqrshl, NEON_UQRSHL, true)
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm) { \
+ VIXL_ASSERT(AS); \
+ NEON3Same(vd, vn, vm, OP); \
+}
+NEON_3SAME_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+
+#define NEON_FP3SAME_OP_LIST(V) \
+ V(fadd, NEON_FADD, FADD) \
+ V(fsub, NEON_FSUB, FSUB) \
+ V(fmul, NEON_FMUL, FMUL) \
+ V(fdiv, NEON_FDIV, FDIV) \
+ V(fmax, NEON_FMAX, FMAX) \
+ V(fmaxnm, NEON_FMAXNM, FMAXNM) \
+ V(fmin, NEON_FMIN, FMIN) \
+ V(fminnm, NEON_FMINNM, FMINNM) \
+ V(fmulx, NEON_FMULX, NEON_FMULX_scalar) \
+ V(frecps, NEON_FRECPS, NEON_FRECPS_scalar) \
+ V(frsqrts, NEON_FRSQRTS, NEON_FRSQRTS_scalar) \
+ V(fabd, NEON_FABD, NEON_FABD_scalar) \
+ V(fmla, NEON_FMLA, 0) \
+ V(fmls, NEON_FMLS, 0) \
+ V(facge, NEON_FACGE, NEON_FACGE_scalar) \
+ V(facgt, NEON_FACGT, NEON_FACGT_scalar) \
+ V(fcmeq, NEON_FCMEQ, NEON_FCMEQ_scalar) \
+ V(fcmge, NEON_FCMGE, NEON_FCMGE_scalar) \
+ V(fcmgt, NEON_FCMGT, NEON_FCMGT_scalar) \
+ V(faddp, NEON_FADDP, 0) \
+ V(fmaxp, NEON_FMAXP, 0) \
+ V(fminp, NEON_FMINP, 0) \
+ V(fmaxnmp, NEON_FMAXNMP, 0) \
+ V(fminnmp, NEON_FMINNMP, 0)
+
+#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm) { \
+ Instr op; \
+ if ((SCA_OP != 0) && vd.IsScalar()) { \
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D()); \
+ op = SCA_OP; \
+ } else { \
+ VIXL_ASSERT(vd.IsVector()); \
+ VIXL_ASSERT(vd.Is2S() || vd.Is2D() || vd.Is4S()); \
+ op = VEC_OP; \
+ } \
+ NEONFP3Same(vd, vn, vm, op); \
+}
+NEON_FP3SAME_OP_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+
+void Assembler::addp(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is1D() && vn.Is2D()));
+ Emit(SFormat(vd) | NEON_ADDP_scalar | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::faddp(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is1S() && vn.Is2S()) ||
+ (vd.Is1D() && vn.Is2D()));
+ Emit(FPFormat(vd) | NEON_FADDP_scalar | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fmaxp(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is1S() && vn.Is2S()) ||
+ (vd.Is1D() && vn.Is2D()));
+ Emit(FPFormat(vd) | NEON_FMAXP_scalar | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fminp(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is1S() && vn.Is2S()) ||
+ (vd.Is1D() && vn.Is2D()));
+ Emit(FPFormat(vd) | NEON_FMINP_scalar | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fmaxnmp(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is1S() && vn.Is2S()) ||
+ (vd.Is1D() && vn.Is2D()));
+ Emit(FPFormat(vd) | NEON_FMAXNMP_scalar | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fminnmp(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is1S() && vn.Is2S()) ||
+ (vd.Is1D() && vn.Is2D()));
+ Emit(FPFormat(vd) | NEON_FMINNMP_scalar | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::orr(const VRegister& vd,
+ const int imm8,
+ const int left_shift) {
+ NEONModifiedImmShiftLsl(vd, imm8, left_shift,
+ NEONModifiedImmediate_ORR);
+}
+
+
+void Assembler::mov(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ if (vd.IsD()) {
+ orr(vd.V8B(), vn.V8B(), vn.V8B());
+ } else {
+ VIXL_ASSERT(vd.IsQ());
+ orr(vd.V16B(), vn.V16B(), vn.V16B());
+ }
+}
+
+
+void Assembler::bic(const VRegister& vd,
+ const int imm8,
+ const int left_shift) {
+ NEONModifiedImmShiftLsl(vd, imm8, left_shift,
+ NEONModifiedImmediate_BIC);
+}
+
+
+void Assembler::movi(const VRegister& vd,
+ const uint64_t imm,
+ Shift shift,
+ const int shift_amount) {
+ VIXL_ASSERT((shift == LSL) || (shift == MSL));
+ if (vd.Is2D() || vd.Is1D()) {
+ VIXL_ASSERT(shift_amount == 0);
+ int imm8 = 0;
+ for (int i = 0; i < 8; ++i) {
+ int byte = (imm >> (i * 8)) & 0xff;
+ VIXL_ASSERT((byte == 0) || (byte == 0xff));
+ if (byte == 0xff) {
+ imm8 |= (1 << i);
+ }
+ }
+ int q = vd.Is2D() ? NEON_Q : 0;
+ Emit(q | NEONModImmOp(1) | NEONModifiedImmediate_MOVI |
+ ImmNEONabcdefgh(imm8) | NEONCmode(0xe) | Rd(vd));
+ } else if (shift == LSL) {
+ VIXL_ASSERT(IsUint8(imm));
+ NEONModifiedImmShiftLsl(vd, static_cast<int>(imm), shift_amount,
+ NEONModifiedImmediate_MOVI);
+ } else {
+ VIXL_ASSERT(IsUint8(imm));
+ NEONModifiedImmShiftMsl(vd, static_cast<int>(imm), shift_amount,
+ NEONModifiedImmediate_MOVI);
+ }
+}
+
+
+void Assembler::mvn(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ if (vd.IsD()) {
+ not_(vd.V8B(), vn.V8B());
+ } else {
+ VIXL_ASSERT(vd.IsQ());
+ not_(vd.V16B(), vn.V16B());
+ }
+}
+
+
+void Assembler::mvni(const VRegister& vd,
+ const int imm8,
+ Shift shift,
+ const int shift_amount) {
+ VIXL_ASSERT((shift == LSL) || (shift == MSL));
+ if (shift == LSL) {
+ NEONModifiedImmShiftLsl(vd, imm8, shift_amount,
+ NEONModifiedImmediate_MVNI);
+ } else {
+ NEONModifiedImmShiftMsl(vd, imm8, shift_amount,
+ NEONModifiedImmediate_MVNI);
+ }
+}
+
+
+void Assembler::NEONFPByElement(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index,
+ NEONByIndexedElementOp vop) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT((vd.Is2S() && vm.Is1S()) ||
+ (vd.Is4S() && vm.Is1S()) ||
+ (vd.Is1S() && vm.Is1S()) ||
+ (vd.Is2D() && vm.Is1D()) ||
+ (vd.Is1D() && vm.Is1D()));
+ VIXL_ASSERT((vm.Is1S() && (vm_index < 4)) ||
+ (vm.Is1D() && (vm_index < 2)));
+
+ Instr op = vop;
+ int index_num_bits = vm.Is1S() ? 2 : 1;
+ if (vd.IsScalar()) {
+ op |= NEON_Q | NEONScalar;
+ }
+
+ Emit(FPFormat(vd) | op | ImmNEONHLM(vm_index, index_num_bits) |
+ Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEONByElement(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index,
+ NEONByIndexedElementOp vop) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT((vd.Is4H() && vm.Is1H()) ||
+ (vd.Is8H() && vm.Is1H()) ||
+ (vd.Is1H() && vm.Is1H()) ||
+ (vd.Is2S() && vm.Is1S()) ||
+ (vd.Is4S() && vm.Is1S()) ||
+ (vd.Is1S() && vm.Is1S()));
+ VIXL_ASSERT((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) ||
+ (vm.Is1S() && (vm_index < 4)));
+
+ Instr format, op = vop;
+ int index_num_bits = vm.Is1H() ? 3 : 2;
+ if (vd.IsScalar()) {
+ op |= NEONScalar | NEON_Q;
+ format = SFormat(vn);
+ } else {
+ format = VFormat(vn);
+ }
+ Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) |
+ Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEONByElementL(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index,
+ NEONByIndexedElementOp vop) {
+ VIXL_ASSERT((vd.Is4S() && vn.Is4H() && vm.Is1H()) ||
+ (vd.Is4S() && vn.Is8H() && vm.Is1H()) ||
+ (vd.Is1S() && vn.Is1H() && vm.Is1H()) ||
+ (vd.Is2D() && vn.Is2S() && vm.Is1S()) ||
+ (vd.Is2D() && vn.Is4S() && vm.Is1S()) ||
+ (vd.Is1D() && vn.Is1S() && vm.Is1S()));
+
+ VIXL_ASSERT((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) ||
+ (vm.Is1S() && (vm_index < 4)));
+
+ Instr format, op = vop;
+ int index_num_bits = vm.Is1H() ? 3 : 2;
+ if (vd.IsScalar()) {
+ op |= NEONScalar | NEON_Q;
+ format = SFormat(vn);
+ } else {
+ format = VFormat(vn);
+ }
+ Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) |
+ Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+#define NEON_BYELEMENT_LIST(V) \
+ V(mul, NEON_MUL_byelement, vn.IsVector()) \
+ V(mla, NEON_MLA_byelement, vn.IsVector()) \
+ V(mls, NEON_MLS_byelement, vn.IsVector()) \
+ V(sqdmulh, NEON_SQDMULH_byelement, true) \
+ V(sqrdmulh, NEON_SQRDMULH_byelement, true)
+
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm, \
+ int vm_index) { \
+ VIXL_ASSERT(AS); \
+ NEONByElement(vd, vn, vm, vm_index, OP); \
+}
+NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+
+#define NEON_FPBYELEMENT_LIST(V) \
+ V(fmul, NEON_FMUL_byelement) \
+ V(fmla, NEON_FMLA_byelement) \
+ V(fmls, NEON_FMLS_byelement) \
+ V(fmulx, NEON_FMULX_byelement)
+
+
+#define DEFINE_ASM_FUNC(FN, OP) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm, \
+ int vm_index) { \
+ NEONFPByElement(vd, vn, vm, vm_index, OP); \
+}
+NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+
+#define NEON_BYELEMENT_LONG_LIST(V) \
+ V(sqdmull, NEON_SQDMULL_byelement, vn.IsScalar() || vn.IsD()) \
+ V(sqdmull2, NEON_SQDMULL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(sqdmlal, NEON_SQDMLAL_byelement, vn.IsScalar() || vn.IsD()) \
+ V(sqdmlal2, NEON_SQDMLAL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(sqdmlsl, NEON_SQDMLSL_byelement, vn.IsScalar() || vn.IsD()) \
+ V(sqdmlsl2, NEON_SQDMLSL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(smull, NEON_SMULL_byelement, vn.IsVector() && vn.IsD()) \
+ V(smull2, NEON_SMULL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(umull, NEON_UMULL_byelement, vn.IsVector() && vn.IsD()) \
+ V(umull2, NEON_UMULL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(smlal, NEON_SMLAL_byelement, vn.IsVector() && vn.IsD()) \
+ V(smlal2, NEON_SMLAL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(umlal, NEON_UMLAL_byelement, vn.IsVector() && vn.IsD()) \
+ V(umlal2, NEON_UMLAL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(smlsl, NEON_SMLSL_byelement, vn.IsVector() && vn.IsD()) \
+ V(smlsl2, NEON_SMLSL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(umlsl, NEON_UMLSL_byelement, vn.IsVector() && vn.IsD()) \
+ V(umlsl2, NEON_UMLSL_byelement, vn.IsVector() && vn.IsQ())
+
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm, \
+ int vm_index) { \
+ VIXL_ASSERT(AS); \
+ NEONByElementL(vd, vn, vm, vm_index, OP); \
+}
+NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+
+void Assembler::suqadd(const VRegister& vd,
+ const VRegister& vn) {
+ NEON2RegMisc(vd, vn, NEON_SUQADD);
+}
+
+
+void Assembler::usqadd(const VRegister& vd,
+ const VRegister& vn) {
+ NEON2RegMisc(vd, vn, NEON_USQADD);
+}
+
+
+void Assembler::abs(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_ABS);
+}
+
+
+void Assembler::sqabs(const VRegister& vd,
+ const VRegister& vn) {
+ NEON2RegMisc(vd, vn, NEON_SQABS);
+}
+
+
+void Assembler::neg(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_NEG);
+}
+
+
+void Assembler::sqneg(const VRegister& vd,
+ const VRegister& vn) {
+ NEON2RegMisc(vd, vn, NEON_SQNEG);
+}
+
+
+void Assembler::NEONXtn(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp vop) {
+ Instr format, op = vop;
+ if (vd.IsScalar()) {
+ VIXL_ASSERT((vd.Is1B() && vn.Is1H()) ||
+ (vd.Is1H() && vn.Is1S()) ||
+ (vd.Is1S() && vn.Is1D()));
+ op |= NEON_Q | NEONScalar;
+ format = SFormat(vd);
+ } else {
+ VIXL_ASSERT((vd.Is8B() && vn.Is8H()) ||
+ (vd.Is4H() && vn.Is4S()) ||
+ (vd.Is2S() && vn.Is2D()) ||
+ (vd.Is16B() && vn.Is8H()) ||
+ (vd.Is8H() && vn.Is4S()) ||
+ (vd.Is4S() && vn.Is2D()));
+ format = VFormat(vd);
+ }
+ Emit(format | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::xtn(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsVector() && vd.IsD());
+ NEONXtn(vd, vn, NEON_XTN);
+}
+
+
+void Assembler::xtn2(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsVector() && vd.IsQ());
+ NEONXtn(vd, vn, NEON_XTN);
+}
+
+
+void Assembler::sqxtn(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsScalar() || vd.IsD());
+ NEONXtn(vd, vn, NEON_SQXTN);
+}
+
+
+void Assembler::sqxtn2(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsVector() && vd.IsQ());
+ NEONXtn(vd, vn, NEON_SQXTN);
+}
+
+
+void Assembler::sqxtun(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsScalar() || vd.IsD());
+ NEONXtn(vd, vn, NEON_SQXTUN);
+}
+
+
+void Assembler::sqxtun2(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsVector() && vd.IsQ());
+ NEONXtn(vd, vn, NEON_SQXTUN);
+}
+
+
+void Assembler::uqxtn(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsScalar() || vd.IsD());
+ NEONXtn(vd, vn, NEON_UQXTN);
+}
+
+
+void Assembler::uqxtn2(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsVector() && vd.IsQ());
+ NEONXtn(vd, vn, NEON_UQXTN);
+}
+
+
+// NEON NOT and RBIT are distinguished by bit 22, the bottom bit of "size".
+void Assembler::not_(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is8B() || vd.Is16B());
+ Emit(VFormat(vd) | NEON_RBIT_NOT | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::rbit(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is8B() || vd.Is16B());
+ Emit(VFormat(vn) | (1 << NEONSize_offset) | NEON_RBIT_NOT | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::ext(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int index) {
+ VIXL_ASSERT(AreSameFormat(vd, vn, vm));
+ VIXL_ASSERT(vd.Is8B() || vd.Is16B());
+ VIXL_ASSERT((0 <= index) && (index < vd.lanes()));
+ Emit(VFormat(vd) | NEON_EXT | Rm(vm) | ImmNEONExt(index) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::dup(const VRegister& vd,
+ const VRegister& vn,
+ int vn_index) {
+ Instr q, scalar;
+
+ // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
+ // number of lanes, and T is b, h, s or d.
+ int lane_size = vn.LaneSizeInBytes();
+ NEONFormatField format;
+ switch (lane_size) {
+ case 1: format = NEON_16B; break;
+ case 2: format = NEON_8H; break;
+ case 4: format = NEON_4S; break;
+ default:
+ VIXL_ASSERT(lane_size == 8);
+ format = NEON_2D;
+ break;
+ }
+
+ if (vd.IsScalar()) {
+ q = NEON_Q;
+ scalar = NEONScalar;
+ } else {
+ VIXL_ASSERT(!vd.Is1D());
+ q = vd.IsD() ? 0 : NEON_Q;
+ scalar = 0;
+ }
+ Emit(q | scalar | NEON_DUP_ELEMENT |
+ ImmNEON5(format, vn_index) | Rn(vn) | Rd(vd));
+}
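+
+// Illustrative calls (register choices are only examples):
+//   dup(v2.V4S(), v3.V4S(), 1);  // broadcast lane 1 of v3 across v2
+//   dup(s2, v3.V4S(), 1);        // scalar form, also reachable via mov()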
+
+
+void Assembler::mov(const VRegister& vd,
+ const VRegister& vn,
+ int vn_index) {
+ VIXL_ASSERT(vd.IsScalar());
+ dup(vd, vn, vn_index);
+}
+
+
+void Assembler::dup(const VRegister& vd, const Register& rn) {
+ VIXL_ASSERT(!vd.Is1D());
+ VIXL_ASSERT(vd.Is2D() == rn.IsX());
+ int q = vd.IsD() ? 0 : NEON_Q;
+ Emit(q | NEON_DUP_GENERAL | ImmNEON5(VFormat(vd), 0) | Rn(rn) | Rd(vd));
+}
+
+
+void Assembler::ins(const VRegister& vd,
+ int vd_index,
+ const VRegister& vn,
+ int vn_index) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
+ // number of lanes, and T is b, h, s or d.
+ int lane_size = vd.LaneSizeInBytes();
+ NEONFormatField format;
+ switch (lane_size) {
+ case 1: format = NEON_16B; break;
+ case 2: format = NEON_8H; break;
+ case 4: format = NEON_4S; break;
+ default:
+ VIXL_ASSERT(lane_size == 8);
+ format = NEON_2D;
+ break;
+ }
+
+ VIXL_ASSERT((0 <= vd_index) &&
+ (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
+ VIXL_ASSERT((0 <= vn_index) &&
+ (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
+ Emit(NEON_INS_ELEMENT | ImmNEON5(format, vd_index) |
+ ImmNEON4(format, vn_index) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::mov(const VRegister& vd,
+ int vd_index,
+ const VRegister& vn,
+ int vn_index) {
+ ins(vd, vd_index, vn, vn_index);
+}
+
+
+void Assembler::ins(const VRegister& vd,
+ int vd_index,
+ const Register& rn) {
+ // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
+ // number of lanes, and T is b, h, s or d.
+ int lane_size = vd.LaneSizeInBytes();
+ NEONFormatField format;
+ switch (lane_size) {
+ case 1: format = NEON_16B; VIXL_ASSERT(rn.IsW()); break;
+ case 2: format = NEON_8H; VIXL_ASSERT(rn.IsW()); break;
+ case 4: format = NEON_4S; VIXL_ASSERT(rn.IsW()); break;
+ default:
+ VIXL_ASSERT(lane_size == 8);
+ VIXL_ASSERT(rn.IsX());
+ format = NEON_2D;
+ break;
+ }
+
+ VIXL_ASSERT((0 <= vd_index) &&
+ (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
+ Emit(NEON_INS_GENERAL | ImmNEON5(format, vd_index) | Rn(rn) | Rd(vd));
+}
+
+
+void Assembler::mov(const VRegister& vd,
+ int vd_index,
+ const Register& rn) {
+ ins(vd, vd_index, rn);
+}
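+
+// Illustrative calls (register choices are only examples):
+//   ins(v0.V16B(), 3, w1);         // insert the low byte of w1 into lane 3
+//   ins(v0.V4S(), 1, v1.V4S(), 0); // copy lane 0 of v1 into lane 1 of v0
+//   umov(w2, v0.V4S(), 1);         // defined below: extract lane 1 into w2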
+
+
+void Assembler::umov(const Register& rd,
+ const VRegister& vn,
+ int vn_index) {
+ // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
+ // number of lanes, and T is b, h, s or d.
+ int lane_size = vn.LaneSizeInBytes();
+ NEONFormatField format;
+ Instr q = 0;
+ switch (lane_size) {
+ case 1: format = NEON_16B; VIXL_ASSERT(rd.IsW()); break;
+ case 2: format = NEON_8H; VIXL_ASSERT(rd.IsW()); break;
+ case 4: format = NEON_4S; VIXL_ASSERT(rd.IsW()); break;
+ default:
+ VIXL_ASSERT(lane_size == 8);
+ VIXL_ASSERT(rd.IsX());
+ format = NEON_2D;
+ q = NEON_Q;
+ break;
+ }
+
+ VIXL_ASSERT((0 <= vn_index) &&
+ (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
+ Emit(q | NEON_UMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd));
+}
+
+
+void Assembler::mov(const Register& rd,
+ const VRegister& vn,
+ int vn_index) {
+ VIXL_ASSERT(vn.SizeInBytes() >= 4);
+ umov(rd, vn, vn_index);
+}
+
+
+void Assembler::smov(const Register& rd,
+ const VRegister& vn,
+ int vn_index) {
+ // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
+ // number of lanes, and T is b, h or s.
+ int lane_size = vn.LaneSizeInBytes();
+ NEONFormatField format;
+ Instr q = 0;
+ VIXL_ASSERT(lane_size != 8);
+ switch (lane_size) {
+ case 1: format = NEON_16B; break;
+ case 2: format = NEON_8H; break;
+ default:
+ VIXL_ASSERT(lane_size == 4);
+ VIXL_ASSERT(rd.IsX());
+ format = NEON_4S;
+ break;
+ }
+ q = rd.IsW() ? 0 : NEON_Q;
+ VIXL_ASSERT((0 <= vn_index) &&
+ (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
+ Emit(q | NEON_SMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd));
+}
+
+
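+// Taken together, the two lane moves above follow the architectural pattern
+// that umov zero-extends, pairing B, H and S lanes with a W destination and
+// D lanes with an X destination, while smov sign-extends: B and H lanes may
+// target either a W or an X register (the Q bit tracks the destination
+// width), S lanes must target an X register, and there is no smov from a D
+// lane.
+
+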
+void Assembler::cls(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(!vd.Is1D() && !vd.Is2D());
+ Emit(VFormat(vn) | NEON_CLS | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::clz(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(!vd.Is1D() && !vd.Is2D());
+ Emit(VFormat(vn) | NEON_CLZ | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::cnt(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is8B() || vd.Is16B());
+ Emit(VFormat(vn) | NEON_CNT | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::rev16(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is8B() || vd.Is16B());
+ Emit(VFormat(vn) | NEON_REV16 | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::rev32(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H());
+ Emit(VFormat(vn) | NEON_REV32 | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::rev64(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(!vd.Is1D() && !vd.Is2D());
+ Emit(VFormat(vn) | NEON_REV64 | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::ursqrte(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is2S() || vd.Is4S());
+ Emit(VFormat(vn) | NEON_URSQRTE | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::urecpe(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is2S() || vd.Is4S());
+ Emit(VFormat(vn) | NEON_URECPE | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEONAddlp(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp op) {
+ VIXL_ASSERT((op == NEON_SADDLP) ||
+ (op == NEON_UADDLP) ||
+ (op == NEON_SADALP) ||
+ (op == NEON_UADALP));
+
+ VIXL_ASSERT((vn.Is8B() && vd.Is4H()) ||
+ (vn.Is4H() && vd.Is2S()) ||
+ (vn.Is2S() && vd.Is1D()) ||
+ (vn.Is16B() && vd.Is8H())||
+ (vn.Is8H() && vd.Is4S()) ||
+ (vn.Is4S() && vd.Is2D()));
+ Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::saddlp(const VRegister& vd,
+ const VRegister& vn) {
+ NEONAddlp(vd, vn, NEON_SADDLP);
+}
+
+
+void Assembler::uaddlp(const VRegister& vd,
+ const VRegister& vn) {
+ NEONAddlp(vd, vn, NEON_UADDLP);
+}
+
+
+void Assembler::sadalp(const VRegister& vd,
+ const VRegister& vn) {
+ NEONAddlp(vd, vn, NEON_SADALP);
+}
+
+
+void Assembler::uadalp(const VRegister& vd,
+ const VRegister& vn) {
+ NEONAddlp(vd, vn, NEON_UADALP);
+}
+
+
+void Assembler::NEONAcrossLanesL(const VRegister& vd,
+ const VRegister& vn,
+ NEONAcrossLanesOp op) {
+ VIXL_ASSERT((vn.Is8B() && vd.Is1H()) ||
+ (vn.Is16B() && vd.Is1H()) ||
+ (vn.Is4H() && vd.Is1S()) ||
+ (vn.Is8H() && vd.Is1S()) ||
+ (vn.Is4S() && vd.Is1D()));
+ Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::saddlv(const VRegister& vd,
+ const VRegister& vn) {
+ NEONAcrossLanesL(vd, vn, NEON_SADDLV);
+}
+
+
+void Assembler::uaddlv(const VRegister& vd,
+ const VRegister& vn) {
+ NEONAcrossLanesL(vd, vn, NEON_UADDLV);
+}
+
+
+void Assembler::NEONAcrossLanes(const VRegister& vd,
+ const VRegister& vn,
+ NEONAcrossLanesOp op) {
+ VIXL_ASSERT((vn.Is8B() && vd.Is1B()) ||
+ (vn.Is16B() && vd.Is1B()) ||
+ (vn.Is4H() && vd.Is1H()) ||
+ (vn.Is8H() && vd.Is1H()) ||
+ (vn.Is4S() && vd.Is1S()));
+ if ((op & NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
+ Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
+ } else {
+ Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
+ }
+}
+
+
+#define NEON_ACROSSLANES_LIST(V) \
+ V(fmaxv, NEON_FMAXV, vd.Is1S()) \
+ V(fminv, NEON_FMINV, vd.Is1S()) \
+ V(fmaxnmv, NEON_FMAXNMV, vd.Is1S()) \
+ V(fminnmv, NEON_FMINNMV, vd.Is1S()) \
+ V(addv, NEON_ADDV, true) \
+ V(smaxv, NEON_SMAXV, true) \
+ V(sminv, NEON_SMINV, true) \
+ V(umaxv, NEON_UMAXV, true) \
+ V(uminv, NEON_UMINV, true)
+
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn) { \
+ VIXL_ASSERT(AS); \
+ NEONAcrossLanes(vd, vn, OP); \
+}
+NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+
+void Assembler::NEONPerm(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEONPermOp op) {
+ VIXL_ASSERT(AreSameFormat(vd, vn, vm));
+ VIXL_ASSERT(!vd.Is1D());
+ Emit(VFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::trn1(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_TRN1);
+}
+
+
+void Assembler::trn2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_TRN2);
+}
+
+
+void Assembler::uzp1(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_UZP1);
+}
+
+
+void Assembler::uzp2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_UZP2);
+}
+
+
+void Assembler::zip1(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_ZIP1);
+}
+
+
+void Assembler::zip2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_ZIP2);
+}
+
+
+void Assembler::NEONShiftImmediate(const VRegister& vd,
+ const VRegister& vn,
+ NEONShiftImmediateOp op,
+ int immh_immb) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ Instr q, scalar;
+ if (vn.IsScalar()) {
+ q = NEON_Q;
+ scalar = NEONScalar;
+ } else {
+ q = vd.IsD() ? 0 : NEON_Q;
+ scalar = 0;
+ }
+ Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEONShiftLeftImmediate(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op) {
+ int laneSizeInBits = vn.LaneSizeInBits();
+ VIXL_ASSERT((shift >= 0) && (shift < laneSizeInBits));
+ NEONShiftImmediate(vd, vn, op, (laneSizeInBits + shift) << 16);
+}
+
+
+void Assembler::NEONShiftRightImmediate(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op) {
+ int laneSizeInBits = vn.LaneSizeInBits();
+ VIXL_ASSERT((shift >= 1) && (shift <= laneSizeInBits));
+ NEONShiftImmediate(vd, vn, op, ((2 * laneSizeInBits) - shift) << 16);
+}
+
+
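+// For reference, the immh:immb field built by the two helpers above encodes
+// both the lane size and the shift amount. A worked example (illustrative
+// only, derived from the expressions above): a left shift of 3 on 32-bit
+// lanes gives (32 + 3) << 16, i.e. immh:immb = 0b0100011, while a right
+// shift of 3 on the same lanes gives (2 * 32 - 3) << 16, i.e. immh:immb =
+// 0b0111101. The position of the leading set bit of immh identifies the lane
+// size; the remaining bits encode the shift (directly for left shifts, as
+// 2 * lane_size - shift for right shifts).
+
+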
+void Assembler::NEONShiftImmediateL(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op) {
+ int laneSizeInBits = vn.LaneSizeInBits();
+ VIXL_ASSERT((shift >= 0) && (shift < laneSizeInBits));
+ int immh_immb = (laneSizeInBits + shift) << 16;
+
+ VIXL_ASSERT((vn.Is8B() && vd.Is8H()) ||
+ (vn.Is4H() && vd.Is4S()) ||
+ (vn.Is2S() && vd.Is2D()) ||
+ (vn.Is16B() && vd.Is8H())||
+ (vn.Is8H() && vd.Is4S()) ||
+ (vn.Is4S() && vd.Is2D()));
+ Instr q;
+ q = vn.IsD() ? 0 : NEON_Q;
+ Emit(q | op | immh_immb | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEONShiftImmediateN(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op) {
+ Instr q, scalar;
+ int laneSizeInBits = vd.LaneSizeInBits();
+ VIXL_ASSERT((shift >= 1) && (shift <= laneSizeInBits));
+ int immh_immb = (2 * laneSizeInBits - shift) << 16;
+
+ if (vn.IsScalar()) {
+ VIXL_ASSERT((vd.Is1B() && vn.Is1H()) ||
+ (vd.Is1H() && vn.Is1S()) ||
+ (vd.Is1S() && vn.Is1D()));
+ q = NEON_Q;
+ scalar = NEONScalar;
+ } else {
+ VIXL_ASSERT((vd.Is8B() && vn.Is8H()) ||
+ (vd.Is4H() && vn.Is4S()) ||
+ (vd.Is2S() && vn.Is2D()) ||
+ (vd.Is16B() && vn.Is8H())||
+ (vd.Is8H() && vn.Is4S()) ||
+ (vd.Is4S() && vn.Is2D()));
+ scalar = 0;
+ q = vd.IsD() ? 0 : NEON_Q;
+ }
+ Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::shl(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftLeftImmediate(vd, vn, shift, NEON_SHL);
+}
+
+
+void Assembler::sli(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftLeftImmediate(vd, vn, shift, NEON_SLI);
+}
+
+
+void Assembler::sqshl(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHL_imm);
+}
+
+
+void Assembler::sqshlu(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHLU);
+}
+
+
+void Assembler::uqshl(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ NEONShiftLeftImmediate(vd, vn, shift, NEON_UQSHL_imm);
+}
+
+
+void Assembler::sshll(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsD());
+ NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL);
+}
+
+
+void Assembler::sshll2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsQ());
+ NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL);
+}
+
+
+void Assembler::sxtl(const VRegister& vd,
+ const VRegister& vn) {
+ sshll(vd, vn, 0);
+}
+
+
+void Assembler::sxtl2(const VRegister& vd,
+ const VRegister& vn) {
+ sshll2(vd, vn, 0);
+}
+
+
+void Assembler::ushll(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsD());
+ NEONShiftImmediateL(vd, vn, shift, NEON_USHLL);
+}
+
+
+void Assembler::ushll2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsQ());
+ NEONShiftImmediateL(vd, vn, shift, NEON_USHLL);
+}
+
+
+void Assembler::uxtl(const VRegister& vd,
+ const VRegister& vn) {
+ ushll(vd, vn, 0);
+}
+
+
+void Assembler::uxtl2(const VRegister& vd,
+ const VRegister& vn) {
+ ushll2(vd, vn, 0);
+}
+
+
+void Assembler::sri(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_SRI);
+}
+
+
+void Assembler::sshr(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_SSHR);
+}
+
+
+void Assembler::ushr(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_USHR);
+}
+
+
+void Assembler::srshr(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_SRSHR);
+}
+
+
+void Assembler::urshr(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_URSHR);
+}
+
+
+void Assembler::ssra(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_SSRA);
+}
+
+
+void Assembler::usra(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_USRA);
+}
+
+
+void Assembler::srsra(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_SRSRA);
+}
+
+
+void Assembler::ursra(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_URSRA);
+}
+
+
+void Assembler::shrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsD());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SHRN);
+}
+
+
+void Assembler::shrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SHRN);
+}
+
+
+void Assembler::rshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsD());
+ NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN);
+}
+
+
+void Assembler::rshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN);
+}
+
+
+void Assembler::sqshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN);
+}
+
+
+void Assembler::sqshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN);
+}
+
+
+void Assembler::sqrshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN);
+}
+
+
+void Assembler::sqrshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN);
+}
+
+
+void Assembler::sqshrun(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN);
+}
+
+
+void Assembler::sqshrun2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN);
+}
+
+
+void Assembler::sqrshrun(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN);
+}
+
+
+void Assembler::sqrshrun2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN);
+}
+
+
+void Assembler::uqshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN);
+}
+
+
+void Assembler::uqshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN);
+}
+
+
+void Assembler::uqrshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN);
+}
+
+
+void Assembler::uqrshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN);
+}
+
+
+// Note:
+// Below, a difference in case for the same letter indicates a
+// negated bit.
+// If b is 1, then B is 0.
+uint32_t Assembler::FP32ToImm8(float imm) {
+ VIXL_ASSERT(IsImmFP32(imm));
+ // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
+ uint32_t bits = FloatToRawbits(imm);
+ // bit7: a000.0000
+ uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
+ // bit6: 0b00.0000
+ uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
+ // bit5_to_0: 00cd.efgh
+ uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
+
+ return bit7 | bit6 | bit5_to_0;
+}
+
+
+Instr Assembler::ImmFP32(float imm) {
+ return FP32ToImm8(imm) << ImmFP_offset;
+}
+
+
+uint32_t Assembler::FP64ToImm8(double imm) {
+ VIXL_ASSERT(IsImmFP64(imm));
+ // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+ // 0000.0000.0000.0000.0000.0000.0000.0000
+ uint64_t bits = DoubleToRawbits(imm);
+ // bit7: a000.0000
+ uint64_t bit7 = ((bits >> 63) & 0x1) << 7;
+ // bit6: 0b00.0000
+ uint64_t bit6 = ((bits >> 61) & 0x1) << 6;
+ // bit5_to_0: 00cd.efgh
+ uint64_t bit5_to_0 = (bits >> 48) & 0x3f;
+
+ return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
+}
+
+
+Instr Assembler::ImmFP64(double imm) {
+ return FP64ToImm8(imm) << ImmFP_offset;
+}
+
+
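+// A worked example of the 8-bit immediate encoding above (illustrative only):
+// 1.0f has the raw bit pattern 0x3f800000, so FP32ToImm8 extracts a = 0,
+// b = 1 and cdefgh = 0b110000, giving imm8 = 0x70; -1.0f differs only in the
+// sign bit and gives 0xf0, and the double 1.0 (0x3ff0000000000000) yields the
+// same 0x70 via FP64ToImm8. 2.0f (0x40000000) has b = 0 and a zero fraction,
+// so it encodes as imm8 = 0x00. A value such as 0.1f is rejected by IsImmFP32
+// because its low mantissa bits are non-zero.
+
+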
+// Code generation helpers.
+void Assembler::MoveWide(const Register& rd,
+ uint64_t imm,
+ int shift,
+ MoveWideImmediateOp mov_op) {
+ // Ignore the top 32 bits of an immediate if we're moving to a W register.
+ if (rd.Is32Bits()) {
+ // Check that the top 32 bits are zero (a positive 32-bit number) or top
+ // 33 bits are one (a negative 32-bit number, sign extended to 64 bits).
+ VIXL_ASSERT(((imm >> kWRegSize) == 0) ||
+ ((imm >> (kWRegSize - 1)) == 0x1ffffffff));
+ imm &= kWRegMask;
+ }
+
+ if (shift >= 0) {
+ // Explicit shift specified.
+ VIXL_ASSERT((shift == 0) || (shift == 16) ||
+ (shift == 32) || (shift == 48));
+ VIXL_ASSERT(rd.Is64Bits() || (shift == 0) || (shift == 16));
+ shift /= 16;
+ } else {
+ // Calculate a new immediate and shift combination to encode the immediate
+ // argument.
+ shift = 0;
+ if ((imm & 0xffffffffffff0000) == 0) {
+ // Nothing to do.
+ } else if ((imm & 0xffffffff0000ffff) == 0) {
+ imm >>= 16;
+ shift = 1;
+ } else if ((imm & 0xffff0000ffffffff) == 0) {
+ VIXL_ASSERT(rd.Is64Bits());
+ imm >>= 32;
+ shift = 2;
+ } else if ((imm & 0x0000ffffffffffff) == 0) {
+ VIXL_ASSERT(rd.Is64Bits());
+ imm >>= 48;
+ shift = 3;
+ }
+ }
+
+ VIXL_ASSERT(IsUint16(imm));
+
+ Emit(SF(rd) | MoveWideImmediateFixed | mov_op |
+ Rd(rd) | ImmMoveWide(imm) | ShiftMoveWide(shift));
+}
+
+
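+// A worked example of the shift inference above (illustrative only): with a
+// negative shift argument, an immediate of 0x12340000 matches the
+// (imm & 0xffffffff0000ffff) == 0 case, so it is emitted as imm16 = 0x1234
+// with hw shift 1 (a left shift by 16 bits); 0x0000123400000000 requires a
+// 64-bit destination and is emitted as imm16 = 0x1234 with hw shift 2. An
+// immediate that is not a single aligned half-word leaves imm unchanged and
+// trips the IsUint16 assertion, so such values are expected to be split up
+// by a higher layer (for example the MacroAssembler).
+
+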
+void Assembler::AddSub(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubOp op) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ if (operand.IsImmediate()) {
+ int64_t immediate = operand.immediate();
+ VIXL_ASSERT(IsImmAddSub(immediate));
+ Instr dest_reg = (S == SetFlags) ? Rd(rd) : RdSP(rd);
+ Emit(SF(rd) | AddSubImmediateFixed | op | Flags(S) |
+ ImmAddSub(static_cast<int>(immediate)) | dest_reg | RnSP(rn));
+ } else if (operand.IsShiftedRegister()) {
+ VIXL_ASSERT(operand.reg().size() == rd.size());
+ VIXL_ASSERT(operand.shift() != ROR);
+
+ // For instructions of the form:
+ // add/sub wsp, <Wn>, <Wm> [, LSL #0-3 ]
+ // add/sub <Wd>, wsp, <Wm> [, LSL #0-3 ]
+ // add/sub wsp, wsp, <Wm> [, LSL #0-3 ]
+ // adds/subs <Wd>, wsp, <Wm> [, LSL #0-3 ]
+ // or their 64-bit register equivalents, convert the operand from shifted to
+ // extended register mode, and emit an add/sub extended instruction.
+ if (rn.IsSP() || rd.IsSP()) {
+ VIXL_ASSERT(!(rd.IsSP() && (S == SetFlags)));
+ DataProcExtendedRegister(rd, rn, operand.ToExtendedRegister(), S,
+ AddSubExtendedFixed | op);
+ } else {
+ DataProcShiftedRegister(rd, rn, operand, S, AddSubShiftedFixed | op);
+ }
+ } else {
+ VIXL_ASSERT(operand.IsExtendedRegister());
+ DataProcExtendedRegister(rd, rn, operand, S, AddSubExtendedFixed | op);
+ }
+}
+
+
+void Assembler::AddSubWithCarry(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubWithCarryOp op) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == operand.reg().size());
+ VIXL_ASSERT(operand.IsShiftedRegister() && (operand.shift_amount() == 0));
+ Emit(SF(rd) | op | Flags(S) | Rm(operand.reg()) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::hlt(int code) {
+ VIXL_ASSERT(IsUint16(code));
+ Emit(HLT | ImmException(code));
+}
+
+
+void Assembler::brk(int code) {
+ VIXL_ASSERT(IsUint16(code));
+ Emit(BRK | ImmException(code));
+}
+
+
+void Assembler::svc(int code) {
+ Emit(SVC | ImmException(code));
+}
+
+
+void Assembler::ConditionalCompare(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond,
+ ConditionalCompareOp op) {
+ Instr ccmpop;
+ if (operand.IsImmediate()) {
+ int64_t immediate = operand.immediate();
+ VIXL_ASSERT(IsImmConditionalCompare(immediate));
+ ccmpop = ConditionalCompareImmediateFixed | op |
+ ImmCondCmp(static_cast<unsigned>(immediate));
+ } else {
+ VIXL_ASSERT(operand.IsShiftedRegister() && (operand.shift_amount() == 0));
+ ccmpop = ConditionalCompareRegisterFixed | op | Rm(operand.reg());
+ }
+ Emit(SF(rn) | ccmpop | Cond(cond) | Rn(rn) | Nzcv(nzcv));
+}
+
+
+void Assembler::DataProcessing1Source(const Register& rd,
+ const Register& rn,
+ DataProcessing1SourceOp op) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ Emit(SF(rn) | op | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::FPDataProcessing1Source(const VRegister& vd,
+ const VRegister& vn,
+ FPDataProcessing1SourceOp op) {
+ VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D());
+ Emit(FPType(vn) | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::FPDataProcessing3Source(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va,
+ FPDataProcessing3SourceOp op) {
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ VIXL_ASSERT(AreSameSizeAndType(vd, vn, vm, va));
+ Emit(FPType(vd) | op | Rm(vm) | Rn(vn) | Rd(vd) | Ra(va));
+}
+
+
+void Assembler::NEONModifiedImmShiftLsl(const VRegister& vd,
+ const int imm8,
+ const int left_shift,
+ NEONModifiedImmediateOp op) {
+ VIXL_ASSERT(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H() ||
+ vd.Is2S() || vd.Is4S());
+ VIXL_ASSERT((left_shift == 0) || (left_shift == 8) ||
+ (left_shift == 16) || (left_shift == 24));
+ VIXL_ASSERT(IsUint8(imm8));
+
+ int cmode_1, cmode_2, cmode_3;
+ if (vd.Is8B() || vd.Is16B()) {
+ VIXL_ASSERT(op == NEONModifiedImmediate_MOVI);
+ cmode_1 = 1;
+ cmode_2 = 1;
+ cmode_3 = 1;
+ } else {
+ cmode_1 = (left_shift >> 3) & 1;
+ cmode_2 = left_shift >> 4;
+ cmode_3 = 0;
+ if (vd.Is4H() || vd.Is8H()) {
+ VIXL_ASSERT((left_shift == 0) || (left_shift == 8));
+ cmode_3 = 1;
+ }
+ }
+ int cmode = (cmode_3 << 3) | (cmode_2 << 2) | (cmode_1 << 1);
+
+ int q = vd.IsQ() ? NEON_Q : 0;
+
+ Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd));
+}
+
+
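+// A worked example of the cmode computation above (illustrative only): a
+// per-halfword immediate shifted left by 8 (a 4H or 8H destination) gives
+// cmode_1 = (8 >> 3) & 1 = 1, cmode_2 = 8 >> 4 = 0 and cmode_3 = 1, so
+// cmode = 0b1010; a per-word immediate shifted left by 16 gives cmode_1 = 0,
+// cmode_2 = 1 and cmode_3 = 0, so cmode = 0b0100. Byte-sized destinations
+// always produce cmode = 0b1110, which is why only the MOVI form is accepted
+// for them: a byte immediate has no room for a shift.
+
+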
+void Assembler::NEONModifiedImmShiftMsl(const VRegister& vd,
+ const int imm8,
+ const int shift_amount,
+ NEONModifiedImmediateOp op) {
+ VIXL_ASSERT(vd.Is2S() || vd.Is4S());
+ VIXL_ASSERT((shift_amount == 8) || (shift_amount == 16));
+ VIXL_ASSERT(IsUint8(imm8));
+
+ int cmode_0 = (shift_amount >> 4) & 1;
+ int cmode = 0xc | cmode_0;
+
+ int q = vd.IsQ() ? NEON_Q : 0;
+
+ Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd));
+}
+
+
+void Assembler::EmitShift(const Register& rd,
+ const Register& rn,
+ Shift shift,
+ unsigned shift_amount) {
+ switch (shift) {
+ case LSL:
+ lsl(rd, rn, shift_amount);
+ break;
+ case LSR:
+ lsr(rd, rn, shift_amount);
+ break;
+ case ASR:
+ asr(rd, rn, shift_amount);
+ break;
+ case ROR:
+ ror(rd, rn, shift_amount);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
+
+
+void Assembler::EmitExtendShift(const Register& rd,
+ const Register& rn,
+ Extend extend,
+ unsigned left_shift) {
+ VIXL_ASSERT(rd.size() >= rn.size());
+ unsigned reg_size = rd.size();
+ // Use the correct size of register.
+ Register rn_ = Register(rn.code(), rd.size());
+ // Bits extracted are high_bit:0.
+ unsigned high_bit = (8 << (extend & 0x3)) - 1;
+ // Number of bits left in the result that are not introduced by the shift.
+ unsigned non_shift_bits = (reg_size - left_shift) & (reg_size - 1);
+
+ if ((non_shift_bits > high_bit) || (non_shift_bits == 0)) {
+ switch (extend) {
+ case UXTB:
+ case UXTH:
+ case UXTW: ubfm(rd, rn_, non_shift_bits, high_bit); break;
+ case SXTB:
+ case SXTH:
+ case SXTW: sbfm(rd, rn_, non_shift_bits, high_bit); break;
+ case UXTX:
+ case SXTX: {
+ VIXL_ASSERT(rn.size() == kXRegSize);
+ // Nothing to extend. Just shift.
+ lsl(rd, rn_, left_shift);
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ } else {
+ // No need to extend as the extended bits would be shifted away.
+ lsl(rd, rn_, left_shift);
+ }
+}
+
+
+void Assembler::DataProcExtendedRegister(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ Instr op) {
+ Instr dest_reg = (S == SetFlags) ? Rd(rd) : RdSP(rd);
+ Emit(SF(rd) | op | Flags(S) | Rm(operand.reg()) |
+ ExtendMode(operand.extend()) | ImmExtendShift(operand.shift_amount()) |
+ dest_reg | RnSP(rn));
+}
+
+
+Instr Assembler::LoadStoreMemOperand(const MemOperand& addr,
+ unsigned access_size,
+ LoadStoreScalingOption option) {
+ Instr base = RnSP(addr.base());
+ int64_t offset = addr.offset();
+
+ if (addr.IsImmediateOffset()) {
+ bool prefer_unscaled = (option == PreferUnscaledOffset) ||
+ (option == RequireUnscaledOffset);
+ if (prefer_unscaled && IsImmLSUnscaled(offset)) {
+ // Use the unscaled addressing mode.
+ return base | LoadStoreUnscaledOffsetFixed |
+ ImmLS(static_cast<int>(offset));
+ }
+
+ if ((option != RequireUnscaledOffset) &&
+ IsImmLSScaled(offset, access_size)) {
+ // Use the scaled addressing mode.
+ return base | LoadStoreUnsignedOffsetFixed |
+ ImmLSUnsigned(static_cast<int>(offset) >> access_size);
+ }
+
+ if ((option != RequireScaledOffset) && IsImmLSUnscaled(offset)) {
+ // Use the unscaled addressing mode.
+ return base | LoadStoreUnscaledOffsetFixed |
+ ImmLS(static_cast<int>(offset));
+ }
+ }
+
+ // All remaining addressing modes are register-offset, pre-indexed or
+ // post-indexed modes.
+ VIXL_ASSERT((option != RequireUnscaledOffset) &&
+ (option != RequireScaledOffset));
+
+ if (addr.IsRegisterOffset()) {
+ Extend ext = addr.extend();
+ Shift shift = addr.shift();
+ unsigned shift_amount = addr.shift_amount();
+
+ // LSL is encoded in the option field as UXTX.
+ if (shift == LSL) {
+ ext = UXTX;
+ }
+
+ // Shifts are encoded in one bit, indicating a left shift by the memory
+ // access size.
+ VIXL_ASSERT((shift_amount == 0) || (shift_amount == access_size));
+ return base | LoadStoreRegisterOffsetFixed | Rm(addr.regoffset()) |
+ ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0);
+ }
+
+ if (addr.IsPreIndex() && IsImmLSUnscaled(offset)) {
+ return base | LoadStorePreIndexFixed | ImmLS(static_cast<int>(offset));
+ }
+
+ if (addr.IsPostIndex() && IsImmLSUnscaled(offset)) {
+ return base | LoadStorePostIndexFixed | ImmLS(static_cast<int>(offset));
+ }
+
+ // If this point is reached, the MemOperand (addr) cannot be encoded.
+ VIXL_UNREACHABLE();
+ return 0;
+}
+
+
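+// A worked example of the immediate-offset selection above (illustrative
+// only): for an 8-byte access (access_size == 3), an offset of 8 is a
+// multiple of the access size and 8 >> 3 == 1 fits the unsigned 12-bit field,
+// so, unless an unscaled offset is explicitly requested, the scaled
+// "unsigned offset" form is chosen. An offset of -8 cannot use that form,
+// but it fits the signed 9-bit unscaled field, so the LDUR/STUR style
+// encoding is used instead. An offset such as 1 << 16 fits neither field, so
+// a higher layer (for example the MacroAssembler) has to materialise the
+// address in a scratch register first.
+
+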
+void Assembler::LoadStore(const CPURegister& rt,
+ const MemOperand& addr,
+ LoadStoreOp op,
+ LoadStoreScalingOption option) {
+ Emit(op | Rt(rt) | LoadStoreMemOperand(addr, CalcLSDataSize(op), option));
+}
+
+
+void Assembler::Prefetch(PrefetchOperation op,
+ const MemOperand& addr,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(addr.IsRegisterOffset() || addr.IsImmediateOffset());
+
+ Instr prfop = ImmPrefetchOperation(op);
+ Emit(PRFM | prfop | LoadStoreMemOperand(addr, kXRegSizeInBytesLog2, option));
+}
+
+
+bool Assembler::IsImmAddSub(int64_t immediate) {
+ return IsUint12(immediate) ||
+ (IsUint12(immediate >> 12) && ((immediate & 0xfff) == 0));
+}
+
+
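+// For example, IsImmAddSub accepts 0xfff (a plain 12-bit value) and 0x123000
+// (a 12-bit value shifted left by 12), but rejects 0x1001, which needs more
+// than 12 significant bits while its low 12 bits are not all zero.
+
+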
+bool Assembler::IsImmConditionalCompare(int64_t immediate) {
+ return IsUint5(immediate);
+}
+
+
+bool Assembler::IsImmFP32(float imm) {
+ // Valid values will have the form:
+ // aBbb.bbbc.defg.h000.0000.0000.0000.0000
+ uint32_t bits = FloatToRawbits(imm);
+ // bits[19..0] are cleared.
+ if ((bits & 0x7ffff) != 0) {
+ return false;
+ }
+
+ // bits[29..25] are all set or all cleared.
+ uint32_t b_pattern = (bits >> 16) & 0x3e00;
+ if (b_pattern != 0 && b_pattern != 0x3e00) {
+ return false;
+ }
+
+ // bit[30] and bit[29] are opposite.
+ if (((bits ^ (bits << 1)) & 0x40000000) == 0) {
+ return false;
+ }
+
+ return true;
+}
+
+
+bool Assembler::IsImmFP64(double imm) {
+ // Valid values will have the form:
+ // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+ // 0000.0000.0000.0000.0000.0000.0000.0000
+ uint64_t bits = DoubleToRawbits(imm);
+ // bits[47..0] are cleared.
+ if ((bits & 0x0000ffffffffffff) != 0) {
+ return false;
+ }
+
+ // bits[61..54] are all set or all cleared.
+ uint32_t b_pattern = (bits >> 48) & 0x3fc0;
+ if ((b_pattern != 0) && (b_pattern != 0x3fc0)) {
+ return false;
+ }
+
+ // bit[62] and bit[61] are opposite.
+ if (((bits ^ (bits << 1)) & (UINT64_C(1) << 62)) == 0) {
+ return false;
+ }
+
+ return true;
+}
+
+
+bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size) {
+ VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2);
+ bool offset_is_size_multiple =
+ (((offset >> access_size) << access_size) == offset);
+ return offset_is_size_multiple && IsInt7(offset >> access_size);
+}
+
+
+bool Assembler::IsImmLSScaled(int64_t offset, unsigned access_size) {
+ VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2);
+ bool offset_is_size_multiple =
+ (((offset >> access_size) << access_size) == offset);
+ return offset_is_size_multiple && IsUint12(offset >> access_size);
+}
+
+
+bool Assembler::IsImmLSUnscaled(int64_t offset) {
+ return IsInt9(offset);
+}
+
+
+// The movn instruction can generate immediates containing an arbitrary 16-bit
+// value, with the remaining bits set, e.g. 0xffff1234 or 0xffff1234ffffffff.
+bool Assembler::IsImmMovn(uint64_t imm, unsigned reg_size) {
+ return IsImmMovz(~imm, reg_size);
+}
+
+
+// The movz instruction can generate immediates containing an arbitrary 16-bit
+// value, with the remaining bits clear, e.g. 0x00001234 or 0x0000123400000000.
+bool Assembler::IsImmMovz(uint64_t imm, unsigned reg_size) {
+ VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
+ return CountClearHalfWords(imm, reg_size) >= ((reg_size / 16) - 1);
+}
+
+
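+// For example, IsImmMovz accepts 0x0000123400000000 for an X register: only
+// the half-word at bit 32 is non-zero, so a single MOVZ with hw == 2 can
+// produce it. IsImmMovn accepts the bitwise inverse, 0xffffedcbffffffff,
+// which a single MOVN with hw == 2 can produce.
+
+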
+// Test if a given value can be encoded in the immediate field of a logical
+// instruction.
+// If it can be encoded, the function returns true, and values pointed to by n,
+// imm_s and imm_r are updated with immediates encoded in the format required
+// by the corresponding fields in the logical instruction.
+// If it can not be encoded, the function returns false, and the values pointed
+// to by n, imm_s and imm_r are undefined.
+bool Assembler::IsImmLogical(uint64_t value,
+ unsigned width,
+ unsigned* n,
+ unsigned* imm_s,
+ unsigned* imm_r) {
+ VIXL_ASSERT((width == kWRegSize) || (width == kXRegSize));
+
+ bool negate = false;
+
+ // Logical immediates are encoded using parameters n, imm_s and imm_r using
+ // the following table:
+ //
+ // N imms immr size S R
+ // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
+ // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
+ // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
+ // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
+ // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
+ // 0 11110s xxxxxr 2 UInt(s) UInt(r)
+ // (s bits must not be all set)
+ //
+ // A pattern is constructed of size bits, where the least significant S+1 bits
+ // are set. The pattern is rotated right by R, and repeated across a 32 or
+ // 64-bit value, depending on destination register width.
+ //
+ // Put another way: the basic format of a logical immediate is a single
+ // contiguous stretch of 1 bits, repeated across the whole word at intervals
+ // given by a power of 2. To identify them quickly, we first locate the
+ // lowest stretch of 1 bits, then the next 1 bit above that; that combination
+ // is different for every logical immediate, so it gives us all the
+ // information we need to identify the only logical immediate that our input
+ // could be, and then we simply check if that's the value we actually have.
+ //
+ // (The rotation parameter does give the possibility of the stretch of 1 bits
+ // going 'round the end' of the word. To deal with that, we observe that in
+ // any situation where that happens the bitwise NOT of the value is also a
+ // valid logical immediate. So we simply invert the input whenever its low bit
+ // is set, and then we know that the rotated case can't arise.)
+
+ if (value & 1) {
+ // If the low bit is 1, negate the value, and set a flag to remember that we
+ // did (so that we can adjust the return values appropriately).
+ negate = true;
+ value = ~value;
+ }
+
+ if (width == kWRegSize) {
+ // To handle 32-bit logical immediates, the very easiest thing is to repeat
+ // the input value twice to make a 64-bit word. The correct encoding of that
+ // as a logical immediate will also be the correct encoding of the 32-bit
+ // value.
+
+ // Avoid making the assumption that the most-significant 32 bits are zero by
+ // shifting the value left and duplicating it.
+ value <<= kWRegSize;
+ value |= value >> kWRegSize;
+ }
+
+ // The basic analysis idea: imagine our input word looks like this.
+ //
+ // 0011111000111110001111100011111000111110001111100011111000111110
+ // c b a
+ // |<--d-->|
+ //
+ // We find the lowest set bit (as an actual power-of-2 value, not its index)
+ // and call it a. Then we add a to our original number, which wipes out the
+ // bottommost stretch of set bits and replaces it with a 1 carried into the
+ // next zero bit. Then we look for the new lowest set bit, which is in
+ // position b, and subtract it, so now our number is just like the original
+ // but with the lowest stretch of set bits completely gone. Now we find the
+ // lowest set bit again, which is position c in the diagram above. Then we'll
+ // measure the distance d between bit positions a and c (using CLZ), and that
+ // tells us that the only valid logical immediate that could possibly be equal
+ // to this number is the one in which a stretch of bits running from a to just
+ // below b is replicated every d bits.
+ uint64_t a = LowestSetBit(value);
+ uint64_t value_plus_a = value + a;
+ uint64_t b = LowestSetBit(value_plus_a);
+ uint64_t value_plus_a_minus_b = value_plus_a - b;
+ uint64_t c = LowestSetBit(value_plus_a_minus_b);
+
+ int d, clz_a, out_n;
+ uint64_t mask;
+
+ if (c != 0) {
+ // The general case, in which there is more than one stretch of set bits.
+ // Compute the repeat distance d, and set up a bitmask covering the basic
+ // unit of repetition (i.e. a word with the bottom d bits set). Also, in all
+ // of these cases the N bit of the output will be zero.
+ clz_a = CountLeadingZeros(a, kXRegSize);
+ int clz_c = CountLeadingZeros(c, kXRegSize);
+ d = clz_a - clz_c;
+ mask = ((UINT64_C(1) << d) - 1);
+ out_n = 0;
+ } else {
+ // Handle degenerate cases.
+ //
+ // If any of those 'find lowest set bit' operations didn't find a set bit at
+ // all, then the word will have been zero thereafter, so in particular the
+ // last lowest_set_bit operation will have returned zero. So we can test for
+ // all the special case conditions in one go by seeing if c is zero.
+ if (a == 0) {
+ // The input was zero (or all 1 bits, which will come to here too after we
+ // inverted it at the start of the function), for which we just return
+ // false.
+ return false;
+ } else {
+ // Otherwise, if c was zero but a was not, then there's just one stretch
+ // of set bits in our word, meaning that we have the trivial case of
+ // d == 64 and only one 'repetition'. Set up all the same variables as in
+ // the general case above, and set the N bit in the output.
+ clz_a = CountLeadingZeros(a, kXRegSize);
+ d = 64;
+ mask = ~UINT64_C(0);
+ out_n = 1;
+ }
+ }
+
+ // If the repeat period d is not a power of two, it can't be encoded.
+ if (!IsPowerOf2(d)) {
+ return false;
+ }
+
+ if (((b - a) & ~mask) != 0) {
+ // If the bit stretch (b - a) does not fit within the mask derived from the
+ // repeat period, then fail.
+ return false;
+ }
+
+ // The only possible option is b - a repeated every d bits. Now we're going to
+ // actually construct the valid logical immediate derived from that
+ // specification, and see if it equals our original input.
+ //
+ // To repeat a value every d bits, we multiply it by a number of the form
+ // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
+ // be derived using a table lookup on CLZ(d).
+ static const uint64_t multipliers[] = {
+ 0x0000000000000001UL,
+ 0x0000000100000001UL,
+ 0x0001000100010001UL,
+ 0x0101010101010101UL,
+ 0x1111111111111111UL,
+ 0x5555555555555555UL,
+ };
+ uint64_t multiplier = multipliers[CountLeadingZeros(d, kXRegSize) - 57];
+ uint64_t candidate = (b - a) * multiplier;
+
+ if (value != candidate) {
+ // The candidate pattern doesn't match our input value, so fail.
+ return false;
+ }
+
+ // We have a match! This is a valid logical immediate, so now we have to
+ // construct the bits and pieces of the instruction encoding that generates
+ // it.
+
+ // Count the set bits in our basic stretch. The special case of clz(0) == -1
+ // makes the answer come out right for stretches that reach the very top of
+ // the word (e.g. numbers like 0xffffc00000000000).
+ int clz_b = (b == 0) ? -1 : CountLeadingZeros(b, kXRegSize);
+ int s = clz_a - clz_b;
+
+ // Decide how many bits to rotate right by, to put the low bit of that basic
+ // stretch in position a.
+ int r;
+ if (negate) {
+ // If we inverted the input right at the start of this function, here's
+ // where we compensate: the number of set bits becomes the number of clear
+ // bits, and the rotation count is based on position b rather than position
+ // a (since b is the location of the 'lowest' 1 bit after inversion).
+ s = d - s;
+ r = (clz_b + 1) & (d - 1);
+ } else {
+ r = (clz_a + 1) & (d - 1);
+ }
+
+ // Now we're done, except for having to encode the S output in such a way that
+ // it gives both the number of set bits and the length of the repeated
+ // segment. The s field is encoded like this:
+ //
+ // imms size S
+ // ssssss 64 UInt(ssssss)
+ // 0sssss 32 UInt(sssss)
+ // 10ssss 16 UInt(ssss)
+ // 110sss 8 UInt(sss)
+ // 1110ss 4 UInt(ss)
+ // 11110s 2 UInt(s)
+ //
+ // So we 'or' (-d << 1) with our computed s to form imms.
+ if ((n != NULL) || (imm_s != NULL) || (imm_r != NULL)) {
+ *n = out_n;
+ *imm_s = ((-d << 1) | (s - 1)) & 0x3f;
+ *imm_r = r;
+ }
+
+ return true;
+}
+
+
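+// A worked example of the algorithm above (illustrative only): take
+// value == 0x0707070707070707 with width == 64. The low bit is set, so the
+// value is inverted to 0xf8f8f8f8f8f8f8f8 and negate is recorded. Then
+// a == 0x08, b == 0x100 and c == 0x800, giving a repeat distance
+// d == 60 - 52 == 8 (a power of two) and mask == 0xff. The candidate
+// (b - a) * 0x0101010101010101 == 0xf8f8f8f8f8f8f8f8 matches the inverted
+// input, so the value is encodable. The negate adjustment turns the s == 5
+// set bits of the inverted chunk into 8 - 5 == 3 set bits of the original
+// one, r comes out as 0, and the outputs are n == 0, imm_s == 0b110010 and
+// imm_r == 0: exactly the encoding of a repeating 0x07 byte.
+
+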
+LoadStoreOp Assembler::LoadOpFor(const CPURegister& rt) {
+ VIXL_ASSERT(rt.IsValid());
+ if (rt.IsRegister()) {
+ return rt.Is64Bits() ? LDR_x : LDR_w;
+ } else {
+ VIXL_ASSERT(rt.IsVRegister());
+ switch (rt.SizeInBits()) {
+ case kBRegSize: return LDR_b;
+ case kHRegSize: return LDR_h;
+ case kSRegSize: return LDR_s;
+ case kDRegSize: return LDR_d;
+ default:
+ VIXL_ASSERT(rt.IsQ());
+ return LDR_q;
+ }
+ }
+}
+
+
+LoadStoreOp Assembler::StoreOpFor(const CPURegister& rt) {
+ VIXL_ASSERT(rt.IsValid());
+ if (rt.IsRegister()) {
+ return rt.Is64Bits() ? STR_x : STR_w;
+ } else {
+ VIXL_ASSERT(rt.IsVRegister());
+ switch (rt.SizeInBits()) {
+ case kBRegSize: return STR_b;
+ case kHRegSize: return STR_h;
+ case kSRegSize: return STR_s;
+ case kDRegSize: return STR_d;
+ default:
+ VIXL_ASSERT(rt.IsQ());
+ return STR_q;
+ }
+ }
+}
+
+
+LoadStorePairOp Assembler::StorePairOpFor(const CPURegister& rt,
+ const CPURegister& rt2) {
+ VIXL_ASSERT(AreSameSizeAndType(rt, rt2));
+ USE(rt2);
+ if (rt.IsRegister()) {
+ return rt.Is64Bits() ? STP_x : STP_w;
+ } else {
+ VIXL_ASSERT(rt.IsVRegister());
+ switch (rt.SizeInBytes()) {
+ case kSRegSizeInBytes: return STP_s;
+ case kDRegSizeInBytes: return STP_d;
+ default:
+ VIXL_ASSERT(rt.IsQ());
+ return STP_q;
+ }
+ }
+}
+
+
+LoadStorePairOp Assembler::LoadPairOpFor(const CPURegister& rt,
+ const CPURegister& rt2) {
+ VIXL_ASSERT((STP_w | LoadStorePairLBit) == LDP_w);
+ return static_cast<LoadStorePairOp>(StorePairOpFor(rt, rt2) |
+ LoadStorePairLBit);
+}
+
+
+LoadStorePairNonTemporalOp Assembler::StorePairNonTemporalOpFor(
+ const CPURegister& rt, const CPURegister& rt2) {
+ VIXL_ASSERT(AreSameSizeAndType(rt, rt2));
+ USE(rt2);
+ if (rt.IsRegister()) {
+ return rt.Is64Bits() ? STNP_x : STNP_w;
+ } else {
+ VIXL_ASSERT(rt.IsVRegister());
+ switch (rt.SizeInBytes()) {
+ case kSRegSizeInBytes: return STNP_s;
+ case kDRegSizeInBytes: return STNP_d;
+ default:
+ VIXL_ASSERT(rt.IsQ());
+ return STNP_q;
+ }
+ }
+}
+
+
+LoadStorePairNonTemporalOp Assembler::LoadPairNonTemporalOpFor(
+ const CPURegister& rt, const CPURegister& rt2) {
+ VIXL_ASSERT((STNP_w | LoadStorePairNonTemporalLBit) == LDNP_w);
+ return static_cast<LoadStorePairNonTemporalOp>(
+ StorePairNonTemporalOpFor(rt, rt2) | LoadStorePairNonTemporalLBit);
+}
+
+
+LoadLiteralOp Assembler::LoadLiteralOpFor(const CPURegister& rt) {
+ if (rt.IsRegister()) {
+ return rt.IsX() ? LDR_x_lit : LDR_w_lit;
+ } else {
+ VIXL_ASSERT(rt.IsVRegister());
+ switch (rt.SizeInBytes()) {
+ case kSRegSizeInBytes: return LDR_s_lit;
+ case kDRegSizeInBytes: return LDR_d_lit;
+ default:
+ VIXL_ASSERT(rt.IsQ());
+ return LDR_q_lit;
+ }
+ }
+}
+
+
+bool Assembler::CPUHas(const CPURegister& rt) const {
+ // Core registers are available without any particular CPU features.
+ if (rt.IsRegister()) return true;
+ VIXL_ASSERT(rt.IsVRegister());
+ // The architecture does not allow FP and NEON to be implemented separately,
+ // but we can crudely categorise them based on register size, since FP only
+ // uses D, S and (occasionally) H registers.
+ if (rt.IsH() || rt.IsS() || rt.IsD()) {
+ return CPUHas(CPUFeatures::kFP) || CPUHas(CPUFeatures::kNEON);
+ }
+ VIXL_ASSERT(rt.IsB() || rt.IsQ());
+ return CPUHas(CPUFeatures::kNEON);
+}
+
+
+bool Assembler::CPUHas(const CPURegister& rt, const CPURegister& rt2) const {
+ // This is currently only used for loads and stores, where rt and rt2 must
+ // have the same size and type. We could extend this to cover other cases if
+ // necessary, but for now we can avoid checking both registers.
+ VIXL_ASSERT(AreSameSizeAndType(rt, rt2));
+ USE(rt2);
+ return CPUHas(rt);
+}
+
+
+bool Assembler::CPUHas(SystemRegister sysreg) const {
+ switch (sysreg) {
+ case RNDR:
+ case RNDRRS:
+ return CPUHas(CPUFeatures::kRNG);
+ case FPCR:
+ case NZCV:
+ break;
+ }
+ return true;
+}
+
+
+bool AreAliased(const CPURegister& reg1, const CPURegister& reg2,
+ const CPURegister& reg3, const CPURegister& reg4,
+ const CPURegister& reg5, const CPURegister& reg6,
+ const CPURegister& reg7, const CPURegister& reg8) {
+ int number_of_valid_regs = 0;
+ int number_of_valid_fpregs = 0;
+
+ RegList unique_regs = 0;
+ RegList unique_fpregs = 0;
+
+ const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8};
+
+ for (unsigned i = 0; i < sizeof(regs) / sizeof(regs[0]); i++) {
+ if (regs[i].IsRegister()) {
+ number_of_valid_regs++;
+ unique_regs |= regs[i].Bit();
+ } else if (regs[i].IsVRegister()) {
+ number_of_valid_fpregs++;
+ unique_fpregs |= regs[i].Bit();
+ } else {
+ VIXL_ASSERT(!regs[i].IsValid());
+ }
+ }
+
+ int number_of_unique_regs = CountSetBits(unique_regs);
+ int number_of_unique_fpregs = CountSetBits(unique_fpregs);
+
+ VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs);
+ VIXL_ASSERT(number_of_valid_fpregs >= number_of_unique_fpregs);
+
+ return (number_of_valid_regs != number_of_unique_regs) ||
+ (number_of_valid_fpregs != number_of_unique_fpregs);
+}
+
+
+bool AreSameSizeAndType(const CPURegister& reg1, const CPURegister& reg2,
+ const CPURegister& reg3, const CPURegister& reg4,
+ const CPURegister& reg5, const CPURegister& reg6,
+ const CPURegister& reg7, const CPURegister& reg8) {
+ VIXL_ASSERT(reg1.IsValid());
+ bool match = true;
+ match &= !reg2.IsValid() || reg2.IsSameSizeAndType(reg1);
+ match &= !reg3.IsValid() || reg3.IsSameSizeAndType(reg1);
+ match &= !reg4.IsValid() || reg4.IsSameSizeAndType(reg1);
+ match &= !reg5.IsValid() || reg5.IsSameSizeAndType(reg1);
+ match &= !reg6.IsValid() || reg6.IsSameSizeAndType(reg1);
+ match &= !reg7.IsValid() || reg7.IsSameSizeAndType(reg1);
+ match &= !reg8.IsValid() || reg8.IsSameSizeAndType(reg1);
+ return match;
+}
+
+bool AreEven(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3,
+ const CPURegister& reg4,
+ const CPURegister& reg5,
+ const CPURegister& reg6,
+ const CPURegister& reg7,
+ const CPURegister& reg8) {
+ VIXL_ASSERT(reg1.IsValid());
+ bool even = (reg1.code() % 2) == 0;
+ even &= !reg2.IsValid() || ((reg2.code() % 2) == 0);
+ even &= !reg3.IsValid() || ((reg3.code() % 2) == 0);
+ even &= !reg4.IsValid() || ((reg4.code() % 2) == 0);
+ even &= !reg5.IsValid() || ((reg5.code() % 2) == 0);
+ even &= !reg6.IsValid() || ((reg6.code() % 2) == 0);
+ even &= !reg7.IsValid() || ((reg7.code() % 2) == 0);
+ even &= !reg8.IsValid() || ((reg8.code() % 2) == 0);
+ return even;
+}
+
+bool AreConsecutive(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3,
+ const CPURegister& reg4) {
+ VIXL_ASSERT(reg1.IsValid());
+
+ if (!reg2.IsValid()) {
+ return true;
+ } else if (reg2.code() != ((reg1.code() + 1) % kNumberOfRegisters)) {
+ return false;
+ }
+
+ if (!reg3.IsValid()) {
+ return true;
+ } else if (reg3.code() != ((reg2.code() + 1) % kNumberOfRegisters)) {
+ return false;
+ }
+
+ if (!reg4.IsValid()) {
+ return true;
+ } else if (reg4.code() != ((reg3.code() + 1) % kNumberOfRegisters)) {
+ return false;
+ }
+
+ return true;
+}
+
+bool AreSameFormat(const VRegister& reg1, const VRegister& reg2,
+ const VRegister& reg3, const VRegister& reg4) {
+ VIXL_ASSERT(reg1.IsValid());
+ bool match = true;
+ match &= !reg2.IsValid() || reg2.IsSameFormat(reg1);
+ match &= !reg3.IsValid() || reg3.IsSameFormat(reg1);
+ match &= !reg4.IsValid() || reg4.IsSameFormat(reg1);
+ return match;
+}
+
+
+bool AreConsecutive(const VRegister& reg1, const VRegister& reg2,
+ const VRegister& reg3, const VRegister& reg4) {
+ VIXL_ASSERT(reg1.IsValid());
+ bool match = true;
+ match &= !reg2.IsValid() ||
+ (reg2.code() == ((reg1.code() + 1) % kNumberOfVRegisters));
+ match &= !reg3.IsValid() ||
+ (reg3.code() == ((reg1.code() + 2) % kNumberOfVRegisters));
+ match &= !reg4.IsValid() ||
+ (reg4.code() == ((reg1.code() + 3) % kNumberOfVRegisters));
+ return match;
+}
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Assembler-vixl.h b/js/src/jit/arm64/vixl/Assembler-vixl.h
new file mode 100644
index 0000000000..462b359eea
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Assembler-vixl.h
@@ -0,0 +1,4974 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_ASSEMBLER_A64_H_
+#define VIXL_A64_ASSEMBLER_A64_H_
+
+#include "jit/arm64/vixl/Cpu-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Instructions-vixl.h"
+#include "jit/arm64/vixl/MozBaseAssembler-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+#include "jit/JitSpewer.h"
+
+#include "jit/shared/Assembler-shared.h"
+#include "jit/shared/Disassembler-shared.h"
+#include "jit/shared/IonAssemblerBufferWithConstantPools.h"
+
+#if defined(_M_ARM64)
+#ifdef mvn
+#undef mvn
+#endif
+#endif
+
+namespace vixl {
+
+using js::jit::BufferOffset;
+using js::jit::Label;
+using js::jit::Address;
+using js::jit::BaseIndex;
+using js::jit::DisassemblerSpew;
+
+using LabelDoc = DisassemblerSpew::LabelDoc;
+
+typedef uint64_t RegList;
+static const int kRegListSizeInBits = sizeof(RegList) * 8;
+
+
+// Registers.
+
+// Some CPURegister methods can return Register or VRegister types, so we need
+// to declare them in advance.
+class Register;
+class VRegister;
+
+class CPURegister {
+ public:
+ enum RegisterType {
+ // The kInvalid value is used to detect uninitialized static instances,
+ // which are always zero-initialized before any constructors are called.
+ kInvalid = 0,
+ kRegister,
+ kVRegister,
+ kFPRegister = kVRegister,
+ kNoRegister
+ };
+
+ constexpr CPURegister() : code_(0), size_(0), type_(kNoRegister) {
+ }
+
+ constexpr CPURegister(unsigned code, unsigned size, RegisterType type)
+ : code_(code), size_(size), type_(type) {
+ }
+
+ unsigned code() const {
+ VIXL_ASSERT(IsValid());
+ return code_;
+ }
+
+ RegisterType type() const {
+ VIXL_ASSERT(IsValidOrNone());
+ return type_;
+ }
+
+ RegList Bit() const {
+ VIXL_ASSERT(code_ < (sizeof(RegList) * 8));
+ return IsValid() ? (static_cast<RegList>(1) << code_) : 0;
+ }
+
+ unsigned size() const {
+ VIXL_ASSERT(IsValid());
+ return size_;
+ }
+
+ int SizeInBytes() const {
+ VIXL_ASSERT(IsValid());
+ VIXL_ASSERT(size() % 8 == 0);
+ return size_ / 8;
+ }
+
+ int SizeInBits() const {
+ VIXL_ASSERT(IsValid());
+ return size_;
+ }
+
+ bool Is8Bits() const {
+ VIXL_ASSERT(IsValid());
+ return size_ == 8;
+ }
+
+ bool Is16Bits() const {
+ VIXL_ASSERT(IsValid());
+ return size_ == 16;
+ }
+
+ bool Is32Bits() const {
+ VIXL_ASSERT(IsValid());
+ return size_ == 32;
+ }
+
+ bool Is64Bits() const {
+ VIXL_ASSERT(IsValid());
+ return size_ == 64;
+ }
+
+ bool Is128Bits() const {
+ VIXL_ASSERT(IsValid());
+ return size_ == 128;
+ }
+
+ bool IsValid() const {
+ if (IsValidRegister() || IsValidVRegister()) {
+ VIXL_ASSERT(!IsNone());
+ return true;
+ } else {
+ // This assert is hit when the register has not been properly initialized.
+ // One cause for this can be an initialisation order fiasco. See
+ // https://isocpp.org/wiki/faq/ctors#static-init-order for some details.
+ VIXL_ASSERT(IsNone());
+ return false;
+ }
+ }
+
+ bool IsValidRegister() const {
+ return IsRegister() &&
+ ((size_ == kWRegSize) || (size_ == kXRegSize)) &&
+ ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode));
+ }
+
+ bool IsValidVRegister() const {
+ return IsVRegister() &&
+ ((size_ == kBRegSize) || (size_ == kHRegSize) ||
+ (size_ == kSRegSize) || (size_ == kDRegSize) ||
+ (size_ == kQRegSize)) &&
+ (code_ < kNumberOfVRegisters);
+ }
+
+ bool IsValidFPRegister() const {
+ return IsFPRegister() && (code_ < kNumberOfVRegisters);
+ }
+
+ bool IsNone() const {
+ // kNoRegister types should always have size 0 and code 0.
+ VIXL_ASSERT((type_ != kNoRegister) || (code_ == 0));
+ VIXL_ASSERT((type_ != kNoRegister) || (size_ == 0));
+
+ return type_ == kNoRegister;
+ }
+
+ bool Aliases(const CPURegister& other) const {
+ VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
+ return (code_ == other.code_) && (type_ == other.type_);
+ }
+
+ bool Is(const CPURegister& other) const {
+ VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
+ return Aliases(other) && (size_ == other.size_);
+ }
+
+ bool IsZero() const {
+ VIXL_ASSERT(IsValid());
+ return IsRegister() && (code_ == kZeroRegCode);
+ }
+
+ bool IsSP() const {
+ VIXL_ASSERT(IsValid());
+ return IsRegister() && (code_ == kSPRegInternalCode);
+ }
+
+ bool IsRegister() const {
+ return type_ == kRegister;
+ }
+
+ bool IsVRegister() const {
+ return type_ == kVRegister;
+ }
+
+ bool IsFPRegister() const {
+ return IsS() || IsD();
+ }
+
+ bool IsW() const { return IsValidRegister() && Is32Bits(); }
+ bool IsX() const { return IsValidRegister() && Is64Bits(); }
+
+ // These assertions ensure that the size and type of the register are as
+ // described. They do not consider the number of lanes that make up a vector.
+ // So, for example, Is8B() implies IsD(), and Is1D() implies IsD(), but IsD()
+ // does not imply Is1D() or Is8B().
+ // Check the number of lanes, i.e. the format of the vector, using methods such
+ // as Is8B(), Is1D(), etc. in the VRegister class.
+ bool IsV() const { return IsVRegister(); }
+ bool IsB() const { return IsV() && Is8Bits(); }
+ bool IsH() const { return IsV() && Is16Bits(); }
+ bool IsS() const { return IsV() && Is32Bits(); }
+ bool IsD() const { return IsV() && Is64Bits(); }
+ bool IsQ() const { return IsV() && Is128Bits(); }
+
+ const Register& W() const;
+ const Register& X() const;
+ const VRegister& V() const;
+ const VRegister& B() const;
+ const VRegister& H() const;
+ const VRegister& S() const;
+ const VRegister& D() const;
+ const VRegister& Q() const;
+
+ bool IsSameSizeAndType(const CPURegister& other) const {
+ return (size_ == other.size_) && (type_ == other.type_);
+ }
+
+ protected:
+ unsigned code_;
+ unsigned size_;
+ RegisterType type_;
+
+ private:
+ bool IsValidOrNone() const {
+ return IsValid() || IsNone();
+ }
+};
+
+
+class Register : public CPURegister {
+ public:
+ Register() : CPURegister() {}
+ explicit Register(const CPURegister& other)
+ : CPURegister(other.code(), other.size(), other.type()) {
+ VIXL_ASSERT(IsValidRegister());
+ }
+ constexpr Register(unsigned code, unsigned size)
+ : CPURegister(code, size, kRegister) {}
+
+ constexpr Register(js::jit::Register r, unsigned size)
+ : CPURegister(r.code(), size, kRegister) {}
+
+ bool IsValid() const {
+ VIXL_ASSERT(IsRegister() || IsNone());
+ return IsValidRegister();
+ }
+
+ js::jit::Register asUnsized() const {
+ // asUnsized() is only ever used on temp registers or on registers that
+ // are known not to be SP, and there should be no risk of it being
+ // applied to SP. Check anyway.
+ VIXL_ASSERT(code_ != kSPRegInternalCode);
+ return js::jit::Register::FromCode((js::jit::Register::Code)code_);
+ }
+
+
+ static const Register& WRegFromCode(unsigned code);
+ static const Register& XRegFromCode(unsigned code);
+
+ private:
+ static const Register wregisters[];
+ static const Register xregisters[];
+};
+
+
+class VRegister : public CPURegister {
+ public:
+ VRegister() : CPURegister(), lanes_(1) {}
+ explicit VRegister(const CPURegister& other)
+ : CPURegister(other.code(), other.size(), other.type()), lanes_(1) {
+ VIXL_ASSERT(IsValidVRegister());
+ VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
+ }
+ constexpr VRegister(unsigned code, unsigned size, unsigned lanes = 1)
+ : CPURegister(code, size, kVRegister), lanes_(lanes) {
+ // VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
+ }
+ constexpr VRegister(js::jit::FloatRegister r)
+ : CPURegister(r.encoding(), r.size() * 8, kVRegister), lanes_(1) {
+ }
+ constexpr VRegister(js::jit::FloatRegister r, unsigned size)
+ : CPURegister(r.encoding(), size, kVRegister), lanes_(1) {
+ }
+ VRegister(unsigned code, VectorFormat format)
+ : CPURegister(code, RegisterSizeInBitsFromFormat(format), kVRegister),
+ lanes_(IsVectorFormat(format) ? LaneCountFromFormat(format) : 1) {
+ VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
+ }
+
+ bool IsValid() const {
+ VIXL_ASSERT(IsVRegister() || IsNone());
+ return IsValidVRegister();
+ }
+
+ static const VRegister& BRegFromCode(unsigned code);
+ static const VRegister& HRegFromCode(unsigned code);
+ static const VRegister& SRegFromCode(unsigned code);
+ static const VRegister& DRegFromCode(unsigned code);
+ static const VRegister& QRegFromCode(unsigned code);
+ static const VRegister& VRegFromCode(unsigned code);
+
+ VRegister V8B() const { return VRegister(code_, kDRegSize, 8); }
+ VRegister V16B() const { return VRegister(code_, kQRegSize, 16); }
+ VRegister V4H() const { return VRegister(code_, kDRegSize, 4); }
+ VRegister V8H() const { return VRegister(code_, kQRegSize, 8); }
+ VRegister V2S() const { return VRegister(code_, kDRegSize, 2); }
+ VRegister V4S() const { return VRegister(code_, kQRegSize, 4); }
+ VRegister V2D() const { return VRegister(code_, kQRegSize, 2); }
+ VRegister V1D() const { return VRegister(code_, kDRegSize, 1); }
+
+ bool Is8B() const { return (Is64Bits() && (lanes_ == 8)); }
+ bool Is16B() const { return (Is128Bits() && (lanes_ == 16)); }
+ bool Is4H() const { return (Is64Bits() && (lanes_ == 4)); }
+ bool Is8H() const { return (Is128Bits() && (lanes_ == 8)); }
+ bool Is2S() const { return (Is64Bits() && (lanes_ == 2)); }
+ bool Is4S() const { return (Is128Bits() && (lanes_ == 4)); }
+ bool Is1D() const { return (Is64Bits() && (lanes_ == 1)); }
+ bool Is2D() const { return (Is128Bits() && (lanes_ == 2)); }
+
+ // For consistency, we assert the number of lanes of these scalar registers,
+ // even though there are no vectors of equivalent total size with which they
+ // could alias.
+ bool Is1B() const {
+ VIXL_ASSERT(!(Is8Bits() && IsVector()));
+ return Is8Bits();
+ }
+ bool Is1H() const {
+ VIXL_ASSERT(!(Is16Bits() && IsVector()));
+ return Is16Bits();
+ }
+ bool Is1S() const {
+ VIXL_ASSERT(!(Is32Bits() && IsVector()));
+ return Is32Bits();
+ }
+
+ bool IsLaneSizeB() const { return LaneSizeInBits() == kBRegSize; }
+ bool IsLaneSizeH() const { return LaneSizeInBits() == kHRegSize; }
+ bool IsLaneSizeS() const { return LaneSizeInBits() == kSRegSize; }
+ bool IsLaneSizeD() const { return LaneSizeInBits() == kDRegSize; }
+
+ int lanes() const {
+ return lanes_;
+ }
+
+ bool IsScalar() const {
+ return lanes_ == 1;
+ }
+
+ bool IsVector() const {
+ return lanes_ > 1;
+ }
+
+ bool IsSameFormat(const VRegister& other) const {
+ return (size_ == other.size_) && (lanes_ == other.lanes_);
+ }
+
+ unsigned LaneSizeInBytes() const {
+ return SizeInBytes() / lanes_;
+ }
+
+ unsigned LaneSizeInBits() const {
+ return LaneSizeInBytes() * 8;
+ }
+
+ private:
+ static const VRegister bregisters[];
+ static const VRegister hregisters[];
+ static const VRegister sregisters[];
+ static const VRegister dregisters[];
+ static const VRegister qregisters[];
+ static const VRegister vregisters[];
+ int lanes_;
+};
+
+
+// Backward compatibility for FPRegisters.
+typedef VRegister FPRegister;
+
+// No*Reg is used to indicate an unused argument, or an error case. Note that
+// these all compare equal (using the Is() method). The Register and VRegister
+// variants are provided for convenience.
+const Register NoReg;
+const VRegister NoVReg;
+const FPRegister NoFPReg; // For backward compatibility.
+const CPURegister NoCPUReg;
+
+
+#define DEFINE_REGISTERS(N) \
+constexpr Register w##N(N, kWRegSize); \
+constexpr Register x##N(N, kXRegSize);
+REGISTER_CODE_LIST(DEFINE_REGISTERS)
+#undef DEFINE_REGISTERS
+constexpr Register wsp(kSPRegInternalCode, kWRegSize);
+constexpr Register sp(kSPRegInternalCode, kXRegSize);
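+
+// For illustration, DEFINE_REGISTERS(0) above expands to
+//   constexpr Register w0(0, kWRegSize);
+//   constexpr Register x0(0, kXRegSize);
+// and REGISTER_CODE_LIST repeats this for every register code, so w0..w31 and
+// x0..x31 are all defined here; wsp and sp use the internal SP code instead.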
+
+
+#define DEFINE_VREGISTERS(N) \
+constexpr VRegister b##N(N, kBRegSize); \
+constexpr VRegister h##N(N, kHRegSize); \
+constexpr VRegister s##N(N, kSRegSize); \
+constexpr VRegister d##N(N, kDRegSize); \
+constexpr VRegister q##N(N, kQRegSize); \
+constexpr VRegister v##N(N, kQRegSize);
+REGISTER_CODE_LIST(DEFINE_VREGISTERS)
+#undef DEFINE_VREGISTERS
+
+
+// Register aliases.
+constexpr Register ip0 = x16;
+constexpr Register ip1 = x17;
+constexpr Register lr = x30;
+constexpr Register xzr = x31;
+constexpr Register wzr = w31;
+
+
+// AreAliased returns true if any of the named registers overlap. Arguments
+// set to NoReg are ignored. The system stack pointer may be specified.
+bool AreAliased(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3 = NoReg,
+ const CPURegister& reg4 = NoReg,
+ const CPURegister& reg5 = NoReg,
+ const CPURegister& reg6 = NoReg,
+ const CPURegister& reg7 = NoReg,
+ const CPURegister& reg8 = NoReg);
+
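+// For example (illustrative): AreAliased(x0, w0) is true, because w0 and x0
+// name the same architectural register, while AreAliased(x0, x1) is false.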
+
+// AreSameSizeAndType returns true if all of the specified registers have the
+// same size, and are of the same type. The system stack pointer may be
+// specified. Arguments set to NoReg are ignored, as are any subsequent
+// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
+bool AreSameSizeAndType(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3 = NoCPUReg,
+ const CPURegister& reg4 = NoCPUReg,
+ const CPURegister& reg5 = NoCPUReg,
+ const CPURegister& reg6 = NoCPUReg,
+ const CPURegister& reg7 = NoCPUReg,
+ const CPURegister& reg8 = NoCPUReg);
+
+// AreEven returns true if all of the specified registers have even register
+// indices. Arguments set to NoReg are ignored, as are any subsequent
+// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
+bool AreEven(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3 = NoReg,
+ const CPURegister& reg4 = NoReg,
+ const CPURegister& reg5 = NoReg,
+ const CPURegister& reg6 = NoReg,
+ const CPURegister& reg7 = NoReg,
+ const CPURegister& reg8 = NoReg);
+
+// AreConsecutive returns true if all of the specified registers are
+// consecutive in the register file. Arguments set to NoReg are ignored, as are
+// any subsequent arguments. At least one argument (reg1) must be valid
+// (not NoCPUReg).
+bool AreConsecutive(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3 = NoCPUReg,
+ const CPURegister& reg4 = NoCPUReg);
+
+// AreSameFormat returns true if all of the specified VRegisters have the same
+// vector format. Arguments set to NoReg are ignored, as are any subsequent
+// arguments. At least one argument (reg1) must be valid (not NoVReg).
+bool AreSameFormat(const VRegister& reg1,
+ const VRegister& reg2,
+ const VRegister& reg3 = NoVReg,
+ const VRegister& reg4 = NoVReg);
+
+
+// AreConsecutive returns true if all of the specified VRegisters are
+// consecutive in the register file. Arguments set to NoReg are ignored, as are
+// any subsequent arguments. At least one argument (reg1) must be valid
+// (not NoVReg).
+bool AreConsecutive(const VRegister& reg1,
+ const VRegister& reg2,
+ const VRegister& reg3 = NoVReg,
+ const VRegister& reg4 = NoVReg);
+
+
+// Lists of registers.
+class CPURegList {
+ public:
+ explicit CPURegList(CPURegister reg1,
+ CPURegister reg2 = NoCPUReg,
+ CPURegister reg3 = NoCPUReg,
+ CPURegister reg4 = NoCPUReg)
+ : list_(reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit()),
+ size_(reg1.size()), type_(reg1.type()) {
+ VIXL_ASSERT(AreSameSizeAndType(reg1, reg2, reg3, reg4));
+ VIXL_ASSERT(IsValid());
+ }
+
+ CPURegList(CPURegister::RegisterType type, unsigned size, RegList list)
+ : list_(list), size_(size), type_(type) {
+ VIXL_ASSERT(IsValid());
+ }
+
+ CPURegList(CPURegister::RegisterType type, unsigned size,
+ unsigned first_reg, unsigned last_reg)
+ : size_(size), type_(type) {
+ VIXL_ASSERT(((type == CPURegister::kRegister) &&
+ (last_reg < kNumberOfRegisters)) ||
+ ((type == CPURegister::kVRegister) &&
+ (last_reg < kNumberOfVRegisters)));
+ VIXL_ASSERT(last_reg >= first_reg);
+ list_ = (UINT64_C(1) << (last_reg + 1)) - 1;
+ list_ &= ~((UINT64_C(1) << first_reg) - 1);
+ VIXL_ASSERT(IsValid());
+ }
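+
+  // For illustration, CPURegList(CPURegister::kRegister, kXRegSize, 0, 3)
+  // yields list_ == 0b1111, i.e. {x0, x1, x2, x3}: the two mask operations
+  // above set bits first_reg..last_reg inclusive and clear everything else.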
+
+ CPURegister::RegisterType type() const {
+ VIXL_ASSERT(IsValid());
+ return type_;
+ }
+
+ // Combine another CPURegList into this one. Registers that already exist in
+ // this list are left unchanged. The type and size of the registers in the
+ // 'other' list must match those in this list.
+ void Combine(const CPURegList& other) {
+ VIXL_ASSERT(IsValid());
+ VIXL_ASSERT(other.type() == type_);
+ VIXL_ASSERT(other.RegisterSizeInBits() == size_);
+ list_ |= other.list();
+ }
+
+ // Remove every register in the other CPURegList from this one. Registers that
+ // do not exist in this list are ignored. The type and size of the registers
+ // in the 'other' list must match those in this list.
+ void Remove(const CPURegList& other) {
+ VIXL_ASSERT(IsValid());
+ VIXL_ASSERT(other.type() == type_);
+ VIXL_ASSERT(other.RegisterSizeInBits() == size_);
+ list_ &= ~other.list();
+ }
+
+ // Variants of Combine and Remove which take a single register.
+ void Combine(const CPURegister& other) {
+ VIXL_ASSERT(other.type() == type_);
+ VIXL_ASSERT(other.size() == size_);
+ Combine(other.code());
+ }
+
+ void Remove(const CPURegister& other) {
+ VIXL_ASSERT(other.type() == type_);
+ VIXL_ASSERT(other.size() == size_);
+ Remove(other.code());
+ }
+
+ // Variants of Combine and Remove which take a single register by its code;
+  // the type and size of the register are inferred from this list.
+ void Combine(int code) {
+ VIXL_ASSERT(IsValid());
+ VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
+ list_ |= (UINT64_C(1) << code);
+ }
+
+ void Remove(int code) {
+ VIXL_ASSERT(IsValid());
+ VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
+ list_ &= ~(UINT64_C(1) << code);
+ }
+
+ static CPURegList Union(const CPURegList& list_1, const CPURegList& list_2) {
+ VIXL_ASSERT(list_1.type_ == list_2.type_);
+ VIXL_ASSERT(list_1.size_ == list_2.size_);
+ return CPURegList(list_1.type_, list_1.size_, list_1.list_ | list_2.list_);
+ }
+ static CPURegList Union(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3);
+ static CPURegList Union(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3,
+ const CPURegList& list_4);
+
+ static CPURegList Intersection(const CPURegList& list_1,
+ const CPURegList& list_2) {
+ VIXL_ASSERT(list_1.type_ == list_2.type_);
+ VIXL_ASSERT(list_1.size_ == list_2.size_);
+ return CPURegList(list_1.type_, list_1.size_, list_1.list_ & list_2.list_);
+ }
+ static CPURegList Intersection(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3);
+ static CPURegList Intersection(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3,
+ const CPURegList& list_4);
+
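+  // For example (illustrative): the Union of {x0, x1} and {x1, x2} is
+  // {x0, x1, x2} and their Intersection is {x1}; both are plain bitwise
+  // operations on list_.
+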
+ bool Overlaps(const CPURegList& other) const {
+ return (type_ == other.type_) && ((list_ & other.list_) != 0);
+ }
+
+ RegList list() const {
+ VIXL_ASSERT(IsValid());
+ return list_;
+ }
+
+ void set_list(RegList new_list) {
+ VIXL_ASSERT(IsValid());
+ list_ = new_list;
+ }
+
+ // Remove all callee-saved registers from the list. This can be useful when
+ // preparing registers for an AAPCS64 function call, for example.
+ void RemoveCalleeSaved();
+
+ CPURegister PopLowestIndex();
+ CPURegister PopHighestIndex();
+
+ // AAPCS64 callee-saved registers.
+ static CPURegList GetCalleeSaved(unsigned size = kXRegSize);
+ static CPURegList GetCalleeSavedV(unsigned size = kDRegSize);
+
+ // AAPCS64 caller-saved registers. Note that this includes lr.
+ // TODO(all): Determine how we handle d8-d15 being callee-saved, but the top
+ // 64-bits being caller-saved.
+ static CPURegList GetCallerSaved(unsigned size = kXRegSize);
+ static CPURegList GetCallerSavedV(unsigned size = kDRegSize);
+
+ bool IsEmpty() const {
+ VIXL_ASSERT(IsValid());
+ return list_ == 0;
+ }
+
+ bool IncludesAliasOf(const CPURegister& other) const {
+ VIXL_ASSERT(IsValid());
+ return (type_ == other.type()) && ((other.Bit() & list_) != 0);
+ }
+
+ bool IncludesAliasOf(int code) const {
+ VIXL_ASSERT(IsValid());
+    return ((list_ & (UINT64_C(1) << code)) != 0);
+ }
+
+ int Count() const {
+ VIXL_ASSERT(IsValid());
+ return CountSetBits(list_);
+ }
+
+ unsigned RegisterSizeInBits() const {
+ VIXL_ASSERT(IsValid());
+ return size_;
+ }
+
+ unsigned RegisterSizeInBytes() const {
+ int size_in_bits = RegisterSizeInBits();
+ VIXL_ASSERT((size_in_bits % 8) == 0);
+ return size_in_bits / 8;
+ }
+
+ unsigned TotalSizeInBytes() const {
+ VIXL_ASSERT(IsValid());
+ return RegisterSizeInBytes() * Count();
+ }
+
+ private:
+ RegList list_;
+ unsigned size_;
+ CPURegister::RegisterType type_;
+
+ bool IsValid() const;
+};
+
+
+// AAPCS64 callee-saved registers.
+extern const CPURegList kCalleeSaved;
+extern const CPURegList kCalleeSavedV;
+
+
+// AAPCS64 caller-saved registers. Note that this includes lr.
+extern const CPURegList kCallerSaved;
+extern const CPURegList kCallerSavedV;
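+
+// Usage sketch (illustrative, not a definition from this header): a typical
+// way to walk one of these lists when spilling or restoring registers.
+//   CPURegList pending = CPURegList::GetCalleeSaved();
+//   while (!pending.IsEmpty()) {
+//     CPURegister r = pending.PopLowestIndex();
+//     // ... save or restore r ...
+//   }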
+
+
+// Operand.
+class Operand {
+ public:
+ // #<immediate>
+ // where <immediate> is int64_t.
+ // This is allowed to be an implicit constructor because Operand is
+ // a wrapper class that doesn't normally perform any type conversion.
+ Operand(int64_t immediate = 0); // NOLINT(runtime/explicit)
+
+ // rm, {<shift> #<shift_amount>}
+ // where <shift> is one of {LSL, LSR, ASR, ROR}.
+ // <shift_amount> is uint6_t.
+ // This is allowed to be an implicit constructor because Operand is
+ // a wrapper class that doesn't normally perform any type conversion.
+ Operand(Register reg,
+ Shift shift = LSL,
+ unsigned shift_amount = 0); // NOLINT(runtime/explicit)
+
+ // rm, {<extend> {#<shift_amount>}}
+ // where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}.
+ // <shift_amount> is uint2_t.
+ explicit Operand(Register reg, Extend extend, unsigned shift_amount = 0);
+
+ bool IsImmediate() const;
+ bool IsShiftedRegister() const;
+ bool IsExtendedRegister() const;
+ bool IsZero() const;
+
+ // This returns an LSL shift (<= 4) operand as an equivalent extend operand,
+ // which helps in the encoding of instructions that use the stack pointer.
+ Operand ToExtendedRegister() const;
+
+ int64_t immediate() const {
+ VIXL_ASSERT(IsImmediate());
+ return immediate_;
+ }
+
+ Register reg() const {
+ VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
+ return reg_;
+ }
+
+ CPURegister maybeReg() const {
+ if (IsShiftedRegister() || IsExtendedRegister())
+ return reg_;
+ return NoCPUReg;
+ }
+
+ Shift shift() const {
+ VIXL_ASSERT(IsShiftedRegister());
+ return shift_;
+ }
+
+ Extend extend() const {
+ VIXL_ASSERT(IsExtendedRegister());
+ return extend_;
+ }
+
+ unsigned shift_amount() const {
+ VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
+ return shift_amount_;
+ }
+
+ private:
+ int64_t immediate_;
+ Register reg_;
+ Shift shift_;
+ Extend extend_;
+ unsigned shift_amount_;
+};
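+
+// Usage sketch (illustrative): the three Operand forms declared above.
+//   Operand(42)           // #42 immediate
+//   Operand(x1, LSL, 4)   // x1 shifted left by four bits
+//   Operand(w1, UXTW)     // w1 zero-extended to 64 bits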
+
+
+// MemOperand represents the addressing mode of a load or store instruction.
+class MemOperand {
+ public:
+ explicit MemOperand(Register base,
+ int64_t offset = 0,
+ AddrMode addrmode = Offset);
+ MemOperand(Register base,
+ Register regoffset,
+ Shift shift = LSL,
+ unsigned shift_amount = 0);
+ MemOperand(Register base,
+ Register regoffset,
+ Extend extend,
+ unsigned shift_amount = 0);
+ MemOperand(Register base,
+ const Operand& offset,
+ AddrMode addrmode = Offset);
+
+ // Adapter constructors using C++11 delegating.
+ // TODO: If sp == kSPRegInternalCode, the xzr check isn't necessary.
+ explicit MemOperand(js::jit::Address addr)
+ : MemOperand(IsHiddenSP(addr.base) ? sp : Register(AsRegister(addr.base), 64),
+ (ptrdiff_t)addr.offset) {
+ }
+
+ const Register& base() const { return base_; }
+ const Register& regoffset() const { return regoffset_; }
+ int64_t offset() const { return offset_; }
+ AddrMode addrmode() const { return addrmode_; }
+ Shift shift() const { return shift_; }
+ Extend extend() const { return extend_; }
+ unsigned shift_amount() const { return shift_amount_; }
+ bool IsImmediateOffset() const;
+ bool IsRegisterOffset() const;
+ bool IsPreIndex() const;
+ bool IsPostIndex() const;
+
+ void AddOffset(int64_t offset);
+
+ private:
+ Register base_;
+ Register regoffset_;
+ int64_t offset_;
+ AddrMode addrmode_;
+ Shift shift_;
+ Extend extend_;
+ unsigned shift_amount_;
+};
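+
+// Usage sketch (illustrative): common addressing modes expressed with the
+// constructors above.
+//   MemOperand(x0, 16)             // [x0, #16]
+//   MemOperand(x0, 16, PreIndex)   // [x0, #16]!  (x0 updated before access)
+//   MemOperand(x0, 16, PostIndex)  // [x0], #16   (x0 updated after access)
+//   MemOperand(x0, x1, LSL, 3)     // [x0, x1, lsl #3]
+//   MemOperand(x0, w1, SXTW)       // [x0, w1, sxtw]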
+
+
+// Control whether or not position-independent code should be emitted.
+enum PositionIndependentCodeOption {
+ // All code generated will be position-independent; all branches and
+ // references to labels generated with the Label class will use PC-relative
+ // addressing.
+ PositionIndependentCode,
+
+ // Allow VIXL to generate code that refers to absolute addresses. With this
+ // option, it will not be possible to copy the code buffer and run it from a
+ // different address; code must be generated in its final location.
+ PositionDependentCode,
+
+ // Allow VIXL to assume that the bottom 12 bits of the address will be
+ // constant, but that the top 48 bits may change. This allows `adrp` to
+ // function in systems which copy code between pages, but otherwise maintain
+ // 4KB page alignment.
+ PageOffsetDependentCode
+};
+
+
+// Control how scaled- and unscaled-offset loads and stores are generated.
+enum LoadStoreScalingOption {
+ // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
+ // register-offset, pre-index or post-index instructions if necessary.
+ PreferScaledOffset,
+
+ // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
+ // register-offset, pre-index or post-index instructions if necessary.
+ PreferUnscaledOffset,
+
+ // Require scaled-immediate-offset instructions.
+ RequireScaledOffset,
+
+ // Require unscaled-immediate-offset instructions.
+ RequireUnscaledOffset
+};
+
+
+// Assembler.
+class Assembler : public MozBaseAssembler {
+ public:
+ Assembler(PositionIndependentCodeOption pic = PositionIndependentCode);
+
+ // System functions.
+
+ // Finalize a code buffer of generated instructions. This function must be
+ // called before executing or copying code from the buffer.
+ void FinalizeCode();
+
+#define COPYENUM(v) static const Condition v = vixl::v
+#define COPYENUM_(v) static const Condition v = vixl::v##_
+ COPYENUM(Equal);
+ COPYENUM(Zero);
+ COPYENUM(NotEqual);
+ COPYENUM(NonZero);
+ COPYENUM(AboveOrEqual);
+ COPYENUM(CarrySet);
+ COPYENUM(Below);
+ COPYENUM(CarryClear);
+ COPYENUM(Signed);
+ COPYENUM(NotSigned);
+ COPYENUM(Overflow);
+ COPYENUM(NoOverflow);
+ COPYENUM(Above);
+ COPYENUM(BelowOrEqual);
+ COPYENUM_(GreaterThanOrEqual);
+ COPYENUM_(LessThan);
+ COPYENUM_(GreaterThan);
+ COPYENUM_(LessThanOrEqual);
+ COPYENUM(Always);
+ COPYENUM(Never);
+#undef COPYENUM
+#undef COPYENUM_
+
+ // Bit set when a DoubleCondition does not map to a single ARM condition.
+ // The MacroAssembler must special-case these conditions, or else
+ // ConditionFromDoubleCondition will complain.
+ static const int DoubleConditionBitSpecial = 0x100;
+
+ enum DoubleCondition {
+ DoubleOrdered = Condition::vc,
+ DoubleEqual = Condition::eq,
+ DoubleNotEqual = Condition::ne | DoubleConditionBitSpecial,
+ DoubleGreaterThan = Condition::gt,
+ DoubleGreaterThanOrEqual = Condition::ge,
+ DoubleLessThan = Condition::lo, // Could also use Condition::mi.
+ DoubleLessThanOrEqual = Condition::ls,
+
+ // If either operand is NaN, these conditions always evaluate to true.
+ DoubleUnordered = Condition::vs,
+ DoubleEqualOrUnordered = Condition::eq | DoubleConditionBitSpecial,
+ DoubleNotEqualOrUnordered = Condition::ne,
+ DoubleGreaterThanOrUnordered = Condition::hi,
+ DoubleGreaterThanOrEqualOrUnordered = Condition::hs,
+ DoubleLessThanOrUnordered = Condition::lt,
+ DoubleLessThanOrEqualOrUnordered = Condition::le
+ };
+
+ static inline Condition InvertCondition(Condition cond) {
+ // Conditions al and nv behave identically, as "always true". They can't be
+ // inverted, because there is no "always false" condition.
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ return static_cast<Condition>(cond ^ 1);
+ }
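+
+  // For example (illustrative): InvertCondition(eq) == ne and
+  // InvertCondition(lt) == ge, since each condition pair differs only in the
+  // low bit of its encoding.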
+
+  // This changes the condition code for cmp a, b into the equivalent code for
+  // cmp b, a, i.e. it accounts for swapped operands rather than negating the
+  // condition.
+  static inline Condition InvertCmpCondition(Condition cond) {
+ switch (cond) {
+ case eq:
+ case ne:
+ return cond;
+      case gt:
+        return lt;
+      case lt:
+        return gt;
+      case ge:
+        return le;
+      case le:
+        return ge;
+ case hi:
+ return lo;
+ case lo:
+ return hi;
+ case hs:
+ return ls;
+ case ls:
+ return hs;
+ case mi:
+ return pl;
+ case pl:
+ return mi;
+ default:
+ MOZ_CRASH("TODO: figure this case out.");
+ }
+ return static_cast<Condition>(cond ^ 1);
+ }
+
+ static inline DoubleCondition InvertCondition(DoubleCondition cond) {
+ switch (cond) {
+ case DoubleOrdered:
+ return DoubleUnordered;
+ case DoubleEqual:
+ return DoubleNotEqualOrUnordered;
+ case DoubleNotEqual:
+ return DoubleEqualOrUnordered;
+ case DoubleGreaterThan:
+ return DoubleLessThanOrEqualOrUnordered;
+ case DoubleGreaterThanOrEqual:
+ return DoubleLessThanOrUnordered;
+ case DoubleLessThan:
+ return DoubleGreaterThanOrEqualOrUnordered;
+ case DoubleLessThanOrEqual:
+ return DoubleGreaterThanOrUnordered;
+ case DoubleUnordered:
+ return DoubleOrdered;
+ case DoubleEqualOrUnordered:
+ return DoubleNotEqual;
+ case DoubleNotEqualOrUnordered:
+ return DoubleEqual;
+ case DoubleGreaterThanOrUnordered:
+ return DoubleLessThanOrEqual;
+ case DoubleGreaterThanOrEqualOrUnordered:
+ return DoubleLessThan;
+ case DoubleLessThanOrUnordered:
+ return DoubleGreaterThanOrEqual;
+ case DoubleLessThanOrEqualOrUnordered:
+ return DoubleGreaterThan;
+ default:
+ MOZ_CRASH("Bad condition");
+ }
+ }
+
+ static inline Condition ConditionFromDoubleCondition(DoubleCondition cond) {
+ VIXL_ASSERT(!(cond & DoubleConditionBitSpecial));
+ return static_cast<Condition>(cond);
+ }
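+
+  // Usage sketch (illustrative): for conditions without the special bit the
+  // mapping is direct, e.g.
+  //   DoubleCondition dc = DoubleLessThan;             // no special bit set
+  //   Condition c = ConditionFromDoubleCondition(dc);  // c == lo
+  // Conditions that carry DoubleConditionBitSpecial must be handled by the
+  // MacroAssembler before reaching this helper, as noted above.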
+
+ // Instruction set functions.
+
+ // Branch / Jump instructions.
+ // Branch to register.
+ void br(const Register& xn);
+ static void br(Instruction* at, const Register& xn);
+
+ // Branch with link to register.
+ void blr(const Register& xn);
+  static void blr(Instruction* at, const Register& xn);
+
+ // Branch to register with return hint.
+ void ret(const Register& xn = lr);
+
+ // Unconditional branch to label.
+ BufferOffset b(Label* label);
+
+ // Conditional branch to label.
+ BufferOffset b(Label* label, Condition cond);
+
+ // Unconditional branch to PC offset.
+ BufferOffset b(int imm26, const LabelDoc& doc);
+ static void b(Instruction* at, int imm26);
+
+ // Conditional branch to PC offset.
+ BufferOffset b(int imm19, Condition cond, const LabelDoc& doc);
+  static void b(Instruction* at, int imm19, Condition cond);
+
+ // Branch with link to label.
+ void bl(Label* label);
+
+ // Branch with link to PC offset.
+ void bl(int imm26, const LabelDoc& doc);
+ static void bl(Instruction* at, int imm26);
+
+ // Compare and branch to label if zero.
+ void cbz(const Register& rt, Label* label);
+
+ // Compare and branch to PC offset if zero.
+ void cbz(const Register& rt, int imm19, const LabelDoc& doc);
+ static void cbz(Instruction* at, const Register& rt, int imm19);
+
+ // Compare and branch to label if not zero.
+ void cbnz(const Register& rt, Label* label);
+
+ // Compare and branch to PC offset if not zero.
+ void cbnz(const Register& rt, int imm19, const LabelDoc& doc);
+ static void cbnz(Instruction* at, const Register& rt, int imm19);
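+
+  // Usage sketch (illustrative, assuming an Assembler instance `masm` and the
+  // usual bind(Label*) entry point):
+  //   Label done;
+  //   masm.cbz(x0, &done);   // branch to done when x0 == 0
+  //   // ...
+  //   masm.bind(&done);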
+
+ // Table lookup from one register.
+ void tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Table lookup from two registers.
+ void tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vm);
+
+ // Table lookup from three registers.
+ void tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vm);
+
+ // Table lookup from four registers.
+ void tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vn4,
+ const VRegister& vm);
+
+ // Table lookup extension from one register.
+ void tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Table lookup extension from two registers.
+ void tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vm);
+
+ // Table lookup extension from three registers.
+ void tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vm);
+
+ // Table lookup extension from four registers.
+ void tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vn4,
+ const VRegister& vm);
+
+ // Test bit and branch to label if zero.
+ void tbz(const Register& rt, unsigned bit_pos, Label* label);
+
+ // Test bit and branch to PC offset if zero.
+ void tbz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc);
+ static void tbz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14);
+
+ // Test bit and branch to label if not zero.
+ void tbnz(const Register& rt, unsigned bit_pos, Label* label);
+
+ // Test bit and branch to PC offset if not zero.
+ void tbnz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc);
+ static void tbnz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14);
+
+ // Address calculation instructions.
+  // Calculate a PC-relative address. Unlike for branches, the offset in adr
+  // is unscaled (i.e. the result can be unaligned).
+
+ // Calculate the address of a label.
+ void adr(const Register& rd, Label* label);
+
+ // Calculate the address of a PC offset.
+ void adr(const Register& rd, int imm21, const LabelDoc& doc);
+ static void adr(Instruction* at, const Register& rd, int imm21);
+
+ // Calculate the page address of a label.
+ void adrp(const Register& rd, Label* label);
+
+ // Calculate the page address of a PC offset.
+ void adrp(const Register& rd, int imm21, const LabelDoc& doc);
+ static void adrp(Instruction* at, const Register& rd, int imm21);
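+
+  // For illustration: adr materializes a byte-exact PC-relative address, while
+  // adrp yields only the 4KB page base of the target and is normally paired
+  // with an add of the low 12 bits of the address.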
+
+ // Data Processing instructions.
+ // Add.
+ void add(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Add and update status flags.
+ void adds(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Compare negative.
+ void cmn(const Register& rn, const Operand& operand);
+
+ // Subtract.
+ void sub(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Subtract and update status flags.
+ void subs(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Compare.
+ void cmp(const Register& rn, const Operand& operand);
+
+ // Negate.
+ void neg(const Register& rd,
+ const Operand& operand);
+
+ // Negate and update status flags.
+ void negs(const Register& rd,
+ const Operand& operand);
+
+ // Add with carry bit.
+ void adc(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Add with carry bit and update status flags.
+ void adcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Subtract with carry bit.
+ void sbc(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Subtract with carry bit and update status flags.
+ void sbcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Negate with carry bit.
+ void ngc(const Register& rd,
+ const Operand& operand);
+
+ // Negate with carry bit and update status flags.
+ void ngcs(const Register& rd,
+ const Operand& operand);
+
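+  // Usage sketch (illustrative, assuming an Assembler instance `masm`):
+  //   masm.add(x0, x1, Operand(42));          // x0 = x1 + 42
+  //   masm.add(x0, x1, Operand(x2, LSL, 2));  // x0 = x1 + (x2 << 2)
+  //   masm.subs(x0, x1, Operand(x2));         // x0 = x1 - x2, updating NZCV
+  //   masm.cmp(x1, Operand(x2));              // subs with the result discarded
+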
+ // Logical instructions.
+ // Bitwise and (A & B).
+ void and_(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Bitwise and (A & B) and update status flags.
+ BufferOffset ands(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Bit test and set flags.
+ BufferOffset tst(const Register& rn, const Operand& operand);
+
+ // Bit clear (A & ~B).
+ void bic(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Bit clear (A & ~B) and update status flags.
+ void bics(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Bitwise or (A | B).
+ void orr(const Register& rd, const Register& rn, const Operand& operand);
+
+  // Bitwise or-not (A | ~B).
+ void orn(const Register& rd, const Register& rn, const Operand& operand);
+
+ // Bitwise eor/xor (A ^ B).
+ void eor(const Register& rd, const Register& rn, const Operand& operand);
+
+  // Bitwise xnor / exclusive-or-not (A ^ ~B).
+ void eon(const Register& rd, const Register& rn, const Operand& operand);
+
+ // Logical shift left by variable.
+ void lslv(const Register& rd, const Register& rn, const Register& rm);
+
+ // Logical shift right by variable.
+ void lsrv(const Register& rd, const Register& rn, const Register& rm);
+
+ // Arithmetic shift right by variable.
+ void asrv(const Register& rd, const Register& rn, const Register& rm);
+
+ // Rotate right by variable.
+ void rorv(const Register& rd, const Register& rn, const Register& rm);
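+
+  // Usage sketch (illustrative, assuming an Assembler instance `masm`):
+  //   masm.and_(x0, x0, Operand(0xff));  // keep only the low byte
+  //   masm.tst(x0, Operand(1));          // set flags for x0 & 1
+  //   masm.lslv(x2, x3, x4);             // x2 = x3 << (x4 % 64)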
+
+ // Bitfield instructions.
+ // Bitfield move.
+ void bfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms);
+
+ // Signed bitfield move.
+ void sbfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms);
+
+ // Unsigned bitfield move.
+ void ubfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms);
+
+ // Bfm aliases.
+ // Bitfield insert.
+ void bfi(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(width >= 1);
+ VIXL_ASSERT(lsb + width <= rn.size());
+ bfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
+ }
+
+ // Bitfield extract and insert low.
+ void bfxil(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(width >= 1);
+ VIXL_ASSERT(lsb + width <= rn.size());
+ bfm(rd, rn, lsb, lsb + width - 1);
+ }
+
+ // Sbfm aliases.
+ // Arithmetic shift right.
+ void asr(const Register& rd, const Register& rn, unsigned shift) {
+ VIXL_ASSERT(shift < rd.size());
+ sbfm(rd, rn, shift, rd.size() - 1);
+ }
+
+ // Signed bitfield insert with zero at right.
+ void sbfiz(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(width >= 1);
+ VIXL_ASSERT(lsb + width <= rn.size());
+ sbfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
+ }
+
+ // Signed bitfield extract.
+ void sbfx(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(width >= 1);
+ VIXL_ASSERT(lsb + width <= rn.size());
+ sbfm(rd, rn, lsb, lsb + width - 1);
+ }
+
+ // Signed extend byte.
+ void sxtb(const Register& rd, const Register& rn) {
+ sbfm(rd, rn, 0, 7);
+ }
+
+ // Signed extend halfword.
+ void sxth(const Register& rd, const Register& rn) {
+ sbfm(rd, rn, 0, 15);
+ }
+
+ // Signed extend word.
+ void sxtw(const Register& rd, const Register& rn) {
+ sbfm(rd, rn, 0, 31);
+ }
+
+ // Ubfm aliases.
+ // Logical shift left.
+ void lsl(const Register& rd, const Register& rn, unsigned shift) {
+ unsigned reg_size = rd.size();
+ VIXL_ASSERT(shift < reg_size);
+ ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
+ }
+
+ // Logical shift right.
+ void lsr(const Register& rd, const Register& rn, unsigned shift) {
+ VIXL_ASSERT(shift < rd.size());
+ ubfm(rd, rn, shift, rd.size() - 1);
+ }
+
+ // Unsigned bitfield insert with zero at right.
+ void ubfiz(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(width >= 1);
+ VIXL_ASSERT(lsb + width <= rn.size());
+ ubfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
+ }
+
+ // Unsigned bitfield extract.
+ void ubfx(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(width >= 1);
+ VIXL_ASSERT(lsb + width <= rn.size());
+ ubfm(rd, rn, lsb, lsb + width - 1);
+ }
+
+ // Unsigned extend byte.
+ void uxtb(const Register& rd, const Register& rn) {
+ ubfm(rd, rn, 0, 7);
+ }
+
+ // Unsigned extend halfword.
+ void uxth(const Register& rd, const Register& rn) {
+ ubfm(rd, rn, 0, 15);
+ }
+
+ // Unsigned extend word.
+ void uxtw(const Register& rd, const Register& rn) {
+ ubfm(rd, rn, 0, 31);
+ }
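+
+  // For illustration, these aliases reduce to the underlying bitfield moves
+  // exactly as written above, e.g.
+  //   lsl(x0, x1, 4)     emits ubfm(x0, x1, 60, 59)
+  //   ubfx(w0, w1, 8, 8) emits ubfm(w0, w1, 8, 15)  // extracts bits [15:8]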
+
+ // Extract.
+ void extr(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ unsigned lsb);
+
+ // Conditional select: rd = cond ? rn : rm.
+ void csel(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond);
+
+ // Conditional select increment: rd = cond ? rn : rm + 1.
+ void csinc(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond);
+
+ // Conditional select inversion: rd = cond ? rn : ~rm.
+ void csinv(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond);
+
+ // Conditional select negation: rd = cond ? rn : -rm.
+ void csneg(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond);
+
+ // Conditional set: rd = cond ? 1 : 0.
+ void cset(const Register& rd, Condition cond);
+
+ // Conditional set mask: rd = cond ? -1 : 0.
+ void csetm(const Register& rd, Condition cond);
+
+ // Conditional increment: rd = cond ? rn + 1 : rn.
+ void cinc(const Register& rd, const Register& rn, Condition cond);
+
+ // Conditional invert: rd = cond ? ~rn : rn.
+ void cinv(const Register& rd, const Register& rn, Condition cond);
+
+ // Conditional negate: rd = cond ? -rn : rn.
+ void cneg(const Register& rd, const Register& rn, Condition cond);
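+
+  // Usage sketch (illustrative, assuming an Assembler instance `masm`):
+  //   masm.cmp(x1, Operand(x2));
+  //   masm.cset(x0, eq);          // x0 = (x1 == x2) ? 1 : 0
+  //   masm.csel(x3, x4, x5, lt);  // x3 = (x1 < x2, signed) ? x4 : x5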
+
+ // Rotate right.
+ void ror(const Register& rd, const Register& rs, unsigned shift) {
+ extr(rd, rs, rs, shift);
+ }
+
+ // Conditional comparison.
+ // Conditional compare negative.
+ void ccmn(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond);
+
+ // Conditional compare.
+ void ccmp(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond);
+
+ // CRC-32 checksum from byte.
+ void crc32b(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+ // CRC-32 checksum from half-word.
+ void crc32h(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+ // CRC-32 checksum from word.
+ void crc32w(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+ // CRC-32 checksum from double word.
+ void crc32x(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+  // CRC-32C checksum from byte.
+ void crc32cb(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+  // CRC-32C checksum from half-word.
+ void crc32ch(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+  // CRC-32C checksum from word.
+ void crc32cw(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+ // CRC-32C checksum from double word.
+ void crc32cx(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+ // Multiply.
+ void mul(const Register& rd, const Register& rn, const Register& rm);
+
+ // Negated multiply.
+ void mneg(const Register& rd, const Register& rn, const Register& rm);
+
+ // Signed long multiply: 32 x 32 -> 64-bit.
+ void smull(const Register& rd, const Register& rn, const Register& rm);
+
+ // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
+ void smulh(const Register& xd, const Register& xn, const Register& xm);
+
+ // Multiply and accumulate.
+ void madd(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra);
+
+ // Multiply and subtract.
+ void msub(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra);
+
+ // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
+ void smaddl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra);
+
+ // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
+ void umaddl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra);
+
+ // Unsigned long multiply: 32 x 32 -> 64-bit.
+ void umull(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ umaddl(rd, rn, rm, xzr);
+ }
+
+ // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
+ void umulh(const Register& xd,
+ const Register& xn,
+ const Register& xm);
+
+ // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
+ void smsubl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra);
+
+ // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
+ void umsubl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra);
+
+ // Signed integer divide.
+ void sdiv(const Register& rd, const Register& rn, const Register& rm);
+
+ // Unsigned integer divide.
+ void udiv(const Register& rd, const Register& rn, const Register& rm);
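+
+  // Usage sketch (illustrative): there is no remainder instruction, so the
+  // usual idiom pairs a divide with msub:
+  //   masm.udiv(x2, x0, x1);      // x2 = x0 / x1
+  //   masm.msub(x3, x2, x1, x0);  // x3 = x0 - x2 * x1 == x0 % x1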
+
+ // Bit reverse.
+ void rbit(const Register& rd, const Register& rn);
+
+ // Reverse bytes in 16-bit half words.
+ void rev16(const Register& rd, const Register& rn);
+
+ // Reverse bytes in 32-bit words.
+ void rev32(const Register& rd, const Register& rn);
+
+ // Reverse bytes.
+ void rev(const Register& rd, const Register& rn);
+
+ // Count leading zeroes.
+ void clz(const Register& rd, const Register& rn);
+
+ // Count leading sign bits.
+ void cls(const Register& rd, const Register& rn);
+
+ // Memory instructions.
+ // Load integer or FP register.
+ void ldr(const CPURegister& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Store integer or FP register.
+ void str(const CPURegister& rt, const MemOperand& dst,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Load word with sign extension.
+ void ldrsw(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Load byte.
+ void ldrb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Store byte.
+ void strb(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Load byte with sign extension.
+ void ldrsb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Load half-word.
+ void ldrh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Store half-word.
+ void strh(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Load half-word with sign extension.
+ void ldrsh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
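+  // Usage sketch (illustrative, assuming an Assembler instance `masm`):
+  //   masm.ldr(w0, MemOperand(x1, 4));              // 32-bit load from [x1 + 4]
+  //   masm.strh(w2, MemOperand(x1, 2, PostIndex));  // store halfword, then x1 += 2
+  //   masm.ldrsb(x3, MemOperand(x1, x4));           // sign-extending byte load
+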
+ // Load integer or FP register (with unscaled offset).
+ void ldur(const CPURegister& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Store integer or FP register (with unscaled offset).
+  void stur(const CPURegister& rt, const MemOperand& dst,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Load word with sign extension (and unscaled offset).
+ void ldursw(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Load byte (with unscaled offset).
+ void ldurb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Store byte (with unscaled offset).
+ void sturb(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Load byte with sign extension (and unscaled offset).
+ void ldursb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Load half-word (with unscaled offset).
+ void ldurh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Store half-word (with unscaled offset).
+ void sturh(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Load half-word with sign extension (and unscaled offset).
+ void ldursh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Load integer or FP register pair.
+ void ldp(const CPURegister& rt, const CPURegister& rt2,
+ const MemOperand& src);
+
+ // Store integer or FP register pair.
+ void stp(const CPURegister& rt, const CPURegister& rt2,
+ const MemOperand& dst);
+
+ // Load word pair with sign extension.
+ void ldpsw(const Register& rt, const Register& rt2, const MemOperand& src);
+
+ // Load integer or FP register pair, non-temporal.
+ void ldnp(const CPURegister& rt, const CPURegister& rt2,
+ const MemOperand& src);
+
+ // Store integer or FP register pair, non-temporal.
+ void stnp(const CPURegister& rt, const CPURegister& rt2,
+ const MemOperand& dst);
+
+ // Load integer or FP register from pc + imm19 << 2.
+ void ldr(const CPURegister& rt, int imm19);
+ static void ldr(Instruction* at, const CPURegister& rt, int imm19);
+
+ // Load word with sign extension from pc + imm19 << 2.
+ void ldrsw(const Register& rt, int imm19);
+
+ // Store exclusive byte.
+ void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
+
+ // Store exclusive half-word.
+ void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
+
+ // Store exclusive register.
+ void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
+
+ // Load exclusive byte.
+ void ldxrb(const Register& rt, const MemOperand& src);
+
+ // Load exclusive half-word.
+ void ldxrh(const Register& rt, const MemOperand& src);
+
+ // Load exclusive register.
+ void ldxr(const Register& rt, const MemOperand& src);
+
+ // Store exclusive register pair.
+ void stxp(const Register& rs,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& dst);
+
+ // Load exclusive register pair.
+ void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
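+
+  // Usage sketch (illustrative): the classic load-exclusive/store-exclusive
+  // retry loop for an atomic increment, assuming an Assembler instance `masm`
+  // and the usual bind(Label*) entry point.
+  //   Label retry;
+  //   masm.bind(&retry);
+  //   masm.ldxr(x0, MemOperand(x2));      // x0 = [x2], marked exclusive
+  //   masm.add(x0, x0, Operand(1));
+  //   masm.stxr(w1, x0, MemOperand(x2));  // w1 == 0 on success
+  //   masm.cbnz(w1, &retry);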
+
+ // Store-release exclusive byte.
+ void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
+
+ // Store-release exclusive half-word.
+ void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
+
+ // Store-release exclusive register.
+ void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
+
+ // Load-acquire exclusive byte.
+ void ldaxrb(const Register& rt, const MemOperand& src);
+
+ // Load-acquire exclusive half-word.
+ void ldaxrh(const Register& rt, const MemOperand& src);
+
+ // Load-acquire exclusive register.
+ void ldaxr(const Register& rt, const MemOperand& src);
+
+ // Store-release exclusive register pair.
+ void stlxp(const Register& rs,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& dst);
+
+ // Load-acquire exclusive register pair.
+ void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
+
+ // Store-release byte.
+ void stlrb(const Register& rt, const MemOperand& dst);
+
+ // Store-release half-word.
+ void stlrh(const Register& rt, const MemOperand& dst);
+
+ // Store-release register.
+ void stlr(const Register& rt, const MemOperand& dst);
+
+ // Load-acquire byte.
+ void ldarb(const Register& rt, const MemOperand& src);
+
+ // Load-acquire half-word.
+ void ldarh(const Register& rt, const MemOperand& src);
+
+ // Load-acquire register.
+ void ldar(const Register& rt, const MemOperand& src);
+
+ // Compare and Swap word or doubleword in memory [Armv8.1].
+ void cas(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap word or doubleword in memory [Armv8.1].
+ void casa(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap word or doubleword in memory [Armv8.1].
+ void casl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap word or doubleword in memory [Armv8.1].
+ void casal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap byte in memory [Armv8.1].
+ void casb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap byte in memory [Armv8.1].
+ void casab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap byte in memory [Armv8.1].
+ void caslb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap byte in memory [Armv8.1].
+ void casalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap halfword in memory [Armv8.1].
+ void cash(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap halfword in memory [Armv8.1].
+ void casah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap halfword in memory [Armv8.1].
+ void caslh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap halfword in memory [Armv8.1].
+ void casalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
+ void casp(const Register& rs,
+ const Register& rs2,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& src);
+
+ // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
+ void caspa(const Register& rs,
+ const Register& rs2,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& src);
+
+ // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
+ void caspl(const Register& rs,
+ const Register& rs2,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& src);
+
+ // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
+ void caspal(const Register& rs,
+ const Register& rs2,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& src);
+
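+  // For illustration: cas(rs, rt, src) atomically compares the value at src
+  // with rs and, if they are equal, stores rt; rs always receives the value
+  // that was read, so callers can tell whether the swap happened.
+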
+ // Atomic add on byte in memory [Armv8.1]
+ void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1]
+ void ldaddab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on byte in memory, with Store-release semantics [Armv8.1]
+ void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on byte in memory, with Load-acquire and Store-release semantics
+ // [Armv8.1]
+ void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on halfword in memory [Armv8.1]
+ void ldaddh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1]
+ void ldaddah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on halfword in memory, with Store-release semantics [Armv8.1]
+ void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on halfword in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory [Armv8.1]
+ void ldadd(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldadda(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldaddl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldaddal(const Register& rs, const Register& rt, const MemOperand& src);
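+
+  // For illustration: ldadd(rs, rt, src) atomically adds rs to the value at
+  // src and writes the original memory value to rt; the a/l/al suffixes only
+  // add acquire and/or release ordering.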
+
+ // Atomic bit clear on byte in memory [Armv8.1]
+ void ldclrb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1]
+ void ldclrab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1]
+ void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldclralb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory [Armv8.1]
+ void ldclrh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldclrah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldclralh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory [Armv8.1]
+ void ldclr(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldclra(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldclrl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldclral(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory [Armv8.1]
+ void ldeorb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldeorab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory [Armv8.1]
+ void ldeorh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldeorah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory [Armv8.1]
+ void ldeor(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldeora(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldeorl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldeoral(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on byte in memory [Armv8.1]
+ void ldsetb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1]
+ void ldsetab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1]
+ void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on byte in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory [Armv8.1]
+ void ldseth(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1]
+ void ldsetah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory [Armv8.1]
+ void ldset(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldseta(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldsetl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldsetal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory [Armv8.1]
+ void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory [Armv8.1]
+ void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory [Armv8.1]
+ void ldsmax(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory, with Load-acquire
+ // and Store-release semantics [Armv8.1]
+ void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory [Armv8.1]
+ void ldsminb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldsminab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory [Armv8.1]
+ void ldsminh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldsminah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory [Armv8.1]
+ void ldsmin(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldsmina(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldsminl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory, with Load-acquire
+ // and Store-release semantics [Armv8.1]
+ void ldsminal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory [Armv8.1]
+ void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory [Armv8.1]
+ void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory [Armv8.1]
+ void ldumax(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
+ // and Store-release semantics [Armv8.1]
+ void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory [Armv8.1]
+ void lduminb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void lduminab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory, with Store-release semantics
+ // [Armv8.1]
+ void lduminlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void lduminalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory [Armv8.1]
+ void lduminh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void lduminah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void lduminlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void lduminalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory [Armv8.1]
+ void ldumin(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldumina(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void lduminl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
+ // and Store-release semantics [Armv8.1]
+ void lduminal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on byte in memory, without return. [Armv8.1]
+ void staddb(const Register& rs, const MemOperand& src);
+
+ // Atomic add on byte in memory, with Store-release semantics and without
+ // return. [Armv8.1]
+ void staddlb(const Register& rs, const MemOperand& src);
+
+ // Atomic add on halfword in memory, without return. [Armv8.1]
+ void staddh(const Register& rs, const MemOperand& src);
+
+ // Atomic add on halfword in memory, with Store-release semantics and without
+ // return. [Armv8.1]
+ void staddlh(const Register& rs, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory, without return. [Armv8.1]
+ void stadd(const Register& rs, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void staddl(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory, without return. [Armv8.1]
+ void stclrb(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stclrlb(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory, without return. [Armv8.1]
+ void stclrh(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stclrlh(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1]
+ void stclr(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stclrl(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory, without return. [Armv8.1]
+ void steorb(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void steorlb(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory, without return. [Armv8.1]
+ void steorh(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void steorlh(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory, without return.
+ // [Armv8.1]
+ void steor(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void steorl(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on byte in memory, without return. [Armv8.1]
+ void stsetb(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on byte in memory, with Store-release semantics and without
+ // return. [Armv8.1]
+ void stsetlb(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory, without return. [Armv8.1]
+ void stseth(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stsetlh(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory, without return. [Armv8.1]
+ void stset(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stsetl(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory, without return. [Armv8.1]
+ void stsmaxb(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stsmaxlb(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory, without return. [Armv8.1]
+ void stsmaxh(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void stsmaxlh(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory, without return.
+ // [Armv8.1]
+ void stsmax(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stsmaxl(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory, without return. [Armv8.1]
+ void stsminb(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stsminlb(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory, without return. [Armv8.1]
+ void stsminh(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void stsminlh(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory, without return.
+ // [Armv8.1]
+ void stsmin(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stsminl(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory, without return. [Armv8.1]
+ void stumaxb(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stumaxlb(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1]
+ void stumaxh(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void stumaxlh(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory, without return.
+ // [Armv8.1]
+ void stumax(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stumaxl(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory, without return. [Armv8.1]
+ void stuminb(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stuminlb(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1]
+ void stuminh(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void stuminlh(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory, without return.
+ // [Armv8.1]
+ void stumin(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stuminl(const Register& rs, const MemOperand& src);
+
+ // Swap byte in memory [Armv8.1]
+ void swpb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap byte in memory, with Load-acquire semantics [Armv8.1]
+ void swpab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap byte in memory, with Store-release semantics [Armv8.1]
+ void swplb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap byte in memory, with Load-acquire and Store-release semantics
+ // [Armv8.1]
+ void swpalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap halfword in memory [Armv8.1]
+ void swph(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap halfword in memory, with Load-acquire semantics [Armv8.1]
+ void swpah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap halfword in memory, with Store-release semantics [Armv8.1]
+ void swplh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap halfword in memory, with Load-acquire and Store-release semantics
+ // [Armv8.1]
+ void swpalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap word or doubleword in memory [Armv8.1]
+ void swp(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1]
+ void swpa(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap word or doubleword in memory, with Store-release semantics [Armv8.1]
+ void swpl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap word or doubleword in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void swpal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Prefetch memory.
+ void prfm(PrefetchOperation op, const MemOperand& addr,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Prefetch memory (with unscaled offset).
+ void prfum(PrefetchOperation op, const MemOperand& addr,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Prefetch from pc + imm19 << 2.
+ void prfm(PrefetchOperation op, int imm19);
+
+ // Move instructions. The default shift of -1 indicates that the move
+ // instruction will calculate an appropriate 16-bit immediate and left shift
+ // such that the result is equal to the 64-bit immediate argument. If an
+ // explicit left shift is specified (0, 16, 32 or 48), the immediate must be
+ // a 16-bit value.
+ //
+ // For movk, an explicit shift can be used to indicate which halfword should
+ // be overwritten, e.g. movk(x0, 0, 0) will overwrite the least-significant
+ // halfword with zero, whereas movk(x0, 0, 48) will overwrite the
+ // most-significant halfword.
+
+ // Move immediate and keep.
+ void movk(const Register& rd, uint64_t imm, int shift = -1) {
+ MoveWide(rd, imm, shift, MOVK);
+ }
+
+ // Move inverted immediate.
+ void movn(const Register& rd, uint64_t imm, int shift = -1) {
+ MoveWide(rd, imm, shift, MOVN);
+ }
+
+ // Move immediate.
+ void movz(const Register& rd, uint64_t imm, int shift = -1) {
+ MoveWide(rd, imm, shift, MOVZ);
+ }
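+
+ // A minimal usage sketch of the move-wide instructions above, assuming an
+ // Assembler instance `masm` and the general-purpose register alias `x0`
+ // defined elsewhere in VIXL: a 64-bit constant can be materialized one
+ // halfword at a time with explicit shifts.
+ //
+ //   masm.movz(x0, 0x5678, 0);   // x0 = 0x0000000000005678
+ //   masm.movk(x0, 0x1234, 16);  // x0 = 0x0000000012345678
+ //   masm.movk(x0, 0xabcd, 32);  // x0 = 0x0000abcd12345678
+ //   masm.movk(x0, 0x9876, 48);  // x0 = 0x9876abcd12345678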
+
+ // Misc instructions.
+ // Monitor debug-mode breakpoint.
+ void brk(int code);
+
+ // Halting debug-mode breakpoint.
+ void hlt(int code);
+
+ // Generate exception targeting EL1.
+ void svc(int code);
+ static void svc(Instruction* at, int code);
+
+ // Move register to register.
+ void mov(const Register& rd, const Register& rn);
+
+ // Move inverted operand to register.
+ void mvn(const Register& rd, const Operand& operand);
+
+ // System instructions.
+ // Move to register from system register.
+ void mrs(const Register& rt, SystemRegister sysreg);
+
+ // Move from register to system register.
+ void msr(SystemRegister sysreg, const Register& rt);
+
+ // System instruction.
+ void sys(int op1, int crn, int crm, int op2, const Register& rt = xzr);
+
+ // System instruction with pre-encoded op (op1:crn:crm:op2).
+ void sys(int op, const Register& rt = xzr);
+
+ // System data cache operation.
+ void dc(DataCacheOp op, const Register& rt);
+
+ // System instruction cache operation.
+ void ic(InstructionCacheOp op, const Register& rt);
+
+ // System hint.
+ BufferOffset hint(SystemHint code);
+ static void hint(Instruction* at, SystemHint code);
+
+ // Clear exclusive monitor.
+ void clrex(int imm4 = 0xf);
+
+ // Data memory barrier.
+ void dmb(BarrierDomain domain, BarrierType type);
+
+ // Data synchronization barrier.
+ void dsb(BarrierDomain domain, BarrierType type);
+
+ // Instruction synchronization barrier.
+ void isb();
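+
+ // A minimal sketch of emitting full barriers, assuming an Assembler instance
+ // `masm` and the BarrierDomain/BarrierType enumerators (e.g. InnerShareable,
+ // BarrierAll) declared in Constants-vixl.h:
+ //
+ //   masm.dmb(InnerShareable, BarrierAll);  // data memory barrier (dmb ish)
+ //   masm.dsb(InnerShareable, BarrierAll);  // data synchronization barrier
+ //   masm.isb();                            // resynchronize instruction fetch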
+
+ // Alias for system instructions.
+ // No-op.
+ BufferOffset nop() {
+ return hint(NOP);
+ }
+ static void nop(Instruction* at);
+
+ // Alias for system instructions.
+ // Conditional speculation barrier.
+ BufferOffset csdb() {
+ return hint(CSDB);
+ }
+ static void csdb(Instruction* at);
+
+ // FP and NEON instructions.
+ // Move double precision immediate to FP register.
+ void fmov(const VRegister& vd, double imm);
+
+ // Move single precision immediate to FP register.
+ void fmov(const VRegister& vd, float imm);
+
+ // Move FP register to register.
+ void fmov(const Register& rd, const VRegister& fn);
+
+ // Move register to FP register.
+ void fmov(const VRegister& vd, const Register& rn);
+
+ // Move FP register to FP register.
+ void fmov(const VRegister& vd, const VRegister& fn);
+
+ // Move 64-bit register to top half of 128-bit FP register.
+ void fmov(const VRegister& vd, int index, const Register& rn);
+
+ // Move top half of 128-bit FP register to 64-bit register.
+ void fmov(const Register& rd, const VRegister& vn, int index);
+
+ // FP add.
+ void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // FP subtract.
+ void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // FP multiply.
+ void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // FP fused multiply-add.
+ void fmadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va);
+
+ // FP fused multiply-subtract.
+ void fmsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va);
+
+ // FP fused multiply-add and negate.
+ void fnmadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va);
+
+ // FP fused multiply-subtract and negate.
+ void fnmsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va);
+
+ // FP multiply-negate scalar.
+ void fnmul(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP reciprocal exponent scalar.
+ void frecpx(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP divide.
+ void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
+
+ // FP maximum.
+ void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
+
+ // FP minimum.
+ void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
+
+ // FP maximum number.
+ void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
+
+ // FP minimum number.
+ void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
+
+ // FP absolute.
+ void fabs(const VRegister& vd, const VRegister& vn);
+
+ // FP negate.
+ void fneg(const VRegister& vd, const VRegister& vn);
+
+ // FP square root.
+ void fsqrt(const VRegister& vd, const VRegister& vn);
+
+ // FP round to integer, nearest with ties to away.
+ void frinta(const VRegister& vd, const VRegister& vn);
+
+ // FP round to integer, implicit rounding.
+ void frinti(const VRegister& vd, const VRegister& vn);
+
+ // FP round to integer, toward minus infinity.
+ void frintm(const VRegister& vd, const VRegister& vn);
+
+ // FP round to integer, nearest with ties to even.
+ void frintn(const VRegister& vd, const VRegister& vn);
+
+ // FP round to integer, toward plus infinity.
+ void frintp(const VRegister& vd, const VRegister& vn);
+
+ // FP round to integer, exact, implicit rounding.
+ void frintx(const VRegister& vd, const VRegister& vn);
+
+ // FP round to integer, towards zero.
+ void frintz(const VRegister& vd, const VRegister& vn);
+
+ void FPCompareMacro(const VRegister& vn,
+ double value,
+ FPTrapFlags trap);
+
+ void FPCompareMacro(const VRegister& vn,
+ const VRegister& vm,
+ FPTrapFlags trap);
+
+ // FP compare registers.
+ void fcmp(const VRegister& vn, const VRegister& vm);
+
+ // FP compare immediate.
+ void fcmp(const VRegister& vn, double value);
+
+ void FPCCompareMacro(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond,
+ FPTrapFlags trap);
+
+ // FP conditional compare.
+ void fccmp(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond);
+
+ // FP signaling compare registers.
+ void fcmpe(const VRegister& vn, const VRegister& vm);
+
+ // FP signaling compare immediate.
+ void fcmpe(const VRegister& vn, double value);
+
+ // FP conditional signaling compare.
+ void fccmpe(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond);
+
+ // FP conditional select.
+ void fcsel(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ Condition cond);
+
+ // Common FP Convert functions.
+ void NEONFPConvertToInt(const Register& rd,
+ const VRegister& vn,
+ Instr op);
+ void NEONFPConvertToInt(const VRegister& vd,
+ const VRegister& vn,
+ Instr op);
+
+ // FP convert between precisions.
+ void fcvt(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to higher precision.
+ void fcvtl(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to higher precision (second part).
+ void fcvtl2(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to lower precision.
+ void fcvtn(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to lower precision (second part).
+ void fcvtn2(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to lower precision, rounding to odd.
+ void fcvtxn(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to lower precision, rounding to odd (second part).
+ void fcvtxn2(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to signed integer, nearest with ties to away.
+ void fcvtas(const Register& rd, const VRegister& vn);
+
+ // FP convert to unsigned integer, nearest with ties to away.
+ void fcvtau(const Register& rd, const VRegister& vn);
+
+ // FP convert to signed integer, nearest with ties to away.
+ void fcvtas(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to unsigned integer, nearest with ties to away.
+ void fcvtau(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to signed integer, round towards -infinity.
+ void fcvtms(const Register& rd, const VRegister& vn);
+
+ // FP convert to unsigned integer, round towards -infinity.
+ void fcvtmu(const Register& rd, const VRegister& vn);
+
+ // FP convert to signed integer, round towards -infinity.
+ void fcvtms(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to unsigned integer, round towards -infinity.
+ void fcvtmu(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to signed integer, nearest with ties to even.
+ void fcvtns(const Register& rd, const VRegister& vn);
+
+ // FP convert to unsigned integer, nearest with ties to even.
+ void fcvtnu(const Register& rd, const VRegister& vn);
+
+ // FP convert to signed integer, nearest with ties to even.
+ void fcvtns(const VRegister& rd, const VRegister& vn);
+
+ // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
+ void fjcvtzs(const Register& rd, const VRegister& vn);
+
+ // FP convert to unsigned integer, nearest with ties to even.
+ void fcvtnu(const VRegister& rd, const VRegister& vn);
+
+ // FP convert to signed integer or fixed-point, round towards zero.
+ void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
+
+ // FP convert to unsigned integer or fixed-point, round towards zero.
+ void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
+
+ // FP convert to signed integer or fixed-point, round towards zero.
+ void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
+
+ // FP convert to unsigned integer or fixed-point, round towards zero.
+ void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
+
+ // FP convert to signed integer, round towards +infinity.
+ void fcvtps(const Register& rd, const VRegister& vn);
+
+ // FP convert to unsigned integer, round towards +infinity.
+ void fcvtpu(const Register& rd, const VRegister& vn);
+
+ // FP convert to signed integer, round towards +infinity.
+ void fcvtps(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to unsigned integer, round towards +infinity.
+ void fcvtpu(const VRegister& vd, const VRegister& vn);
+
+ // Convert signed integer or fixed point to FP.
+ void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
+
+ // Convert unsigned integer or fixed point to FP.
+ void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
+
+ // Convert signed integer or fixed-point to FP.
+ void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
+
+ // Convert unsigned integer or fixed-point to FP.
+ void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
+
+ // Unsigned absolute difference.
+ void uabd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed absolute difference.
+ void sabd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned absolute difference and accumulate.
+ void uaba(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed absolute difference and accumulate.
+ void saba(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Add.
+ void add(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Subtract.
+ void sub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned halving add.
+ void uhadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed halving add.
+ void shadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned rounding halving add.
+ void urhadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed rounding halving add.
+ void srhadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned halving sub.
+ void uhsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed halving sub.
+ void shsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned saturating add.
+ void uqadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating add.
+ void sqadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned saturating subtract.
+ void uqsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating subtract.
+ void sqsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Add pairwise.
+ void addp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Add pair of elements scalar.
+ void addp(const VRegister& vd,
+ const VRegister& vn);
+
+ // Multiply-add to accumulator.
+ void mla(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Multiply-subtract to accumulator.
+ void mls(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Multiply.
+ void mul(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Multiply by scalar element.
+ void mul(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Multiply-add by scalar element.
+ void mla(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Multiply-subtract by scalar element.
+ void mls(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed long multiply-add by scalar element.
+ void smlal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed long multiply-add by scalar element (second part).
+ void smlal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Unsigned long multiply-add by scalar element.
+ void umlal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Unsigned long multiply-add by scalar element (second part).
+ void umlal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed long multiply-sub by scalar element.
+ void smlsl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed long multiply-sub by scalar element (second part).
+ void smlsl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Unsigned long multiply-sub by scalar element.
+ void umlsl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Unsigned long multiply-sub by scalar element (second part).
+ void umlsl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed long multiply by scalar element.
+ void smull(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed long multiply by scalar element (second part).
+ void smull2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Unsigned long multiply by scalar element.
+ void umull(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Unsigned long multiply by scalar element (second part).
+ void umull2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed saturating double long multiply by element.
+ void sqdmull(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed saturating double long multiply by element (second part).
+ void sqdmull2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed saturating doubling long multiply-add by element.
+ void sqdmlal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed saturating doubling long multiply-add by element (second part).
+ void sqdmlal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed saturating doubling long multiply-sub by element.
+ void sqdmlsl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed saturating doubling long multiply-sub by element (second part).
+ void sqdmlsl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Compare equal.
+ void cmeq(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Compare signed greater than or equal.
+ void cmge(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Compare signed greater than.
+ void cmgt(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Compare unsigned higher.
+ void cmhi(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Compare unsigned higher or same.
+ void cmhs(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Compare bitwise test bits nonzero.
+ void cmtst(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Compare bitwise to zero.
+ void cmeq(const VRegister& vd,
+ const VRegister& vn,
+ int value);
+
+ // Compare signed greater than or equal to zero.
+ void cmge(const VRegister& vd,
+ const VRegister& vn,
+ int value);
+
+ // Compare signed greater than zero.
+ void cmgt(const VRegister& vd,
+ const VRegister& vn,
+ int value);
+
+ // Compare signed less than or equal to zero.
+ void cmle(const VRegister& vd,
+ const VRegister& vn,
+ int value);
+
+ // Compare signed less than zero.
+ void cmlt(const VRegister& vd,
+ const VRegister& vn,
+ int value);
+
+ // Signed shift left by register.
+ void sshl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned shift left by register.
+ void ushl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating shift left by register.
+ void sqshl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned saturating shift left by register.
+ void uqshl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed rounding shift left by register.
+ void srshl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned rounding shift left by register.
+ void urshl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating rounding shift left by register.
+ void sqrshl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned saturating rounding shift left by register.
+ void uqrshl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bitwise and.
+ void and_(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bitwise or.
+ void orr(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bitwise or immediate.
+ void orr(const VRegister& vd,
+ const int imm8,
+ const int left_shift = 0);
+
+ // Move register to register.
+ void mov(const VRegister& vd,
+ const VRegister& vn);
+
+ // Bitwise orn.
+ void orn(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bitwise eor.
+ void eor(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bit clear immediate.
+ void bic(const VRegister& vd,
+ const int imm8,
+ const int left_shift = 0);
+
+ // Bit clear.
+ void bic(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bitwise insert if false.
+ void bif(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bitwise insert if true.
+ void bit(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bitwise select.
+ void bsl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Polynomial multiply.
+ void pmul(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Vector move immediate.
+ void movi(const VRegister& vd,
+ const uint64_t imm,
+ Shift shift = LSL,
+ const int shift_amount = 0);
+
+ // Bitwise not.
+ void mvn(const VRegister& vd,
+ const VRegister& vn);
+
+ // Vector move inverted immediate.
+ void mvni(const VRegister& vd,
+ const int imm8,
+ Shift shift = LSL,
+ const int shift_amount = 0);
+
+ // Signed saturating accumulate of unsigned value.
+ void suqadd(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned saturating accumulate of signed value.
+ void usqadd(const VRegister& vd,
+ const VRegister& vn);
+
+ // Absolute value.
+ void abs(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed saturating absolute value.
+ void sqabs(const VRegister& vd,
+ const VRegister& vn);
+
+ // Negate.
+ void neg(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed saturating negate.
+ void sqneg(const VRegister& vd,
+ const VRegister& vn);
+
+ // Bitwise not.
+ void not_(const VRegister& vd,
+ const VRegister& vn);
+
+ // Extract narrow.
+ void xtn(const VRegister& vd,
+ const VRegister& vn);
+
+ // Extract narrow (second part).
+ void xtn2(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed saturating extract narrow.
+ void sqxtn(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed saturating extract narrow (second part).
+ void sqxtn2(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned saturating extract narrow.
+ void uqxtn(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned saturating extract narrow (second part).
+ void uqxtn2(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed saturating extract unsigned narrow.
+ void sqxtun(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed saturating extract unsigned narrow (second part).
+ void sqxtun2(const VRegister& vd,
+ const VRegister& vn);
+
+ // Extract vector from pair of vectors.
+ void ext(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int index);
+
+ // Duplicate vector element to vector or scalar.
+ void dup(const VRegister& vd,
+ const VRegister& vn,
+ int vn_index);
+
+ // Move vector element to scalar.
+ void mov(const VRegister& vd,
+ const VRegister& vn,
+ int vn_index);
+
+ // Duplicate general-purpose register to vector.
+ void dup(const VRegister& vd,
+ const Register& rn);
+
+ // Insert vector element from another vector element.
+ void ins(const VRegister& vd,
+ int vd_index,
+ const VRegister& vn,
+ int vn_index);
+
+ // Move vector element to another vector element.
+ void mov(const VRegister& vd,
+ int vd_index,
+ const VRegister& vn,
+ int vn_index);
+
+ // Insert vector element from general-purpose register.
+ void ins(const VRegister& vd,
+ int vd_index,
+ const Register& rn);
+
+ // Move general-purpose register to a vector element.
+ void mov(const VRegister& vd,
+ int vd_index,
+ const Register& rn);
+
+ // Unsigned move vector element to general-purpose register.
+ void umov(const Register& rd,
+ const VRegister& vn,
+ int vn_index);
+
+ // Move vector element to general-purpose register.
+ void mov(const Register& rd,
+ const VRegister& vn,
+ int vn_index);
+
+ // Signed move vector element to general-purpose register.
+ void smov(const Register& rd,
+ const VRegister& vn,
+ int vn_index);
+
+ // One-element structure load to one register.
+ void ld1(const VRegister& vt,
+ const MemOperand& src);
+
+ // One-element structure load to two registers.
+ void ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src);
+
+ // One-element structure load to three registers.
+ void ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src);
+
+ // One-element structure load to four registers.
+ void ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src);
+
+ // One-element single structure load to one lane.
+ void ld1(const VRegister& vt,
+ int lane,
+ const MemOperand& src);
+
+ // One-element single structure load to all lanes.
+ void ld1r(const VRegister& vt,
+ const MemOperand& src);
+
+ // Two-element structure load.
+ void ld2(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src);
+
+ // Two-element single structure load to one lane.
+ void ld2(const VRegister& vt,
+ const VRegister& vt2,
+ int lane,
+ const MemOperand& src);
+
+ // Two-element single structure load to all lanes.
+ void ld2r(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src);
+
+ // Three-element structure load.
+ void ld3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src);
+
+ // Three-element single structure load to one lane.
+ void ld3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ int lane,
+ const MemOperand& src);
+
+ // Three-element single structure load to all lanes.
+ void ld3r(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src);
+
+ // Four-element structure load.
+ void ld4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src);
+
+ // Four-element single structure load to one lane.
+ void ld4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ int lane,
+ const MemOperand& src);
+
+ // Four-element single structure load to all lanes.
+ void ld4r(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src);
+
+ // Count leading sign bits.
+ void cls(const VRegister& vd,
+ const VRegister& vn);
+
+ // Count leading zero bits (vector).
+ void clz(const VRegister& vd,
+ const VRegister& vn);
+
+ // Population count per byte.
+ void cnt(const VRegister& vd,
+ const VRegister& vn);
+
+ // Reverse bit order.
+ void rbit(const VRegister& vd,
+ const VRegister& vn);
+
+ // Reverse elements in 16-bit halfwords.
+ void rev16(const VRegister& vd,
+ const VRegister& vn);
+
+ // Reverse elements in 32-bit words.
+ void rev32(const VRegister& vd,
+ const VRegister& vn);
+
+ // Reverse elements in 64-bit doublewords.
+ void rev64(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned reciprocal square root estimate.
+ void ursqrte(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned reciprocal estimate.
+ void urecpe(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed pairwise long add.
+ void saddlp(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned pairwise long add.
+ void uaddlp(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed pairwise long add and accumulate.
+ void sadalp(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned pairwise long add and accumulate.
+ void uadalp(const VRegister& vd,
+ const VRegister& vn);
+
+ // Shift left by immediate.
+ void shl(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating shift left by immediate.
+ void sqshl(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating shift left unsigned by immediate.
+ void sqshlu(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned saturating shift left by immediate.
+ void uqshl(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed shift left long by immediate.
+ void sshll(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed shift left long by immediate (second part).
+ void sshll2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed extend long.
+ void sxtl(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed extend long (second part).
+ void sxtl2(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned shift left long by immediate.
+ void ushll(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned shift left long by immediate (second part).
+ void ushll2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Shift left long by element size.
+ void shll(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Shift left long by element size (second part).
+ void shll2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned extend long.
+ void uxtl(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned extend long (second part).
+ void uxtl2(const VRegister& vd,
+ const VRegister& vn);
+
+ // Shift left by immediate and insert.
+ void sli(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Shift right by immediate and insert.
+ void sri(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed maximum.
+ void smax(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed pairwise maximum.
+ void smaxp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Add across vector.
+ void addv(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed add long across vector.
+ void saddlv(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned add long across vector.
+ void uaddlv(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP maximum number across vector.
+ void fmaxnmv(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP maximum across vector.
+ void fmaxv(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP minimum number across vector.
+ void fminnmv(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP minimum across vector.
+ void fminv(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed maximum across vector.
+ void smaxv(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed minimum.
+ void smin(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed minimum pairwise.
+ void sminp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed minimum across vector.
+ void sminv(const VRegister& vd,
+ const VRegister& vn);
+
+ // One-element structure store from one register.
+ void st1(const VRegister& vt,
+ const MemOperand& src);
+
+ // One-element structure store from two registers.
+ void st1(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src);
+
+ // One-element structure store from three registers.
+ void st1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src);
+
+ // One-element structure store from four registers.
+ void st1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src);
+
+ // One-element single structure store from one lane.
+ void st1(const VRegister& vt,
+ int lane,
+ const MemOperand& src);
+
+ // Two-element structure store from two registers.
+ void st2(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src);
+
+ // Two-element single structure store from two lanes.
+ void st2(const VRegister& vt,
+ const VRegister& vt2,
+ int lane,
+ const MemOperand& src);
+
+ // Three-element structure store from three registers.
+ void st3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src);
+
+ // Three-element single structure store from three lanes.
+ void st3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ int lane,
+ const MemOperand& src);
+
+ // Four-element structure store from four registers.
+ void st4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src);
+
+ // Four-element single structure store from four lanes.
+ void st4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ int lane,
+ const MemOperand& src);
+
+ // Unsigned add long.
+ void uaddl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned add long (second part).
+ void uaddl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned add wide.
+ void uaddw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned add wide (second part).
+ void uaddw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed add long.
+ void saddl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed add long (second part).
+ void saddl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed add wide.
+ void saddw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed add wide (second part).
+ void saddw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned subtract long.
+ void usubl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned subtract long (second part).
+ void usubl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned subtract wide.
+ void usubw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned subtract wide (second part).
+ void usubw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed subtract long.
+ void ssubl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed subtract long (second part).
+ void ssubl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed integer subtract wide.
+ void ssubw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed integer subtract wide (second part).
+ void ssubw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned maximum.
+ void umax(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned pairwise maximum.
+ void umaxp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned maximum across vector.
+ void umaxv(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned minimum.
+ void umin(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned pairwise minimum.
+ void uminp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned minimum across vector.
+ void uminv(const VRegister& vd,
+ const VRegister& vn);
+
+ // Transpose vectors (primary).
+ void trn1(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Transpose vectors (secondary).
+ void trn2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unzip vectors (primary).
+ void uzp1(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unzip vectors (secondary).
+ void uzp2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Zip vectors (primary).
+ void zip1(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Zip vectors (secondary).
+ void zip2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed shift right by immediate.
+ void sshr(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned shift right by immediate.
+ void ushr(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed rounding shift right by immediate.
+ void srshr(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned rounding shift right by immediate.
+ void urshr(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed shift right by immediate and accumulate.
+ void ssra(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned shift right by immediate and accumulate.
+ void usra(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed rounding shift right by immediate and accumulate.
+ void srsra(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned rounding shift right by immediate and accumulate.
+ void ursra(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Shift right narrow by immediate.
+ void shrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Shift right narrow by immediate (second part).
+ void shrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Rounding shift right narrow by immediate.
+ void rshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Rounding shift right narrow by immediate (second part).
+ void rshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned saturating shift right narrow by immediate.
+ void uqshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned saturating shift right narrow by immediate (second part).
+ void uqshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned saturating rounding shift right narrow by immediate.
+ void uqrshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned saturating rounding shift right narrow by immediate (second part).
+ void uqrshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating shift right narrow by immediate.
+ void sqshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating shift right narrow by immediate (second part).
+ void sqshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating rounded shift right narrow by immediate.
+ void sqrshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating rounded shift right narrow by immediate (second part).
+ void sqrshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating shift right unsigned narrow by immediate.
+ void sqshrun(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating shift right unsigned narrow by immediate (second part).
+ void sqshrun2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating rounded shift right unsigned narrow by immediate.
+ void sqrshrun(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating rounded shift right unsigned narrow by immediate
+ // (second part).
+ void sqrshrun2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // FP reciprocal step.
+ void frecps(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP reciprocal estimate.
+ void frecpe(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP reciprocal square root estimate.
+ void frsqrte(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP reciprocal square root step.
+ void frsqrts(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed absolute difference and accumulate long.
+ void sabal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed absolute difference and accumulate long (second part).
+ void sabal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned absolute difference and accumulate long.
+ void uabal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned absolute difference and accumulate long (second part).
+ void uabal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed absolute difference long.
+ void sabdl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed absolute difference long (second part).
+ void sabdl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned absolute difference long.
+ void uabdl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned absolute difference long (second part).
+ void uabdl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Polynomial multiply long.
+ void pmull(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Polynomial multiply long (second part).
+ void pmull2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed long multiply-add.
+ void smlal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed long multiply-add (second part).
+ void smlal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned long multiply-add.
+ void umlal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned long multiply-add (second part).
+ void umlal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed long multiply-sub.
+ void smlsl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed long multiply-sub (second part).
+ void smlsl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned long multiply-sub.
+ void umlsl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned long multiply-sub (second part).
+ void umlsl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed long multiply.
+ void smull(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed long multiply (second part).
+ void smull2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling long multiply-add.
+ void sqdmlal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling long multiply-add (second part).
+ void sqdmlal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling long multiply-subtract.
+ void sqdmlsl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling long multiply-subtract (second part).
+ void sqdmlsl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling long multiply.
+ void sqdmull(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling long multiply (second part).
+ void sqdmull2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling multiply returning high half.
+ void sqdmulh(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating rounding doubling multiply returning high half.
+ void sqrdmulh(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling multiply element returning high half.
+ void sqdmulh(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed saturating rounding doubling multiply element returning high half.
+ void sqrdmulh(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Unsigned long multiply.
+ void umull(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned long multiply (second part).
+ void umull2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Add narrow returning high half.
+ void addhn(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Add narrow returning high half (second part).
+ void addhn2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Rounding add narrow returning high half.
+ void raddhn(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Rounding add narrow returning high half (second part).
+ void raddhn2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Subtract narrow returning high half.
+ void subhn(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Subtract narrow returning high half (second part).
+ void subhn2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Rounding subtract narrow returning high half.
+ void rsubhn(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Rounding subtract narrow returning high half (second part).
+ void rsubhn2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP vector multiply accumulate.
+ void fmla(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP vector multiply subtract.
+ void fmls(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP vector multiply extended.
+ void fmulx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP absolute greater than or equal.
+ void facge(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP absolute greater than.
+ void facgt(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP multiply by element.
+ void fmul(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // FP fused multiply-add to accumulator by element.
+ void fmla(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // FP fused multiply-sub from accumulator by element.
+ void fmls(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // FP multiply extended by element.
+ void fmulx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // FP compare equal.
+ void fcmeq(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP greater than.
+ void fcmgt(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP greater than or equal.
+ void fcmge(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP compare equal to zero.
+ void fcmeq(const VRegister& vd,
+ const VRegister& vn,
+ double imm);
+
+ // FP greater than zero.
+ void fcmgt(const VRegister& vd,
+ const VRegister& vn,
+ double imm);
+
+ // FP greater than or equal to zero.
+ void fcmge(const VRegister& vd,
+ const VRegister& vn,
+ double imm);
+
+ // FP less than or equal to zero.
+ void fcmle(const VRegister& vd,
+ const VRegister& vn,
+ double imm);
+
+  // FP less than zero.
+ void fcmlt(const VRegister& vd,
+ const VRegister& vn,
+ double imm);
+
+ // FP absolute difference.
+ void fabd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP pairwise add vector.
+ void faddp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP pairwise add scalar.
+ void faddp(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP pairwise maximum vector.
+ void fmaxp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP pairwise maximum scalar.
+ void fmaxp(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP pairwise minimum vector.
+ void fminp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP pairwise minimum scalar.
+ void fminp(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP pairwise maximum number vector.
+ void fmaxnmp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP pairwise maximum number scalar.
+ void fmaxnmp(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP pairwise minimum number vector.
+ void fminnmp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP pairwise minimum number scalar.
+ void fminnmp(const VRegister& vd,
+ const VRegister& vn);
+
+ // Emit generic instructions.
+ // Emit raw instructions into the instruction stream.
+ void dci(Instr raw_inst) { Emit(raw_inst); }
+
+ // Emit 32 bits of data into the instruction stream.
+ void dc32(uint32_t data) {
+ EmitData(&data, sizeof(data));
+ }
+
+ // Emit 64 bits of data into the instruction stream.
+ void dc64(uint64_t data) {
+ EmitData(&data, sizeof(data));
+ }
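+
+  // For illustration: dci() places an arbitrary pre-encoded word in the
+  // instruction stream, e.g. dci(0xD503201F) emits HINT #0 (a NOP), while
+  // dc32()/dc64() are intended for data that is not meant to be executed.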
+
+ // Code generation helpers.
+
+ // Register encoding.
+ static Instr Rd(CPURegister rd) {
+ VIXL_ASSERT(rd.code() != kSPRegInternalCode);
+ return rd.code() << Rd_offset;
+ }
+
+ static Instr Rn(CPURegister rn) {
+ VIXL_ASSERT(rn.code() != kSPRegInternalCode);
+ return rn.code() << Rn_offset;
+ }
+
+ static Instr Rm(CPURegister rm) {
+ VIXL_ASSERT(rm.code() != kSPRegInternalCode);
+ return rm.code() << Rm_offset;
+ }
+
+ static Instr RmNot31(CPURegister rm) {
+ VIXL_ASSERT(rm.code() != kSPRegInternalCode);
+ VIXL_ASSERT(!rm.IsZero());
+ return Rm(rm);
+ }
+
+ static Instr Ra(CPURegister ra) {
+ VIXL_ASSERT(ra.code() != kSPRegInternalCode);
+ return ra.code() << Ra_offset;
+ }
+
+ static Instr Rt(CPURegister rt) {
+ VIXL_ASSERT(rt.code() != kSPRegInternalCode);
+ return rt.code() << Rt_offset;
+ }
+
+ static Instr Rt2(CPURegister rt2) {
+ VIXL_ASSERT(rt2.code() != kSPRegInternalCode);
+ return rt2.code() << Rt2_offset;
+ }
+
+ static Instr Rs(CPURegister rs) {
+ VIXL_ASSERT(rs.code() != kSPRegInternalCode);
+ return rs.code() << Rs_offset;
+ }
+
+ // These encoding functions allow the stack pointer to be encoded, and
+ // disallow the zero register.
+ static Instr RdSP(Register rd) {
+ VIXL_ASSERT(!rd.IsZero());
+ return (rd.code() & kRegCodeMask) << Rd_offset;
+ }
+
+ static Instr RnSP(Register rn) {
+ VIXL_ASSERT(!rn.IsZero());
+ return (rn.code() & kRegCodeMask) << Rn_offset;
+ }
+
+ // Flags encoding.
+ static Instr Flags(FlagsUpdate S) {
+ if (S == SetFlags) {
+ return 1 << FlagsUpdate_offset;
+ } else if (S == LeaveFlags) {
+ return 0 << FlagsUpdate_offset;
+ }
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+
+ static Instr Cond(Condition cond) {
+ return cond << Condition_offset;
+ }
+
+ // PC-relative address encoding.
+ static Instr ImmPCRelAddress(int imm21) {
+ VIXL_ASSERT(IsInt21(imm21));
+ Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
+ Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
+ Instr immlo = imm << ImmPCRelLo_offset;
+ return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
+ }
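+
+  // For illustration: the 21-bit offset is split across two fields, with
+  // bits <1:0> going to ImmPCRelLo and bits <20:2> to ImmPCRelHi. For an
+  // offset of 0x12345:
+  //   immlo = 0x12345 & 0x3 = 0x1
+  //   immhi = 0x12345 >> 2  = 0x48D1
+  // before each part is shifted into place and masked.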
+
+ // Branch encoding.
+ static Instr ImmUncondBranch(int imm26) {
+ VIXL_ASSERT(IsInt26(imm26));
+ return TruncateToUint26(imm26) << ImmUncondBranch_offset;
+ }
+
+ static Instr ImmCondBranch(int imm19) {
+ VIXL_ASSERT(IsInt19(imm19));
+ return TruncateToUint19(imm19) << ImmCondBranch_offset;
+ }
+
+ static Instr ImmCmpBranch(int imm19) {
+ VIXL_ASSERT(IsInt19(imm19));
+ return TruncateToUint19(imm19) << ImmCmpBranch_offset;
+ }
+
+ static Instr ImmTestBranch(int imm14) {
+ VIXL_ASSERT(IsInt14(imm14));
+ return TruncateToUint14(imm14) << ImmTestBranch_offset;
+ }
+
+ static Instr ImmTestBranchBit(unsigned bit_pos) {
+ VIXL_ASSERT(IsUint6(bit_pos));
+ // Subtract five from the shift offset, as we need bit 5 from bit_pos.
+ unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5);
+ unsigned b40 = bit_pos << ImmTestBranchBit40_offset;
+ b5 &= ImmTestBranchBit5_mask;
+ b40 &= ImmTestBranchBit40_mask;
+ return b5 | b40;
+ }
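+
+  // For illustration: the tested bit position is split into B5 (instruction
+  // bit 31) and B40 (instruction bits 23:19). For bit_pos = 37 (0b100101):
+  //   b5  = bit 5 of bit_pos      = 1       -> instruction bit 31
+  //   b40 = bits <4:0> of bit_pos = 0b00101 -> instruction bits 23:19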
+
+ // Data Processing encoding.
+ static Instr SF(Register rd) {
+ return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
+ }
+
+ static Instr ImmAddSub(int imm) {
+ VIXL_ASSERT(IsImmAddSub(imm));
+ if (IsUint12(imm)) { // No shift required.
+ imm <<= ImmAddSub_offset;
+ } else {
+ imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset);
+ }
+ return imm;
+ }
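+
+  // For illustration: add/sub immediates are either a plain 12-bit value or
+  // a 12-bit value shifted left by 12. ImmAddSub(0x123) encodes 0x123 with
+  // no shift, while ImmAddSub(0x123000) encodes 0x123 with the shift bit set
+  // (LSL #12).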
+
+ static Instr ImmS(unsigned imms, unsigned reg_size) {
+ VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
+ ((reg_size == kWRegSize) && IsUint5(imms)));
+ USE(reg_size);
+ return imms << ImmS_offset;
+ }
+
+ static Instr ImmR(unsigned immr, unsigned reg_size) {
+ VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
+ ((reg_size == kWRegSize) && IsUint5(immr)));
+ USE(reg_size);
+ VIXL_ASSERT(IsUint6(immr));
+ return immr << ImmR_offset;
+ }
+
+ static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
+ VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+ VIXL_ASSERT(IsUint6(imms));
+ VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
+ USE(reg_size);
+ return imms << ImmSetBits_offset;
+ }
+
+ static Instr ImmRotate(unsigned immr, unsigned reg_size) {
+ VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+ VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
+ ((reg_size == kWRegSize) && IsUint5(immr)));
+ USE(reg_size);
+ return immr << ImmRotate_offset;
+ }
+
+ static Instr ImmLLiteral(int imm19) {
+ VIXL_ASSERT(IsInt19(imm19));
+ return TruncateToUint19(imm19) << ImmLLiteral_offset;
+ }
+
+ static Instr BitN(unsigned bitn, unsigned reg_size) {
+ VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+ VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
+ USE(reg_size);
+ return bitn << BitN_offset;
+ }
+
+ static Instr ShiftDP(Shift shift) {
+ VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
+ return shift << ShiftDP_offset;
+ }
+
+ static Instr ImmDPShift(unsigned amount) {
+ VIXL_ASSERT(IsUint6(amount));
+ return amount << ImmDPShift_offset;
+ }
+
+ static Instr ExtendMode(Extend extend) {
+ return extend << ExtendMode_offset;
+ }
+
+ static Instr ImmExtendShift(unsigned left_shift) {
+ VIXL_ASSERT(left_shift <= 4);
+ return left_shift << ImmExtendShift_offset;
+ }
+
+ static Instr ImmCondCmp(unsigned imm) {
+ VIXL_ASSERT(IsUint5(imm));
+ return imm << ImmCondCmp_offset;
+ }
+
+ static Instr Nzcv(StatusFlags nzcv) {
+ return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
+ }
+
+ // MemOperand offset encoding.
+ static Instr ImmLSUnsigned(int imm12) {
+ VIXL_ASSERT(IsUint12(imm12));
+ return imm12 << ImmLSUnsigned_offset;
+ }
+
+ static Instr ImmLS(int imm9) {
+ VIXL_ASSERT(IsInt9(imm9));
+ return TruncateToUint9(imm9) << ImmLS_offset;
+ }
+
+ static Instr ImmLSPair(int imm7, unsigned access_size) {
+ VIXL_ASSERT(((imm7 >> access_size) << access_size) == imm7);
+ int scaled_imm7 = imm7 >> access_size;
+ VIXL_ASSERT(IsInt7(scaled_imm7));
+ return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
+ }
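+
+  // For illustration: access_size is the log2 of the access size in bytes,
+  // and the pair offset is stored scaled by it. For a pair of X registers
+  // (access_size = 3), a byte offset of 16 is encoded as 16 >> 3 = 2.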
+
+ static Instr ImmShiftLS(unsigned shift_amount) {
+ VIXL_ASSERT(IsUint1(shift_amount));
+ return shift_amount << ImmShiftLS_offset;
+ }
+
+ static Instr ImmPrefetchOperation(int imm5) {
+ VIXL_ASSERT(IsUint5(imm5));
+ return imm5 << ImmPrefetchOperation_offset;
+ }
+
+ static Instr ImmException(int imm16) {
+ VIXL_ASSERT(IsUint16(imm16));
+ return imm16 << ImmException_offset;
+ }
+
+ static Instr ImmSystemRegister(int imm15) {
+ VIXL_ASSERT(IsUint15(imm15));
+ return imm15 << ImmSystemRegister_offset;
+ }
+
+ static Instr ImmHint(int imm7) {
+ VIXL_ASSERT(IsUint7(imm7));
+ return imm7 << ImmHint_offset;
+ }
+
+ static Instr CRm(int imm4) {
+ VIXL_ASSERT(IsUint4(imm4));
+ return imm4 << CRm_offset;
+ }
+
+ static Instr CRn(int imm4) {
+ VIXL_ASSERT(IsUint4(imm4));
+ return imm4 << CRn_offset;
+ }
+
+ static Instr SysOp(int imm14) {
+ VIXL_ASSERT(IsUint14(imm14));
+ return imm14 << SysOp_offset;
+ }
+
+ static Instr ImmSysOp1(int imm3) {
+ VIXL_ASSERT(IsUint3(imm3));
+ return imm3 << SysOp1_offset;
+ }
+
+ static Instr ImmSysOp2(int imm3) {
+ VIXL_ASSERT(IsUint3(imm3));
+ return imm3 << SysOp2_offset;
+ }
+
+ static Instr ImmBarrierDomain(int imm2) {
+ VIXL_ASSERT(IsUint2(imm2));
+ return imm2 << ImmBarrierDomain_offset;
+ }
+
+ static Instr ImmBarrierType(int imm2) {
+ VIXL_ASSERT(IsUint2(imm2));
+ return imm2 << ImmBarrierType_offset;
+ }
+
+ // Move immediates encoding.
+ static Instr ImmMoveWide(uint64_t imm) {
+ VIXL_ASSERT(IsUint16(imm));
+ return static_cast<Instr>(imm << ImmMoveWide_offset);
+ }
+
+ static Instr ShiftMoveWide(int64_t shift) {
+ VIXL_ASSERT(IsUint2(shift));
+ return static_cast<Instr>(shift << ShiftMoveWide_offset);
+ }
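+
+  // For illustration: move-wide instructions take a 16-bit immediate and a
+  // half-word selector. Materialising 0x1234 in bits 31:16 of a register
+  // uses ImmMoveWide(0x1234) with ShiftMoveWide(1), i.e. LSL #16.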
+
+ // FP Immediates.
+ static Instr ImmFP32(float imm);
+ static Instr ImmFP64(double imm);
+
+ // FP register type.
+ static Instr FPType(FPRegister fd) {
+ return fd.Is64Bits() ? FP64 : FP32;
+ }
+
+ static Instr FPScale(unsigned scale) {
+ VIXL_ASSERT(IsUint6(scale));
+ return scale << FPScale_offset;
+ }
+
+ // Immediate field checking helpers.
+ static bool IsImmAddSub(int64_t immediate);
+ static bool IsImmConditionalCompare(int64_t immediate);
+ static bool IsImmFP32(float imm);
+ static bool IsImmFP64(double imm);
+ static bool IsImmLogical(uint64_t value,
+ unsigned width,
+ unsigned* n = NULL,
+ unsigned* imm_s = NULL,
+ unsigned* imm_r = NULL);
+ static bool IsImmLSPair(int64_t offset, unsigned access_size);
+ static bool IsImmLSScaled(int64_t offset, unsigned access_size);
+ static bool IsImmLSUnscaled(int64_t offset);
+ static bool IsImmMovn(uint64_t imm, unsigned reg_size);
+ static bool IsImmMovz(uint64_t imm, unsigned reg_size);
+
+ // Instruction bits for vector format in data processing operations.
+ static Instr VFormat(VRegister vd) {
+ if (vd.Is64Bits()) {
+ switch (vd.lanes()) {
+ case 2: return NEON_2S;
+ case 4: return NEON_4H;
+ case 8: return NEON_8B;
+ default: return 0xffffffff;
+ }
+ } else {
+ VIXL_ASSERT(vd.Is128Bits());
+ switch (vd.lanes()) {
+ case 2: return NEON_2D;
+ case 4: return NEON_4S;
+ case 8: return NEON_8H;
+ case 16: return NEON_16B;
+ default: return 0xffffffff;
+ }
+ }
+ }
+
+ // Instruction bits for vector format in floating point data processing
+ // operations.
+ static Instr FPFormat(VRegister vd) {
+ if (vd.lanes() == 1) {
+ // Floating point scalar formats.
+ VIXL_ASSERT(vd.Is32Bits() || vd.Is64Bits());
+ return vd.Is64Bits() ? FP64 : FP32;
+ }
+
+ // Two lane floating point vector formats.
+ if (vd.lanes() == 2) {
+ VIXL_ASSERT(vd.Is64Bits() || vd.Is128Bits());
+ return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S;
+ }
+
+ // Four lane floating point vector format.
+ VIXL_ASSERT((vd.lanes() == 4) && vd.Is128Bits());
+ return NEON_FP_4S;
+ }
+
+ // Instruction bits for vector format in load and store operations.
+ static Instr LSVFormat(VRegister vd) {
+ if (vd.Is64Bits()) {
+ switch (vd.lanes()) {
+ case 1: return LS_NEON_1D;
+ case 2: return LS_NEON_2S;
+ case 4: return LS_NEON_4H;
+ case 8: return LS_NEON_8B;
+ default: return 0xffffffff;
+ }
+ } else {
+ VIXL_ASSERT(vd.Is128Bits());
+ switch (vd.lanes()) {
+ case 2: return LS_NEON_2D;
+ case 4: return LS_NEON_4S;
+ case 8: return LS_NEON_8H;
+ case 16: return LS_NEON_16B;
+ default: return 0xffffffff;
+ }
+ }
+ }
+
+ // Instruction bits for scalar format in data processing operations.
+ static Instr SFormat(VRegister vd) {
+ VIXL_ASSERT(vd.lanes() == 1);
+ switch (vd.SizeInBytes()) {
+ case 1: return NEON_B;
+ case 2: return NEON_H;
+ case 4: return NEON_S;
+ case 8: return NEON_D;
+ default: return 0xffffffff;
+ }
+ }
+
+ static Instr ImmNEONHLM(int index, int num_bits) {
+ int h, l, m;
+ if (num_bits == 3) {
+ VIXL_ASSERT(IsUint3(index));
+ h = (index >> 2) & 1;
+ l = (index >> 1) & 1;
+ m = (index >> 0) & 1;
+ } else if (num_bits == 2) {
+ VIXL_ASSERT(IsUint2(index));
+ h = (index >> 1) & 1;
+ l = (index >> 0) & 1;
+ m = 0;
+ } else {
+ VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
+ h = (index >> 0) & 1;
+ l = 0;
+ m = 0;
+ }
+ return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
+ }
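+
+  // For illustration: by-element instructions spread the lane index over the
+  // H, L and M bits. With num_bits == 3, an index of 5 (0b101) gives h = 1,
+  // l = 0, m = 1, each placed at its field position.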
+
+ static Instr ImmNEONExt(int imm4) {
+ VIXL_ASSERT(IsUint4(imm4));
+ return imm4 << ImmNEONExt_offset;
+ }
+
+ static Instr ImmNEON5(Instr format, int index) {
+ VIXL_ASSERT(IsUint4(index));
+ int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
+ int imm5 = (index << (s + 1)) | (1 << s);
+ return imm5 << ImmNEON5_offset;
+ }
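+
+  // For illustration: ImmNEON5 packs the lane size and index into one 5-bit
+  // field. For a B-sized lane (s = 0) and index 3 it is (3 << 1) | 1 =
+  // 0b00111; for an H-sized lane (s = 1) and index 3 it is (3 << 2) | 2 =
+  // 0b01110.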
+
+ static Instr ImmNEON4(Instr format, int index) {
+ VIXL_ASSERT(IsUint4(index));
+ int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
+ int imm4 = index << s;
+ return imm4 << ImmNEON4_offset;
+ }
+
+ static Instr ImmNEONabcdefgh(int imm8) {
+ VIXL_ASSERT(IsUint8(imm8));
+ Instr instr;
+ instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
+ instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
+ return instr;
+ }
+
+ static Instr NEONCmode(int cmode) {
+ VIXL_ASSERT(IsUint4(cmode));
+ return cmode << NEONCmode_offset;
+ }
+
+ static Instr NEONModImmOp(int op) {
+ VIXL_ASSERT(IsUint1(op));
+ return op << NEONModImmOp_offset;
+ }
+
+ size_t size() const {
+ return SizeOfCodeGenerated();
+ }
+
+ size_t SizeOfCodeGenerated() const {
+ return armbuffer_.size();
+ }
+
+ PositionIndependentCodeOption pic() const {
+ return pic_;
+ }
+
+ CPUFeatures* GetCPUFeatures() { return &cpu_features_; }
+
+ void SetCPUFeatures(const CPUFeatures& cpu_features) {
+ cpu_features_ = cpu_features;
+ }
+
+ bool AllowPageOffsetDependentCode() const {
+ return (pic() == PageOffsetDependentCode) ||
+ (pic() == PositionDependentCode);
+ }
+
+ static const Register& AppropriateZeroRegFor(const CPURegister& reg) {
+ return reg.Is64Bits() ? xzr : wzr;
+ }
+
+
+ protected:
+ void LoadStore(const CPURegister& rt,
+ const MemOperand& addr,
+ LoadStoreOp op,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ void LoadStorePair(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& addr,
+ LoadStorePairOp op);
+ void LoadStoreStruct(const VRegister& vt,
+ const MemOperand& addr,
+ NEONLoadStoreMultiStructOp op);
+ void LoadStoreStruct1(const VRegister& vt,
+ int reg_count,
+ const MemOperand& addr);
+ void LoadStoreStructSingle(const VRegister& vt,
+ uint32_t lane,
+ const MemOperand& addr,
+ NEONLoadStoreSingleStructOp op);
+ void LoadStoreStructSingleAllLanes(const VRegister& vt,
+ const MemOperand& addr,
+ NEONLoadStoreSingleStructOp op);
+ void LoadStoreStructVerify(const VRegister& vt,
+ const MemOperand& addr,
+ Instr op);
+
+ void Prefetch(PrefetchOperation op,
+ const MemOperand& addr,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ BufferOffset Logical(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ LogicalOp op);
+ BufferOffset LogicalImmediate(const Register& rd,
+ const Register& rn,
+ unsigned n,
+ unsigned imm_s,
+ unsigned imm_r,
+ LogicalOp op);
+
+ void ConditionalCompare(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond,
+ ConditionalCompareOp op);
+
+ void AddSubWithCarry(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubWithCarryOp op);
+
+
+ // Functions for emulating operands not directly supported by the instruction
+ // set.
+ void EmitShift(const Register& rd,
+ const Register& rn,
+ Shift shift,
+ unsigned amount);
+ void EmitExtendShift(const Register& rd,
+ const Register& rn,
+ Extend extend,
+ unsigned left_shift);
+
+ void AddSub(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubOp op);
+
+ void NEONTable(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEONTableOp op);
+
+ // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
+ // registers. Only simple loads are supported; sign- and zero-extension (such
+ // as in LDPSW_x or LDRB_w) are not supported.
+ static LoadStoreOp LoadOpFor(const CPURegister& rt);
+ static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
+ const CPURegister& rt2);
+ static LoadStoreOp StoreOpFor(const CPURegister& rt);
+ static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
+ const CPURegister& rt2);
+ static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
+ const CPURegister& rt, const CPURegister& rt2);
+ static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
+ const CPURegister& rt, const CPURegister& rt2);
+ static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);
+
+ // Convenience pass-through for CPU feature checks.
+ bool CPUHas(CPUFeatures::Feature feature0,
+ CPUFeatures::Feature feature1 = CPUFeatures::kNone,
+ CPUFeatures::Feature feature2 = CPUFeatures::kNone,
+ CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
+ return cpu_features_.Has(feature0, feature1, feature2, feature3);
+ }
+
+ // Determine whether the target CPU has the specified registers, based on the
+ // currently-enabled CPU features. Presence of a register does not imply
+ // support for arbitrary operations on it. For example, CPUs with FP have H
+ // registers, but most half-precision operations require the FPHalf feature.
+ //
+ // These are used to check CPU features in loads and stores that have the same
+ // entry point for both integer and FP registers.
+ bool CPUHas(const CPURegister& rt) const;
+ bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;
+
+ bool CPUHas(SystemRegister sysreg) const;
+
+ private:
+ static uint32_t FP32ToImm8(float imm);
+ static uint32_t FP64ToImm8(double imm);
+
+ // Instruction helpers.
+ void MoveWide(const Register& rd,
+ uint64_t imm,
+ int shift,
+ MoveWideImmediateOp mov_op);
+ BufferOffset DataProcShiftedRegister(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ Instr op);
+ void DataProcExtendedRegister(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ Instr op);
+ void LoadStorePairNonTemporal(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& addr,
+ LoadStorePairNonTemporalOp op);
+ void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
+ void ConditionalSelect(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond,
+ ConditionalSelectOp op);
+ void DataProcessing1Source(const Register& rd,
+ const Register& rn,
+ DataProcessing1SourceOp op);
+ void DataProcessing3Source(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra,
+ DataProcessing3SourceOp op);
+ void FPDataProcessing1Source(const VRegister& fd,
+ const VRegister& fn,
+ FPDataProcessing1SourceOp op);
+ void FPDataProcessing3Source(const VRegister& fd,
+ const VRegister& fn,
+ const VRegister& fm,
+ const VRegister& fa,
+ FPDataProcessing3SourceOp op);
+ void NEONAcrossLanesL(const VRegister& vd,
+ const VRegister& vn,
+ NEONAcrossLanesOp op);
+ void NEONAcrossLanes(const VRegister& vd,
+ const VRegister& vn,
+ NEONAcrossLanesOp op);
+ void NEONModifiedImmShiftLsl(const VRegister& vd,
+ const int imm8,
+ const int left_shift,
+ NEONModifiedImmediateOp op);
+ void NEONModifiedImmShiftMsl(const VRegister& vd,
+ const int imm8,
+ const int shift_amount,
+ NEONModifiedImmediateOp op);
+ void NEONFP2Same(const VRegister& vd,
+ const VRegister& vn,
+ Instr vop);
+ void NEON3Same(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3SameOp vop);
+ void NEONFP3Same(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ Instr op);
+ void NEON3DifferentL(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3DifferentOp vop);
+ void NEON3DifferentW(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3DifferentOp vop);
+ void NEON3DifferentHN(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3DifferentOp vop);
+ void NEONFP2RegMisc(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp vop,
+ double value = 0.0);
+ void NEON2RegMisc(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp vop,
+ int value = 0);
+ void NEONFP2RegMisc(const VRegister& vd,
+ const VRegister& vn,
+ Instr op);
+ void NEONAddlp(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp op);
+ void NEONPerm(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEONPermOp op);
+ void NEONFPByElement(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index,
+ NEONByIndexedElementOp op);
+ void NEONByElement(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index,
+ NEONByIndexedElementOp op);
+ void NEONByElementL(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index,
+ NEONByIndexedElementOp op);
+ void NEONShiftImmediate(const VRegister& vd,
+ const VRegister& vn,
+ NEONShiftImmediateOp op,
+ int immh_immb);
+ void NEONShiftLeftImmediate(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op);
+ void NEONShiftRightImmediate(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op);
+ void NEONShiftImmediateL(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op);
+ void NEONShiftImmediateN(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op);
+ void NEONXtn(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp vop);
+
+ Instr LoadStoreStructAddrModeField(const MemOperand& addr);
+
+ // Encode the specified MemOperand for the specified access size and scaling
+ // preference.
+ Instr LoadStoreMemOperand(const MemOperand& addr,
+ unsigned access_size,
+ LoadStoreScalingOption option);
+
+ protected:
+ // Prevent generation of a literal pool for the next |maxInst| instructions.
+ // Guarantees instruction linearity.
+ class AutoBlockLiteralPool {
+ ARMBuffer* armbuffer_;
+
+ public:
+ AutoBlockLiteralPool(Assembler* assembler, size_t maxInst)
+ : armbuffer_(&assembler->armbuffer_) {
+ armbuffer_->enterNoPool(maxInst);
+ }
+ ~AutoBlockLiteralPool() {
+ armbuffer_->leaveNoPool();
+ }
+ };
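+
+  // Illustrative usage (a sketch, assuming code with access to this
+  // protected type, e.g. a derived assembler):
+  //
+  //   {
+  //     AutoBlockLiteralPool block(this, /* maxInst = */ 2);
+  //     // The next two instructions are emitted back-to-back, with no
+  //     // literal pool dumped between them.
+  //   }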
+
+ protected:
+  // Position independent code (PIC) option for the generated code.
+ PositionIndependentCodeOption pic_;
+
+ CPUFeatures cpu_features_;
+
+#ifdef DEBUG
+ bool finalized_;
+#endif
+};
+
+} // namespace vixl
+
+#endif // VIXL_A64_ASSEMBLER_A64_H_
diff --git a/js/src/jit/arm64/vixl/CompilerIntrinsics-vixl.h b/js/src/jit/arm64/vixl/CompilerIntrinsics-vixl.h
new file mode 100644
index 0000000000..e13eef6135
--- /dev/null
+++ b/js/src/jit/arm64/vixl/CompilerIntrinsics-vixl.h
@@ -0,0 +1,179 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#ifndef VIXL_COMPILER_INTRINSICS_H
+#define VIXL_COMPILER_INTRINSICS_H
+
+#include "mozilla/MathAlgorithms.h"
+
+#include "jit/arm64/vixl/Globals-vixl.h"
+
+namespace vixl {
+
+// Helper to check whether the version of GCC in use is at least the
+// specified requirement.
+#define MAJOR 1000000
+#define MINOR 1000
+#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
+#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) \
+ ((__GNUC__ * MAJOR + __GNUC_MINOR__ * MINOR + __GNUC_PATCHLEVEL__) >= \
+ ((major) * MAJOR + (minor) * MINOR + (patchlevel)))
+#elif defined(__GNUC__) && defined(__GNUC_MINOR__)
+#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) \
+ ((__GNUC__ * MAJOR + __GNUC_MINOR__ * MINOR) >= \
+ ((major) * MAJOR + (minor) * MINOR + (patchlevel)))
+#else
+#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) 0
+#endif
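+
+// For illustration: the version is flattened into a single integer, so for
+// GCC 4.7.0 the left-hand side is 4 * 1000000 + 7 * 1000 + 0 = 4007000 and
+// GCC_VERSION_OR_NEWER(4, 7, 0) holds for GCC 4.7.0 and later.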
+
+
+#if defined(__clang__) && !defined(VIXL_NO_COMPILER_BUILTINS)
+
+#define COMPILER_HAS_BUILTIN_CLRSB (__has_builtin(__builtin_clrsb))
+#define COMPILER_HAS_BUILTIN_CLZ (__has_builtin(__builtin_clz))
+#define COMPILER_HAS_BUILTIN_CTZ (__has_builtin(__builtin_ctz))
+#define COMPILER_HAS_BUILTIN_FFS (__has_builtin(__builtin_ffs))
+#define COMPILER_HAS_BUILTIN_POPCOUNT (__has_builtin(__builtin_popcount))
+
+#elif defined(__GNUC__) && !defined(VIXL_NO_COMPILER_BUILTINS)
+// The documentation for these builtins is available at:
+// https://gcc.gnu.org/onlinedocs/gcc-$MAJOR.$MINOR.$PATCHLEVEL/gcc//Other-Builtins.html
+
+# define COMPILER_HAS_BUILTIN_CLRSB (GCC_VERSION_OR_NEWER(4, 7, 0))
+# define COMPILER_HAS_BUILTIN_CLZ (GCC_VERSION_OR_NEWER(3, 4, 0))
+# define COMPILER_HAS_BUILTIN_CTZ (GCC_VERSION_OR_NEWER(3, 4, 0))
+# define COMPILER_HAS_BUILTIN_FFS (GCC_VERSION_OR_NEWER(3, 4, 0))
+# define COMPILER_HAS_BUILTIN_POPCOUNT (GCC_VERSION_OR_NEWER(3, 4, 0))
+
+#else
+// One can define VIXL_NO_COMPILER_BUILTINS to force using the manually
+// implemented C++ methods.
+
+#define COMPILER_HAS_BUILTIN_BSWAP false
+#define COMPILER_HAS_BUILTIN_CLRSB false
+#define COMPILER_HAS_BUILTIN_CLZ false
+#define COMPILER_HAS_BUILTIN_CTZ false
+#define COMPILER_HAS_BUILTIN_FFS false
+#define COMPILER_HAS_BUILTIN_POPCOUNT false
+
+#endif
+
+
+template<typename V>
+inline bool IsPowerOf2(V value) {
+ return (value != 0) && ((value & (value - 1)) == 0);
+}
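+
+// For illustration: value & (value - 1) clears the lowest set bit, so the
+// result is zero exactly when at most one bit is set (and value != 0 rules
+// out zero itself). E.g. IsPowerOf2(8) is true (8 & 7 == 0) while
+// IsPowerOf2(12) is false (12 & 11 == 8).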
+
+
+// Implementation of intrinsics functions.
+// TODO: The implementations could be improved for sizes different from 32bit
+// and 64bit: we could mask the values and call the appropriate builtin.
+
+
+template<typename V>
+inline int CountLeadingZeros(V value, int width = (sizeof(V) * 8)) {
+#if COMPILER_HAS_BUILTIN_CLZ
+ if (width == 32) {
+ return (value == 0) ? 32 : __builtin_clz(static_cast<unsigned>(value));
+ } else if (width == 64) {
+ return (value == 0) ? 64 : __builtin_clzll(value);
+ }
+ MOZ_CRASH("Unhandled width.");
+#else
+ if (width == 32) {
+ return mozilla::CountLeadingZeroes32(value);
+ } else if (width == 64) {
+ return mozilla::CountLeadingZeroes64(value);
+ }
+ MOZ_CRASH("Unhandled width.");
+#endif
+}
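+
+// For illustration: CountLeadingZeros(uint32_t(0xF0), 32) returns 24, since
+// the highest set bit is bit 7. Widths other than 32 and 64 crash.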
+
+
+template<typename V>
+inline int CountLeadingSignBits(V value, int width = (sizeof(V) * 8)) {
+#if COMPILER_HAS_BUILTIN_CLRSB
+ if (width == 32) {
+ return __builtin_clrsb(value);
+ } else if (width == 64) {
+ return __builtin_clrsbll(value);
+ }
+ MOZ_CRASH("Unhandled width.");
+#else
+ VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
+ if (value >= 0) {
+ return CountLeadingZeros(value, width) - 1;
+ } else {
+ return CountLeadingZeros(~value, width) - 1;
+ }
+#endif
+}
+
+
+template<typename V>
+inline int CountSetBits(V value, int width = (sizeof(V) * 8)) {
+#if COMPILER_HAS_BUILTIN_POPCOUNT
+ if (width == 32) {
+ return __builtin_popcount(static_cast<unsigned>(value));
+ } else if (width == 64) {
+ return __builtin_popcountll(value);
+ }
+ MOZ_CRASH("Unhandled width.");
+#else
+ if (width == 32) {
+ return mozilla::CountPopulation32(value);
+ } else if (width == 64) {
+ return mozilla::CountPopulation64(value);
+ }
+ MOZ_CRASH("Unhandled width.");
+#endif
+}
+
+
+template<typename V>
+inline int CountTrailingZeros(V value, int width = (sizeof(V) * 8)) {
+#if COMPILER_HAS_BUILTIN_CTZ
+ if (width == 32) {
+ return (value == 0) ? 32 : __builtin_ctz(static_cast<unsigned>(value));
+ } else if (width == 64) {
+ return (value == 0) ? 64 : __builtin_ctzll(value);
+ }
+ MOZ_CRASH("Unhandled width.");
+#else
+ if (width == 32) {
+ return mozilla::CountTrailingZeroes32(value);
+ } else if (width == 64) {
+ return mozilla::CountTrailingZeroes64(value);
+ }
+ MOZ_CRASH("Unhandled width.");
+#endif
+}
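+
+// For illustration: CountTrailingZeros(uint32_t(0x80), 32) returns 7, and
+// CountSetBits(uint32_t(0xF0F0), 32) returns 8.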
+
+} // namespace vixl
+
+#endif // VIXL_COMPILER_INTRINSICS_H
+
diff --git a/js/src/jit/arm64/vixl/Constants-vixl.h b/js/src/jit/arm64/vixl/Constants-vixl.h
new file mode 100644
index 0000000000..2c174e61a5
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Constants-vixl.h
@@ -0,0 +1,2694 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_CONSTANTS_A64_H_
+#define VIXL_A64_CONSTANTS_A64_H_
+
+#include <stdint.h>
+
+#include "jit/arm64/vixl/Globals-vixl.h"
+
+namespace vixl {
+
+// Supervisor Call (svc) specific support.
+//
+// The SVC instruction encodes an optional 16-bit immediate value.
+// The simulator understands the codes below.
+enum SVCSimulatorCodes {
+ kCallRtRedirected = 0x10, // Transition to x86_64 C code.
+ kMarkStackPointer = 0x11, // Push the current SP on a special Simulator stack.
+ kCheckStackPointer = 0x12 // Pop from the special Simulator stack and compare to SP.
+};
+
+const unsigned kNumberOfRegisters = 32;
+const unsigned kNumberOfVRegisters = 32;
+const unsigned kNumberOfFPRegisters = kNumberOfVRegisters;
+// Callee saved registers are x21-x30(lr).
+const int kNumberOfCalleeSavedRegisters = 10;
+const int kFirstCalleeSavedRegisterIndex = 21;
+// Callee saved FP registers are d8-d15. Note that the high parts of v8-v15 are
+// still caller-saved.
+const int kNumberOfCalleeSavedFPRegisters = 8;
+const int kFirstCalleeSavedFPRegisterIndex = 8;
+
+#define REGISTER_CODE_LIST(R) \
+R(0) R(1) R(2) R(3) R(4) R(5) R(6) R(7) \
+R(8) R(9) R(10) R(11) R(12) R(13) R(14) R(15) \
+R(16) R(17) R(18) R(19) R(20) R(21) R(22) R(23) \
+R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31)
+
+#define INSTRUCTION_FIELDS_LIST(V_) \
+/* Register fields */ \
+V_(Rd, 4, 0, Bits) /* Destination register. */ \
+V_(Rn, 9, 5, Bits) /* First source register. */ \
+V_(Rm, 20, 16, Bits) /* Second source register. */ \
+V_(Ra, 14, 10, Bits) /* Third source register. */ \
+V_(Rt, 4, 0, Bits) /* Load/store register. */ \
+V_(Rt2, 14, 10, Bits) /* Load/store second register. */ \
+V_(Rs, 20, 16, Bits) /* Exclusive access status. */ \
+ \
+/* Common bits */ \
+V_(SixtyFourBits, 31, 31, Bits) \
+V_(FlagsUpdate, 29, 29, Bits) \
+ \
+/* PC relative addressing */ \
+V_(ImmPCRelHi, 23, 5, SignedBits) \
+V_(ImmPCRelLo, 30, 29, Bits) \
+ \
+/* Add/subtract/logical shift register */ \
+V_(ShiftDP, 23, 22, Bits) \
+V_(ImmDPShift, 15, 10, Bits) \
+ \
+/* Add/subtract immediate */ \
+V_(ImmAddSub, 21, 10, Bits) \
+V_(ShiftAddSub, 23, 22, Bits) \
+ \
+/* Add/subtract extend */                                                    \
+V_(ImmExtendShift, 12, 10, Bits) \
+V_(ExtendMode, 15, 13, Bits) \
+ \
+/* Move wide */ \
+V_(ImmMoveWide, 20, 5, Bits) \
+V_(ShiftMoveWide, 22, 21, Bits) \
+ \
+/* Logical immediate, bitfield and extract */ \
+V_(BitN, 22, 22, Bits) \
+V_(ImmRotate, 21, 16, Bits) \
+V_(ImmSetBits, 15, 10, Bits) \
+V_(ImmR, 21, 16, Bits) \
+V_(ImmS, 15, 10, Bits) \
+ \
+/* Test and branch immediate */ \
+V_(ImmTestBranch, 18, 5, SignedBits) \
+V_(ImmTestBranchBit40, 23, 19, Bits) \
+V_(ImmTestBranchBit5, 31, 31, Bits) \
+ \
+/* Conditionals */ \
+V_(Condition, 15, 12, Bits) \
+V_(ConditionBranch, 3, 0, Bits) \
+V_(Nzcv, 3, 0, Bits) \
+V_(ImmCondCmp, 20, 16, Bits) \
+V_(ImmCondBranch, 23, 5, SignedBits) \
+ \
+/* Floating point */ \
+V_(FPType, 23, 22, Bits) \
+V_(ImmFP, 20, 13, Bits) \
+V_(FPScale, 15, 10, Bits) \
+ \
+/* Load Store */ \
+V_(ImmLS, 20, 12, SignedBits) \
+V_(ImmLSUnsigned, 21, 10, Bits) \
+V_(ImmLSPair, 21, 15, SignedBits) \
+V_(ImmShiftLS, 12, 12, Bits) \
+V_(LSOpc, 23, 22, Bits) \
+V_(LSVector, 26, 26, Bits) \
+V_(LSSize, 31, 30, Bits) \
+V_(ImmPrefetchOperation, 4, 0, Bits) \
+V_(PrefetchHint, 4, 3, Bits) \
+V_(PrefetchTarget, 2, 1, Bits) \
+V_(PrefetchStream, 0, 0, Bits) \
+ \
+/* Other immediates */ \
+V_(ImmUncondBranch, 25, 0, SignedBits) \
+V_(ImmCmpBranch, 23, 5, SignedBits) \
+V_(ImmLLiteral, 23, 5, SignedBits) \
+V_(ImmException, 20, 5, Bits) \
+V_(ImmHint, 11, 5, Bits) \
+V_(ImmBarrierDomain, 11, 10, Bits) \
+V_(ImmBarrierType, 9, 8, Bits) \
+ \
+/* System (MRS, MSR, SYS) */ \
+V_(ImmSystemRegister, 19, 5, Bits) \
+V_(SysO0, 19, 19, Bits) \
+V_(SysOp, 18, 5, Bits) \
+V_(SysOp1, 18, 16, Bits) \
+V_(SysOp2, 7, 5, Bits) \
+V_(CRn, 15, 12, Bits) \
+V_(CRm, 11, 8, Bits) \
+ \
+/* Load-/store-exclusive */ \
+V_(LdStXLoad, 22, 22, Bits) \
+V_(LdStXNotExclusive, 23, 23, Bits) \
+V_(LdStXAcquireRelease, 15, 15, Bits) \
+V_(LdStXSizeLog2, 31, 30, Bits) \
+V_(LdStXPair, 21, 21, Bits) \
+ \
+/* NEON generic fields */ \
+V_(NEONQ, 30, 30, Bits) \
+V_(NEONSize, 23, 22, Bits) \
+V_(NEONLSSize, 11, 10, Bits) \
+V_(NEONS, 12, 12, Bits) \
+V_(NEONL, 21, 21, Bits) \
+V_(NEONM, 20, 20, Bits) \
+V_(NEONH, 11, 11, Bits) \
+V_(ImmNEONExt, 14, 11, Bits) \
+V_(ImmNEON5, 20, 16, Bits) \
+V_(ImmNEON4, 14, 11, Bits) \
+ \
+/* NEON Modified Immediate fields */ \
+V_(ImmNEONabc, 18, 16, Bits) \
+V_(ImmNEONdefgh, 9, 5, Bits) \
+V_(NEONModImmOp, 29, 29, Bits) \
+V_(NEONCmode, 15, 12, Bits) \
+ \
+/* NEON Shift Immediate fields */ \
+V_(ImmNEONImmhImmb, 22, 16, Bits) \
+V_(ImmNEONImmh, 22, 19, Bits) \
+V_(ImmNEONImmb, 18, 16, Bits)
+
+#define SYSTEM_REGISTER_FIELDS_LIST(V_, M_) \
+/* NZCV */ \
+V_(Flags, 31, 28, Bits) \
+V_(N, 31, 31, Bits) \
+V_(Z, 30, 30, Bits) \
+V_(C, 29, 29, Bits) \
+V_(V, 28, 28, Bits) \
+M_(NZCV, Flags_mask) \
+/* FPCR */ \
+V_(AHP, 26, 26, Bits) \
+V_(DN, 25, 25, Bits) \
+V_(FZ, 24, 24, Bits) \
+V_(RMode, 23, 22, Bits) \
+M_(FPCR, AHP_mask | DN_mask | FZ_mask | RMode_mask)
+
+// Field offsets.
+#define DECLARE_FIELDS_OFFSETS(Name, HighBit, LowBit, X) \
+const int Name##_offset = LowBit; \
+const int Name##_width = HighBit - LowBit + 1; \
+const uint32_t Name##_mask = ((1 << Name##_width) - 1) << LowBit;
+#define NOTHING(A, B)
+INSTRUCTION_FIELDS_LIST(DECLARE_FIELDS_OFFSETS)
+SYSTEM_REGISTER_FIELDS_LIST(DECLARE_FIELDS_OFFSETS, NOTHING)
+#undef NOTHING
+#undef DECLARE_FIELDS_OFFSETS
+
+// ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST), formed
+// from ImmPCRelLo and ImmPCRelHi.
+const int ImmPCRel_mask = ImmPCRelLo_mask | ImmPCRelHi_mask;
+
+// Condition codes.
+enum Condition {
+ eq = 0, // Z set Equal.
+ ne = 1, // Z clear Not equal.
+ cs = 2, // C set Carry set.
+ cc = 3, // C clear Carry clear.
+ mi = 4, // N set Negative.
+ pl = 5, // N clear Positive or zero.
+ vs = 6, // V set Overflow.
+ vc = 7, // V clear No overflow.
+ hi = 8, // C set, Z clear Unsigned higher.
+ ls = 9, // C clear or Z set Unsigned lower or same.
+ ge = 10, // N == V Greater or equal.
+ lt = 11, // N != V Less than.
+ gt = 12, // Z clear, N == V Greater than.
+  le = 13, // Z set or N != V       Less than or equal.
+ al = 14, // Always.
+ nv = 15, // Behaves as always/al.
+
+ // Aliases.
+ hs = cs, // C set Unsigned higher or same.
+ lo = cc, // C clear Unsigned lower.
+
+ // Mozilla expanded aliases.
+ Equal = 0, Zero = 0,
+ NotEqual = 1, NonZero = 1,
+ AboveOrEqual = 2, CarrySet = 2,
+ Below = 3, CarryClear = 3,
+ Signed = 4,
+ NotSigned = 5,
+ Overflow = 6,
+ NoOverflow = 7,
+ Above = 8,
+ BelowOrEqual = 9,
+ GreaterThanOrEqual_ = 10,
+ LessThan_ = 11,
+ GreaterThan_ = 12,
+ LessThanOrEqual_ = 13,
+ Always = 14,
+ Never = 15
+};
+
+inline Condition InvertCondition(Condition cond) {
+ // Conditions al and nv behave identically, as "always true". They can't be
+ // inverted, because there is no "always false" condition.
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ return static_cast<Condition>(cond ^ 1);
+}
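+
+// For illustration: condition codes are paired so that flipping the lowest
+// bit negates them, e.g. InvertCondition(eq) == ne and
+// InvertCondition(lt) == ge.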
+
+enum FPTrapFlags {
+ EnableTrap = 1,
+ DisableTrap = 0
+};
+
+enum FlagsUpdate {
+ SetFlags = 1,
+ LeaveFlags = 0
+};
+
+enum StatusFlags {
+ NoFlag = 0,
+
+ // Derive the flag combinations from the system register bit descriptions.
+ NFlag = N_mask,
+ ZFlag = Z_mask,
+ CFlag = C_mask,
+ VFlag = V_mask,
+ NZFlag = NFlag | ZFlag,
+ NCFlag = NFlag | CFlag,
+ NVFlag = NFlag | VFlag,
+ ZCFlag = ZFlag | CFlag,
+ ZVFlag = ZFlag | VFlag,
+ CVFlag = CFlag | VFlag,
+ NZCFlag = NFlag | ZFlag | CFlag,
+ NZVFlag = NFlag | ZFlag | VFlag,
+ NCVFlag = NFlag | CFlag | VFlag,
+ ZCVFlag = ZFlag | CFlag | VFlag,
+ NZCVFlag = NFlag | ZFlag | CFlag | VFlag,
+
+ // Floating-point comparison results.
+ FPEqualFlag = ZCFlag,
+ FPLessThanFlag = NFlag,
+ FPGreaterThanFlag = CFlag,
+ FPUnorderedFlag = CVFlag
+};
+
+enum Shift {
+ NO_SHIFT = -1,
+ LSL = 0x0,
+ LSR = 0x1,
+ ASR = 0x2,
+ ROR = 0x3,
+ MSL = 0x4
+};
+
+enum Extend {
+ NO_EXTEND = -1,
+ UXTB = 0,
+ UXTH = 1,
+ UXTW = 2,
+ UXTX = 3,
+ SXTB = 4,
+ SXTH = 5,
+ SXTW = 6,
+ SXTX = 7
+};
+
+enum SystemHint {
+ NOP = 0,
+ YIELD = 1,
+ WFE = 2,
+ WFI = 3,
+ SEV = 4,
+ SEVL = 5,
+ ESB = 16,
+ CSDB = 20,
+ BTI = 32,
+ BTI_c = 34,
+ BTI_j = 36,
+ BTI_jc = 38
+};
+
+enum BranchTargetIdentifier {
+ EmitBTI_none = NOP,
+ EmitBTI = BTI,
+ EmitBTI_c = BTI_c,
+ EmitBTI_j = BTI_j,
+ EmitBTI_jc = BTI_jc,
+
+ // These correspond to the values of the CRm:op2 fields in the equivalent HINT
+ // instruction.
+ EmitPACIASP = 25,
+ EmitPACIBSP = 27
+};
+
+enum BarrierDomain {
+ OuterShareable = 0,
+ NonShareable = 1,
+ InnerShareable = 2,
+ FullSystem = 3
+};
+
+enum BarrierType {
+ BarrierOther = 0,
+ BarrierReads = 1,
+ BarrierWrites = 2,
+ BarrierAll = 3
+};
+
+enum PrefetchOperation {
+ PLDL1KEEP = 0x00,
+ PLDL1STRM = 0x01,
+ PLDL2KEEP = 0x02,
+ PLDL2STRM = 0x03,
+ PLDL3KEEP = 0x04,
+ PLDL3STRM = 0x05,
+
+ PLIL1KEEP = 0x08,
+ PLIL1STRM = 0x09,
+ PLIL2KEEP = 0x0a,
+ PLIL2STRM = 0x0b,
+ PLIL3KEEP = 0x0c,
+ PLIL3STRM = 0x0d,
+
+ PSTL1KEEP = 0x10,
+ PSTL1STRM = 0x11,
+ PSTL2KEEP = 0x12,
+ PSTL2STRM = 0x13,
+ PSTL3KEEP = 0x14,
+ PSTL3STRM = 0x15
+};
+
+enum BType {
+ // Set when executing any instruction on a guarded page, except those cases
+ // listed below.
+ DefaultBType = 0,
+
+ // Set when an indirect branch is taken from an unguarded page to a guarded
+  // page, or from a guarded page to ip0 or ip1 (x16 or x17), e.g. "br ip0".
+ BranchFromUnguardedOrToIP = 1,
+
+  // Set when an indirect branch and link (call) is taken, e.g. "blr x0".
+ BranchAndLink = 2,
+
+ // Set when an indirect branch is taken from a guarded page to a register
+  // that is not ip0 or ip1 (x16 or x17), e.g. "br x0".
+ BranchFromGuardedNotToIP = 3
+};
+
+template<int op0, int op1, int crn, int crm, int op2>
+class SystemRegisterEncoder {
+ public:
+ static const uint32_t value =
+ ((op0 << SysO0_offset) |
+ (op1 << SysOp1_offset) |
+ (crn << CRn_offset) |
+ (crm << CRm_offset) |
+ (op2 << SysOp2_offset)) >> ImmSystemRegister_offset;
+};
+
+// System/special register names.
+// This information is not encoded as one field but as the concatenation of
+// multiple fields (Op0, Op1, Crn, Crm, Op2).
+enum SystemRegister {
+ NZCV = SystemRegisterEncoder<3, 3, 4, 2, 0>::value,
+ FPCR = SystemRegisterEncoder<3, 3, 4, 4, 0>::value,
+ RNDR = SystemRegisterEncoder<3, 3, 2, 4, 0>::value, // Random number.
+ RNDRRS = SystemRegisterEncoder<3, 3, 2, 4, 1>::value // Reseeded random number.
+};
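+
+// For illustration: the encoder packs (op0, op1, CRn, CRm, op2) into their
+// field positions and shifts the result down to the ImmSystemRegister field.
+// For NZCV (op0=3, op1=3, CRn=4, CRm=2, op2=0) this gives
+//   ((3 << 19) | (3 << 16) | (4 << 12) | (2 << 8) | (0 << 5)) >> 5 = 0xDA10.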
+
+template<int op1, int crn, int crm, int op2>
+class CacheOpEncoder {
+ public:
+ static const uint32_t value =
+ ((op1 << SysOp1_offset) |
+ (crn << CRn_offset) |
+ (crm << CRm_offset) |
+ (op2 << SysOp2_offset)) >> SysOp_offset;
+};
+
+enum InstructionCacheOp : uint32_t {
+ IVAU = CacheOpEncoder<3, 7, 5, 1>::value
+};
+
+enum DataCacheOp : uint32_t {
+ CVAC = CacheOpEncoder<3, 7, 10, 1>::value,
+ CVAU = CacheOpEncoder<3, 7, 11, 1>::value,
+ CVAP = CacheOpEncoder<3, 7, 12, 1>::value,
+ CVADP = CacheOpEncoder<3, 7, 13, 1>::value,
+ CIVAC = CacheOpEncoder<3, 7, 14, 1>::value,
+ ZVA = CacheOpEncoder<3, 7, 4, 1>::value
+};
+
+// Instruction enumerations.
+//
+// These are the masks that define a class of instructions, and the list of
+// instructions within each class. Each enumeration has a Fixed, FMask and
+// Mask value.
+//
+// Fixed: The fixed bits in this instruction class.
+// FMask: The mask used to extract the fixed bits in the class.
+// Mask: The mask used to identify the instructions within a class.
+//
+// The enumerations can be used like this:
+//
+// VIXL_ASSERT(instr->Mask(PCRelAddressingFMask) == PCRelAddressingFixed);
+// switch(instr->Mask(PCRelAddressingMask)) {
+// case ADR: Format("adr 'Xd, 'AddrPCRelByte"); break;
+// case ADRP: Format("adrp 'Xd, 'AddrPCRelPage"); break;
+// default: printf("Unknown instruction\n");
+// }
+
+
+// Generic fields.
+enum GenericInstrField : uint32_t {
+ SixtyFourBits = 0x80000000,
+ ThirtyTwoBits = 0x00000000,
+
+ FPTypeMask = 0x00C00000,
+ FP16 = 0x00C00000,
+ FP32 = 0x00000000,
+ FP64 = 0x00400000
+};
+
+enum NEONFormatField : uint32_t {
+ NEONFormatFieldMask = 0x40C00000,
+ NEON_Q = 0x40000000,
+ NEON_8B = 0x00000000,
+ NEON_16B = NEON_8B | NEON_Q,
+ NEON_4H = 0x00400000,
+ NEON_8H = NEON_4H | NEON_Q,
+ NEON_2S = 0x00800000,
+ NEON_4S = NEON_2S | NEON_Q,
+ NEON_1D = 0x00C00000,
+ NEON_2D = 0x00C00000 | NEON_Q
+};
+
+enum NEONFPFormatField : uint32_t {
+ NEONFPFormatFieldMask = 0x40400000,
+ NEON_FP_4H = FP16,
+ NEON_FP_2S = FP32,
+ NEON_FP_8H = FP16 | NEON_Q,
+ NEON_FP_4S = FP32 | NEON_Q,
+ NEON_FP_2D = FP64 | NEON_Q
+};
+
+enum NEONLSFormatField : uint32_t {
+ NEONLSFormatFieldMask = 0x40000C00,
+ LS_NEON_8B = 0x00000000,
+ LS_NEON_16B = LS_NEON_8B | NEON_Q,
+ LS_NEON_4H = 0x00000400,
+ LS_NEON_8H = LS_NEON_4H | NEON_Q,
+ LS_NEON_2S = 0x00000800,
+ LS_NEON_4S = LS_NEON_2S | NEON_Q,
+ LS_NEON_1D = 0x00000C00,
+ LS_NEON_2D = LS_NEON_1D | NEON_Q
+};
+
+enum NEONScalarFormatField : uint32_t {
+ NEONScalarFormatFieldMask = 0x00C00000,
+ NEONScalar = 0x10000000,
+ NEON_B = 0x00000000,
+ NEON_H = 0x00400000,
+ NEON_S = 0x00800000,
+ NEON_D = 0x00C00000
+};
+
+// PC relative addressing.
+enum PCRelAddressingOp : uint32_t {
+ PCRelAddressingFixed = 0x10000000,
+ PCRelAddressingFMask = 0x1F000000,
+ PCRelAddressingMask = 0x9F000000,
+ ADR = PCRelAddressingFixed | 0x00000000,
+ ADRP = PCRelAddressingFixed | 0x80000000
+};
+
+// Add/sub (immediate, shifted and extended).
+const int kSFOffset = 31;
+enum AddSubOp : uint32_t {
+ AddSubOpMask = 0x60000000,
+ AddSubSetFlagsBit = 0x20000000,
+ ADD = 0x00000000,
+ ADDS = ADD | AddSubSetFlagsBit,
+ SUB = 0x40000000,
+ SUBS = SUB | AddSubSetFlagsBit
+};
+
+#define ADD_SUB_OP_LIST(V) \
+ V(ADD), \
+ V(ADDS), \
+ V(SUB), \
+ V(SUBS)
+
+enum AddSubImmediateOp : uint32_t {
+ AddSubImmediateFixed = 0x11000000,
+ AddSubImmediateFMask = 0x1F000000,
+ AddSubImmediateMask = 0xFF000000,
+ #define ADD_SUB_IMMEDIATE(A) \
+ A##_w_imm = AddSubImmediateFixed | A, \
+ A##_x_imm = AddSubImmediateFixed | A | SixtyFourBits
+ ADD_SUB_OP_LIST(ADD_SUB_IMMEDIATE)
+ #undef ADD_SUB_IMMEDIATE
+};
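+
+// For illustration: the list expansion above produces, for example,
+//   ADD_w_imm = AddSubImmediateFixed | ADD                 = 0x11000000
+//   ADD_x_imm = AddSubImmediateFixed | ADD | SixtyFourBits = 0x91000000
+// and similarly for ADDS, SUB and SUBS.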
+
+enum AddSubShiftedOp : uint32_t {
+ AddSubShiftedFixed = 0x0B000000,
+ AddSubShiftedFMask = 0x1F200000,
+ AddSubShiftedMask = 0xFF200000,
+ #define ADD_SUB_SHIFTED(A) \
+ A##_w_shift = AddSubShiftedFixed | A, \
+ A##_x_shift = AddSubShiftedFixed | A | SixtyFourBits
+ ADD_SUB_OP_LIST(ADD_SUB_SHIFTED)
+ #undef ADD_SUB_SHIFTED
+};
+
+enum AddSubExtendedOp : uint32_t {
+ AddSubExtendedFixed = 0x0B200000,
+ AddSubExtendedFMask = 0x1F200000,
+ AddSubExtendedMask = 0xFFE00000,
+ #define ADD_SUB_EXTENDED(A) \
+ A##_w_ext = AddSubExtendedFixed | A, \
+ A##_x_ext = AddSubExtendedFixed | A | SixtyFourBits
+ ADD_SUB_OP_LIST(ADD_SUB_EXTENDED)
+ #undef ADD_SUB_EXTENDED
+};
+
+// Add/sub with carry.
+enum AddSubWithCarryOp : uint32_t {
+ AddSubWithCarryFixed = 0x1A000000,
+ AddSubWithCarryFMask = 0x1FE00000,
+ AddSubWithCarryMask = 0xFFE0FC00,
+ ADC_w = AddSubWithCarryFixed | ADD,
+ ADC_x = AddSubWithCarryFixed | ADD | SixtyFourBits,
+ ADC = ADC_w,
+ ADCS_w = AddSubWithCarryFixed | ADDS,
+ ADCS_x = AddSubWithCarryFixed | ADDS | SixtyFourBits,
+ SBC_w = AddSubWithCarryFixed | SUB,
+ SBC_x = AddSubWithCarryFixed | SUB | SixtyFourBits,
+ SBC = SBC_w,
+ SBCS_w = AddSubWithCarryFixed | SUBS,
+ SBCS_x = AddSubWithCarryFixed | SUBS | SixtyFourBits
+};
+
+// Rotate right into flags.
+enum RotateRightIntoFlagsOp : uint32_t {
+ RotateRightIntoFlagsFixed = 0x1A000400,
+ RotateRightIntoFlagsFMask = 0x1FE07C00,
+ RotateRightIntoFlagsMask = 0xFFE07C10,
+ RMIF = RotateRightIntoFlagsFixed | 0xA0000000
+};
+
+// Evaluate into flags.
+enum EvaluateIntoFlagsOp : uint32_t {
+ EvaluateIntoFlagsFixed = 0x1A000800,
+ EvaluateIntoFlagsFMask = 0x1FE03C00,
+ EvaluateIntoFlagsMask = 0xFFE07C1F,
+ SETF8 = EvaluateIntoFlagsFixed | 0x2000000D,
+ SETF16 = EvaluateIntoFlagsFixed | 0x2000400D
+};
+
+// Logical (immediate and shifted register).
+enum LogicalOp : uint32_t {
+ LogicalOpMask = 0x60200000,
+ NOT = 0x00200000,
+ AND = 0x00000000,
+ BIC = AND | NOT,
+ ORR = 0x20000000,
+ ORN = ORR | NOT,
+ EOR = 0x40000000,
+ EON = EOR | NOT,
+ ANDS = 0x60000000,
+ BICS = ANDS | NOT
+};
+
+// Logical immediate.
+enum LogicalImmediateOp : uint32_t {
+ LogicalImmediateFixed = 0x12000000,
+ LogicalImmediateFMask = 0x1F800000,
+ LogicalImmediateMask = 0xFF800000,
+ AND_w_imm = LogicalImmediateFixed | AND,
+ AND_x_imm = LogicalImmediateFixed | AND | SixtyFourBits,
+ ORR_w_imm = LogicalImmediateFixed | ORR,
+ ORR_x_imm = LogicalImmediateFixed | ORR | SixtyFourBits,
+ EOR_w_imm = LogicalImmediateFixed | EOR,
+ EOR_x_imm = LogicalImmediateFixed | EOR | SixtyFourBits,
+ ANDS_w_imm = LogicalImmediateFixed | ANDS,
+ ANDS_x_imm = LogicalImmediateFixed | ANDS | SixtyFourBits
+};
+
+// Logical shifted register.
+enum LogicalShiftedOp : uint32_t {
+ LogicalShiftedFixed = 0x0A000000,
+ LogicalShiftedFMask = 0x1F000000,
+ LogicalShiftedMask = 0xFF200000,
+ AND_w = LogicalShiftedFixed | AND,
+ AND_x = LogicalShiftedFixed | AND | SixtyFourBits,
+ AND_shift = AND_w,
+ BIC_w = LogicalShiftedFixed | BIC,
+ BIC_x = LogicalShiftedFixed | BIC | SixtyFourBits,
+ BIC_shift = BIC_w,
+ ORR_w = LogicalShiftedFixed | ORR,
+ ORR_x = LogicalShiftedFixed | ORR | SixtyFourBits,
+ ORR_shift = ORR_w,
+ ORN_w = LogicalShiftedFixed | ORN,
+ ORN_x = LogicalShiftedFixed | ORN | SixtyFourBits,
+ ORN_shift = ORN_w,
+ EOR_w = LogicalShiftedFixed | EOR,
+ EOR_x = LogicalShiftedFixed | EOR | SixtyFourBits,
+ EOR_shift = EOR_w,
+ EON_w = LogicalShiftedFixed | EON,
+ EON_x = LogicalShiftedFixed | EON | SixtyFourBits,
+ EON_shift = EON_w,
+ ANDS_w = LogicalShiftedFixed | ANDS,
+ ANDS_x = LogicalShiftedFixed | ANDS | SixtyFourBits,
+ ANDS_shift = ANDS_w,
+ BICS_w = LogicalShiftedFixed | BICS,
+ BICS_x = LogicalShiftedFixed | BICS | SixtyFourBits,
+ BICS_shift = BICS_w
+};
+
+// Move wide immediate.
+enum MoveWideImmediateOp : uint32_t {
+ MoveWideImmediateFixed = 0x12800000,
+ MoveWideImmediateFMask = 0x1F800000,
+ MoveWideImmediateMask = 0xFF800000,
+ MOVN = 0x00000000,
+ MOVZ = 0x40000000,
+ MOVK = 0x60000000,
+ MOVN_w = MoveWideImmediateFixed | MOVN,
+ MOVN_x = MoveWideImmediateFixed | MOVN | SixtyFourBits,
+ MOVZ_w = MoveWideImmediateFixed | MOVZ,
+ MOVZ_x = MoveWideImmediateFixed | MOVZ | SixtyFourBits,
+ MOVK_w = MoveWideImmediateFixed | MOVK,
+ MOVK_x = MoveWideImmediateFixed | MOVK | SixtyFourBits
+};
+
+// Bitfield.
+const int kBitfieldNOffset = 22;
+enum BitfieldOp : uint32_t {
+ BitfieldFixed = 0x13000000,
+ BitfieldFMask = 0x1F800000,
+ BitfieldMask = 0xFF800000,
+ SBFM_w = BitfieldFixed | 0x00000000,
+ SBFM_x = BitfieldFixed | 0x80000000,
+ SBFM = SBFM_w,
+ BFM_w = BitfieldFixed | 0x20000000,
+ BFM_x = BitfieldFixed | 0xA0000000,
+ BFM = BFM_w,
+ UBFM_w = BitfieldFixed | 0x40000000,
+ UBFM_x = BitfieldFixed | 0xC0000000,
+ UBFM = UBFM_w
+ // Bitfield N field.
+};
+
+// Extract.
+enum ExtractOp : uint32_t {
+ ExtractFixed = 0x13800000,
+ ExtractFMask = 0x1F800000,
+ ExtractMask = 0xFFA00000,
+ EXTR_w = ExtractFixed | 0x00000000,
+ EXTR_x = ExtractFixed | 0x80000000,
+ EXTR = EXTR_w
+};
+
+// Unconditional branch.
+enum UnconditionalBranchOp : uint32_t {
+ UnconditionalBranchFixed = 0x14000000,
+ UnconditionalBranchFMask = 0x7C000000,
+ UnconditionalBranchMask = 0xFC000000,
+ B = UnconditionalBranchFixed | 0x00000000,
+ BL = UnconditionalBranchFixed | 0x80000000
+};
+
+// Unconditional branch to register.
+enum UnconditionalBranchToRegisterOp : uint32_t {
+ UnconditionalBranchToRegisterFixed = 0xD6000000,
+ UnconditionalBranchToRegisterFMask = 0xFE000000,
+ UnconditionalBranchToRegisterMask = 0xFFFFFC00,
+ BR = UnconditionalBranchToRegisterFixed | 0x001F0000,
+ BLR = UnconditionalBranchToRegisterFixed | 0x003F0000,
+ RET = UnconditionalBranchToRegisterFixed | 0x005F0000,
+
+ BRAAZ = UnconditionalBranchToRegisterFixed | 0x001F0800,
+ BRABZ = UnconditionalBranchToRegisterFixed | 0x001F0C00,
+ BLRAAZ = UnconditionalBranchToRegisterFixed | 0x003F0800,
+ BLRABZ = UnconditionalBranchToRegisterFixed | 0x003F0C00,
+ RETAA = UnconditionalBranchToRegisterFixed | 0x005F0800,
+ RETAB = UnconditionalBranchToRegisterFixed | 0x005F0C00,
+ BRAA = UnconditionalBranchToRegisterFixed | 0x011F0800,
+ BRAB = UnconditionalBranchToRegisterFixed | 0x011F0C00,
+ BLRAA = UnconditionalBranchToRegisterFixed | 0x013F0800,
+ BLRAB = UnconditionalBranchToRegisterFixed | 0x013F0C00
+};
+
+// Compare and branch.
+enum CompareBranchOp : uint32_t {
+ CompareBranchFixed = 0x34000000,
+ CompareBranchFMask = 0x7E000000,
+ CompareBranchMask = 0xFF000000,
+ CBZ_w = CompareBranchFixed | 0x00000000,
+ CBZ_x = CompareBranchFixed | 0x80000000,
+ CBZ = CBZ_w,
+ CBNZ_w = CompareBranchFixed | 0x01000000,
+ CBNZ_x = CompareBranchFixed | 0x81000000,
+ CBNZ = CBNZ_w
+};
+
+// Test and branch.
+enum TestBranchOp : uint32_t {
+ TestBranchFixed = 0x36000000,
+ TestBranchFMask = 0x7E000000,
+ TestBranchMask = 0x7F000000,
+ TBZ = TestBranchFixed | 0x00000000,
+ TBNZ = TestBranchFixed | 0x01000000
+};
+
+// Conditional branch.
+enum ConditionalBranchOp : uint32_t {
+ ConditionalBranchFixed = 0x54000000,
+ ConditionalBranchFMask = 0xFE000000,
+ ConditionalBranchMask = 0xFF000010,
+ B_cond = ConditionalBranchFixed | 0x00000000
+};
+
+// System.
+// System instruction encoding is complicated because some instructions use op
+// and CR fields to encode parameters. To handle this cleanly, the system
+// instructions are split into more than one enum.
+
+enum SystemOp : uint32_t {
+ SystemFixed = 0xD5000000,
+ SystemFMask = 0xFFC00000
+};
+
+enum SystemSysRegOp : uint32_t {
+ SystemSysRegFixed = 0xD5100000,
+ SystemSysRegFMask = 0xFFD00000,
+ SystemSysRegMask = 0xFFF00000,
+ MRS = SystemSysRegFixed | 0x00200000,
+ MSR = SystemSysRegFixed | 0x00000000
+};
+
+enum SystemPStateOp : uint32_t {
+ SystemPStateFixed = 0xD5004000,
+ SystemPStateFMask = 0xFFF8F000,
+ SystemPStateMask = 0xFFFFF0FF,
+ CFINV = SystemPStateFixed | 0x0000001F,
+ XAFLAG = SystemPStateFixed | 0x0000003F,
+ AXFLAG = SystemPStateFixed | 0x0000005F
+};
+
+enum SystemHintOp : uint32_t {
+ SystemHintFixed = 0xD503201F,
+ SystemHintFMask = 0xFFFFF01F,
+ SystemHintMask = 0xFFFFF01F,
+ HINT = SystemHintFixed | 0x00000000
+};
+
+enum SystemSysOp : uint32_t {
+ SystemSysFixed = 0xD5080000,
+ SystemSysFMask = 0xFFF80000,
+ SystemSysMask = 0xFFF80000,
+ SYS = SystemSysFixed | 0x00000000
+};
+
+// Exception.
+enum ExceptionOp : uint32_t {
+ ExceptionFixed = 0xD4000000,
+ ExceptionFMask = 0xFF000000,
+ ExceptionMask = 0xFFE0001F,
+ HLT = ExceptionFixed | 0x00400000,
+ BRK = ExceptionFixed | 0x00200000,
+ SVC = ExceptionFixed | 0x00000001,
+ HVC = ExceptionFixed | 0x00000002,
+ SMC = ExceptionFixed | 0x00000003,
+ DCPS1 = ExceptionFixed | 0x00A00001,
+ DCPS2 = ExceptionFixed | 0x00A00002,
+ DCPS3 = ExceptionFixed | 0x00A00003
+};
+
+enum MemBarrierOp : uint32_t {
+ MemBarrierFixed = 0xD503309F,
+ MemBarrierFMask = 0xFFFFF09F,
+ MemBarrierMask = 0xFFFFF0FF,
+ DSB = MemBarrierFixed | 0x00000000,
+ DMB = MemBarrierFixed | 0x00000020,
+ ISB = MemBarrierFixed | 0x00000040
+};
+
+enum SystemExclusiveMonitorOp : uint32_t {
+ SystemExclusiveMonitorFixed = 0xD503305F,
+ SystemExclusiveMonitorFMask = 0xFFFFF0FF,
+ SystemExclusiveMonitorMask = 0xFFFFF0FF,
+ CLREX = SystemExclusiveMonitorFixed
+};
+
+enum SystemPAuthOp : uint32_t {
+ SystemPAuthFixed = 0xD503211F,
+ SystemPAuthFMask = 0xFFFFFD1F,
+ SystemPAuthMask = 0xFFFFFFFF,
+ PACIA1716 = SystemPAuthFixed | 0x00000100,
+ PACIB1716 = SystemPAuthFixed | 0x00000140,
+ AUTIA1716 = SystemPAuthFixed | 0x00000180,
+ AUTIB1716 = SystemPAuthFixed | 0x000001C0,
+ PACIAZ = SystemPAuthFixed | 0x00000300,
+ PACIASP = SystemPAuthFixed | 0x00000320,
+ PACIBZ = SystemPAuthFixed | 0x00000340,
+ PACIBSP = SystemPAuthFixed | 0x00000360,
+ AUTIAZ = SystemPAuthFixed | 0x00000380,
+ AUTIASP = SystemPAuthFixed | 0x000003A0,
+ AUTIBZ = SystemPAuthFixed | 0x000003C0,
+ AUTIBSP = SystemPAuthFixed | 0x000003E0,
+
+ // XPACLRI has the same fixed mask as System Hints and needs to be handled
+ // differently.
+ XPACLRI = 0xD50320FF
+};
+
+// Any load or store.
+enum LoadStoreAnyOp : uint32_t {
+ LoadStoreAnyFMask = 0x0a000000,
+ LoadStoreAnyFixed = 0x08000000
+};
+
+// Any load pair or store pair.
+enum LoadStorePairAnyOp : uint32_t {
+ LoadStorePairAnyFMask = 0x3a000000,
+ LoadStorePairAnyFixed = 0x28000000
+};
+
+#define LOAD_STORE_PAIR_OP_LIST(V) \
+ V(STP, w, 0x00000000), \
+ V(LDP, w, 0x00400000), \
+ V(LDPSW, x, 0x40400000), \
+ V(STP, x, 0x80000000), \
+ V(LDP, x, 0x80400000), \
+ V(STP, s, 0x04000000), \
+ V(LDP, s, 0x04400000), \
+ V(STP, d, 0x44000000), \
+ V(LDP, d, 0x44400000), \
+ V(STP, q, 0x84000000), \
+ V(LDP, q, 0x84400000)
+
+// Load/store pair (post, pre and offset).
+enum LoadStorePairOp : uint32_t {
+ LoadStorePairMask = 0xC4400000,
+ LoadStorePairLBit = 1 << 22,
+ #define LOAD_STORE_PAIR(A, B, C) \
+ A##_##B = C
+ LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR)
+ #undef LOAD_STORE_PAIR
+};
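+
+// For reference, the X-macro above expands to plain enumerators such as
+//   STP_w = 0x00000000, LDP_w = 0x00400000, STP_x = 0x80000000,
+//   LDP_x = 0x80400000, ...
+// Each LDP value is the corresponding STP value with LoadStorePairLBit
+// (bit 22) set.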
+
+enum LoadStorePairPostIndexOp : uint32_t {
+ LoadStorePairPostIndexFixed = 0x28800000,
+ LoadStorePairPostIndexFMask = 0x3B800000,
+ LoadStorePairPostIndexMask = 0xFFC00000,
+ #define LOAD_STORE_PAIR_POST_INDEX(A, B, C) \
+ A##_##B##_post = LoadStorePairPostIndexFixed | A##_##B
+ LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR_POST_INDEX)
+ #undef LOAD_STORE_PAIR_POST_INDEX
+};
+
+enum LoadStorePairPreIndexOp : uint32_t {
+ LoadStorePairPreIndexFixed = 0x29800000,
+ LoadStorePairPreIndexFMask = 0x3B800000,
+ LoadStorePairPreIndexMask = 0xFFC00000,
+ #define LOAD_STORE_PAIR_PRE_INDEX(A, B, C) \
+ A##_##B##_pre = LoadStorePairPreIndexFixed | A##_##B
+ LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR_PRE_INDEX)
+ #undef LOAD_STORE_PAIR_PRE_INDEX
+};
+
+enum LoadStorePairOffsetOp : uint32_t {
+ LoadStorePairOffsetFixed = 0x29000000,
+ LoadStorePairOffsetFMask = 0x3B800000,
+ LoadStorePairOffsetMask = 0xFFC00000,
+ #define LOAD_STORE_PAIR_OFFSET(A, B, C) \
+ A##_##B##_off = LoadStorePairOffsetFixed | A##_##B
+ LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR_OFFSET)
+ #undef LOAD_STORE_PAIR_OFFSET
+};
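+
+// Each addressing mode is derived by OR-ing its Fixed value onto the base
+// LoadStorePairOp. Hand-checked example: STP_x_off expands to
+// LoadStorePairOffsetFixed | STP_x = 0x29000000 | 0x80000000 = 0xA9000000,
+// which is the familiar base encoding of "stp x<t>, x<t2>, [x<n>, #imm]".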
+
+enum LoadStorePairNonTemporalOp : uint32_t {
+ LoadStorePairNonTemporalFixed = 0x28000000,
+ LoadStorePairNonTemporalFMask = 0x3B800000,
+ LoadStorePairNonTemporalMask = 0xFFC00000,
+ LoadStorePairNonTemporalLBit = 1 << 22,
+ STNP_w = LoadStorePairNonTemporalFixed | STP_w,
+ LDNP_w = LoadStorePairNonTemporalFixed | LDP_w,
+ STNP_x = LoadStorePairNonTemporalFixed | STP_x,
+ LDNP_x = LoadStorePairNonTemporalFixed | LDP_x,
+ STNP_s = LoadStorePairNonTemporalFixed | STP_s,
+ LDNP_s = LoadStorePairNonTemporalFixed | LDP_s,
+ STNP_d = LoadStorePairNonTemporalFixed | STP_d,
+ LDNP_d = LoadStorePairNonTemporalFixed | LDP_d,
+ STNP_q = LoadStorePairNonTemporalFixed | STP_q,
+ LDNP_q = LoadStorePairNonTemporalFixed | LDP_q
+};
+
+// Load with pointer authentication.
+enum LoadStorePACOp : uint32_t {
+ LoadStorePACFixed = 0xF8200400,
+ LoadStorePACFMask = 0xFF200400,
+ LoadStorePACMask = 0xFFA00C00,
+ LoadStorePACPreBit = 0x00000800,
+ LDRAA = LoadStorePACFixed | 0x00000000,
+ LDRAA_pre = LoadStorePACPreBit | LDRAA,
+ LDRAB = LoadStorePACFixed | 0x00800000,
+ LDRAB_pre = LoadStorePACPreBit | LDRAB
+};
+
+// Load literal.
+enum LoadLiteralOp : uint32_t {
+ LoadLiteralFixed = 0x18000000,
+ LoadLiteralFMask = 0x3B000000,
+ LoadLiteralMask = 0xFF000000,
+ LDR_w_lit = LoadLiteralFixed | 0x00000000,
+ LDR_x_lit = LoadLiteralFixed | 0x40000000,
+ LDRSW_x_lit = LoadLiteralFixed | 0x80000000,
+ PRFM_lit = LoadLiteralFixed | 0xC0000000,
+ LDR_s_lit = LoadLiteralFixed | 0x04000000,
+ LDR_d_lit = LoadLiteralFixed | 0x44000000,
+ LDR_q_lit = LoadLiteralFixed | 0x84000000
+};
+
+#define LOAD_STORE_OP_LIST(V) \
+ V(ST, RB, w, 0x00000000), \
+ V(ST, RH, w, 0x40000000), \
+ V(ST, R, w, 0x80000000), \
+ V(ST, R, x, 0xC0000000), \
+ V(LD, RB, w, 0x00400000), \
+ V(LD, RH, w, 0x40400000), \
+ V(LD, R, w, 0x80400000), \
+ V(LD, R, x, 0xC0400000), \
+ V(LD, RSB, x, 0x00800000), \
+ V(LD, RSH, x, 0x40800000), \
+ V(LD, RSW, x, 0x80800000), \
+ V(LD, RSB, w, 0x00C00000), \
+ V(LD, RSH, w, 0x40C00000), \
+ V(ST, R, b, 0x04000000), \
+ V(ST, R, h, 0x44000000), \
+ V(ST, R, s, 0x84000000), \
+ V(ST, R, d, 0xC4000000), \
+ V(ST, R, q, 0x04800000), \
+ V(LD, R, b, 0x04400000), \
+ V(LD, R, h, 0x44400000), \
+ V(LD, R, s, 0x84400000), \
+ V(LD, R, d, 0xC4400000), \
+ V(LD, R, q, 0x04C00000)
+
+// Load/store (post, pre, offset and unsigned).
+enum LoadStoreOp : uint32_t {
+ LoadStoreMask = 0xC4C00000,
+ LoadStoreVMask = 0x04000000,
+ #define LOAD_STORE(A, B, C, D) \
+ A##B##_##C = D
+ LOAD_STORE_OP_LIST(LOAD_STORE),
+ #undef LOAD_STORE
+ PRFM = 0xC0800000
+};
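+
+// The X-macro glues the operation, width and register type together, so the
+// expansion above yields names such as
+//   STRB_w = 0x00000000, LDR_w = 0x80400000, LDR_x = 0xC0400000,
+//   STR_d = 0xC4000000, ...
+// i.e. size in bits 31:30, the V (SIMD/FP) bit in bit 26 (LoadStoreVMask),
+// and the load/store or sign-extend selection in bits 23:22.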
+
+// Load/store unscaled offset.
+enum LoadStoreUnscaledOffsetOp : uint32_t {
+ LoadStoreUnscaledOffsetFixed = 0x38000000,
+ LoadStoreUnscaledOffsetFMask = 0x3B200C00,
+ LoadStoreUnscaledOffsetMask = 0xFFE00C00,
+ PRFUM = LoadStoreUnscaledOffsetFixed | PRFM,
+ #define LOAD_STORE_UNSCALED(A, B, C, D) \
+ A##U##B##_##C = LoadStoreUnscaledOffsetFixed | D
+ LOAD_STORE_OP_LIST(LOAD_STORE_UNSCALED)
+ #undef LOAD_STORE_UNSCALED
+};
+
+// Load/store post index.
+enum LoadStorePostIndex : uint32_t {
+ LoadStorePostIndexFixed = 0x38000400,
+ LoadStorePostIndexFMask = 0x3B200C00,
+ LoadStorePostIndexMask = 0xFFE00C00,
+ #define LOAD_STORE_POST_INDEX(A, B, C, D) \
+ A##B##_##C##_post = LoadStorePostIndexFixed | D
+ LOAD_STORE_OP_LIST(LOAD_STORE_POST_INDEX)
+ #undef LOAD_STORE_POST_INDEX
+};
+
+// Load/store pre index.
+enum LoadStorePreIndex : uint32_t {
+ LoadStorePreIndexFixed = 0x38000C00,
+ LoadStorePreIndexFMask = 0x3B200C00,
+ LoadStorePreIndexMask = 0xFFE00C00,
+ #define LOAD_STORE_PRE_INDEX(A, B, C, D) \
+ A##B##_##C##_pre = LoadStorePreIndexFixed | D
+ LOAD_STORE_OP_LIST(LOAD_STORE_PRE_INDEX)
+ #undef LOAD_STORE_PRE_INDEX
+};
+
+// Load/store unsigned offset.
+enum LoadStoreUnsignedOffset : uint32_t {
+ LoadStoreUnsignedOffsetFixed = 0x39000000,
+ LoadStoreUnsignedOffsetFMask = 0x3B000000,
+ LoadStoreUnsignedOffsetMask = 0xFFC00000,
+ PRFM_unsigned = LoadStoreUnsignedOffsetFixed | PRFM,
+ #define LOAD_STORE_UNSIGNED_OFFSET(A, B, C, D) \
+ A##B##_##C##_unsigned = LoadStoreUnsignedOffsetFixed | D
+ LOAD_STORE_OP_LIST(LOAD_STORE_UNSIGNED_OFFSET)
+ #undef LOAD_STORE_UNSIGNED_OFFSET
+};
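+
+// Hand-worked example: LDR_x_unsigned expands to
+// LoadStoreUnsignedOffsetFixed | LDR_x = 0x39000000 | 0xC0400000 = 0xF9400000,
+// the base encoding of "ldr x<t>, [x<n>, #imm]" with a scaled 12-bit unsigned
+// offset in bits 21:10.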
+
+// Load/store register offset.
+enum LoadStoreRegisterOffset : uint32_t {
+ LoadStoreRegisterOffsetFixed = 0x38200800,
+ LoadStoreRegisterOffsetFMask = 0x3B200C00,
+ LoadStoreRegisterOffsetMask = 0xFFE00C00,
+ PRFM_reg = LoadStoreRegisterOffsetFixed | PRFM,
+ #define LOAD_STORE_REGISTER_OFFSET(A, B, C, D) \
+ A##B##_##C##_reg = LoadStoreRegisterOffsetFixed | D
+ LOAD_STORE_OP_LIST(LOAD_STORE_REGISTER_OFFSET)
+ #undef LOAD_STORE_REGISTER_OFFSET
+};
+
+enum LoadStoreExclusive : uint32_t {
+ LoadStoreExclusiveFixed = 0x08000000,
+ LoadStoreExclusiveFMask = 0x3F000000,
+ LoadStoreExclusiveMask = 0xFFE08000,
+ STXRB_w = LoadStoreExclusiveFixed | 0x00000000,
+ STXRH_w = LoadStoreExclusiveFixed | 0x40000000,
+ STXR_w = LoadStoreExclusiveFixed | 0x80000000,
+ STXR_x = LoadStoreExclusiveFixed | 0xC0000000,
+ LDXRB_w = LoadStoreExclusiveFixed | 0x00400000,
+ LDXRH_w = LoadStoreExclusiveFixed | 0x40400000,
+ LDXR_w = LoadStoreExclusiveFixed | 0x80400000,
+ LDXR_x = LoadStoreExclusiveFixed | 0xC0400000,
+ STXP_w = LoadStoreExclusiveFixed | 0x80200000,
+ STXP_x = LoadStoreExclusiveFixed | 0xC0200000,
+ LDXP_w = LoadStoreExclusiveFixed | 0x80600000,
+ LDXP_x = LoadStoreExclusiveFixed | 0xC0600000,
+ STLXRB_w = LoadStoreExclusiveFixed | 0x00008000,
+ STLXRH_w = LoadStoreExclusiveFixed | 0x40008000,
+ STLXR_w = LoadStoreExclusiveFixed | 0x80008000,
+ STLXR_x = LoadStoreExclusiveFixed | 0xC0008000,
+ LDAXRB_w = LoadStoreExclusiveFixed | 0x00408000,
+ LDAXRH_w = LoadStoreExclusiveFixed | 0x40408000,
+ LDAXR_w = LoadStoreExclusiveFixed | 0x80408000,
+ LDAXR_x = LoadStoreExclusiveFixed | 0xC0408000,
+ STLXP_w = LoadStoreExclusiveFixed | 0x80208000,
+ STLXP_x = LoadStoreExclusiveFixed | 0xC0208000,
+ LDAXP_w = LoadStoreExclusiveFixed | 0x80608000,
+ LDAXP_x = LoadStoreExclusiveFixed | 0xC0608000,
+ STLRB_w = LoadStoreExclusiveFixed | 0x00808000,
+ STLRH_w = LoadStoreExclusiveFixed | 0x40808000,
+ STLR_w = LoadStoreExclusiveFixed | 0x80808000,
+ STLR_x = LoadStoreExclusiveFixed | 0xC0808000,
+ LDARB_w = LoadStoreExclusiveFixed | 0x00C08000,
+ LDARH_w = LoadStoreExclusiveFixed | 0x40C08000,
+ LDAR_w = LoadStoreExclusiveFixed | 0x80C08000,
+ LDAR_x = LoadStoreExclusiveFixed | 0xC0C08000,
+
+ // v8.1 Load/store LORegion ops
+ STLLRB = LoadStoreExclusiveFixed | 0x00800000,
+ LDLARB = LoadStoreExclusiveFixed | 0x00C00000,
+ STLLRH = LoadStoreExclusiveFixed | 0x40800000,
+ LDLARH = LoadStoreExclusiveFixed | 0x40C00000,
+ STLLR_w = LoadStoreExclusiveFixed | 0x80800000,
+ LDLAR_w = LoadStoreExclusiveFixed | 0x80C00000,
+ STLLR_x = LoadStoreExclusiveFixed | 0xC0800000,
+ LDLAR_x = LoadStoreExclusiveFixed | 0xC0C00000,
+
+ // v8.1 Load/store exclusive ops
+ LSEBit_l = 0x00400000,
+ LSEBit_o0 = 0x00008000,
+ LSEBit_sz = 0x40000000,
+ CASFixed = LoadStoreExclusiveFixed | 0x80A00000,
+ CASBFixed = LoadStoreExclusiveFixed | 0x00A00000,
+ CASHFixed = LoadStoreExclusiveFixed | 0x40A00000,
+ CASPFixed = LoadStoreExclusiveFixed | 0x00200000,
+ CAS_w = CASFixed,
+ CAS_x = CASFixed | LSEBit_sz,
+ CASA_w = CASFixed | LSEBit_l,
+ CASA_x = CASFixed | LSEBit_l | LSEBit_sz,
+ CASL_w = CASFixed | LSEBit_o0,
+ CASL_x = CASFixed | LSEBit_o0 | LSEBit_sz,
+ CASAL_w = CASFixed | LSEBit_l | LSEBit_o0,
+ CASAL_x = CASFixed | LSEBit_l | LSEBit_o0 | LSEBit_sz,
+ CASB = CASBFixed,
+ CASAB = CASBFixed | LSEBit_l,
+ CASLB = CASBFixed | LSEBit_o0,
+ CASALB = CASBFixed | LSEBit_l | LSEBit_o0,
+ CASH = CASHFixed,
+ CASAH = CASHFixed | LSEBit_l,
+ CASLH = CASHFixed | LSEBit_o0,
+ CASALH = CASHFixed | LSEBit_l | LSEBit_o0,
+ CASP_w = CASPFixed,
+ CASP_x = CASPFixed | LSEBit_sz,
+ CASPA_w = CASPFixed | LSEBit_l,
+ CASPA_x = CASPFixed | LSEBit_l | LSEBit_sz,
+ CASPL_w = CASPFixed | LSEBit_o0,
+ CASPL_x = CASPFixed | LSEBit_o0 | LSEBit_sz,
+ CASPAL_w = CASPFixed | LSEBit_l | LSEBit_o0,
+ CASPAL_x = CASPFixed | LSEBit_l | LSEBit_o0 | LSEBit_sz
+};
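+
+// The v8.1 compare-and-swap encodings above are built by OR-ing feature bits
+// onto a family base: LSEBit_l (bit 22) adds acquire semantics ("A"),
+// LSEBit_o0 (bit 15) adds release semantics ("L"), and LSEBit_sz (bit 30)
+// selects the 64-bit form. For example, CASAL_x is
+// CASFixed | LSEBit_l | LSEBit_o0 | LSEBit_sz.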
+
+// Load/store RCpc unscaled offset.
+enum LoadStoreRCpcUnscaledOffsetOp : uint32_t {
+ LoadStoreRCpcUnscaledOffsetFixed = 0x19000000,
+ LoadStoreRCpcUnscaledOffsetFMask = 0x3F200C00,
+ LoadStoreRCpcUnscaledOffsetMask = 0xFFE00C00,
+ STLURB = LoadStoreRCpcUnscaledOffsetFixed | 0x00000000,
+ LDAPURB = LoadStoreRCpcUnscaledOffsetFixed | 0x00400000,
+ LDAPURSB_x = LoadStoreRCpcUnscaledOffsetFixed | 0x00800000,
+ LDAPURSB_w = LoadStoreRCpcUnscaledOffsetFixed | 0x00C00000,
+ STLURH = LoadStoreRCpcUnscaledOffsetFixed | 0x40000000,
+ LDAPURH = LoadStoreRCpcUnscaledOffsetFixed | 0x40400000,
+ LDAPURSH_x = LoadStoreRCpcUnscaledOffsetFixed | 0x40800000,
+ LDAPURSH_w = LoadStoreRCpcUnscaledOffsetFixed | 0x40C00000,
+ STLUR_w = LoadStoreRCpcUnscaledOffsetFixed | 0x80000000,
+ LDAPUR_w = LoadStoreRCpcUnscaledOffsetFixed | 0x80400000,
+ LDAPURSW = LoadStoreRCpcUnscaledOffsetFixed | 0x80800000,
+ STLUR_x = LoadStoreRCpcUnscaledOffsetFixed | 0xC0000000,
+ LDAPUR_x = LoadStoreRCpcUnscaledOffsetFixed | 0xC0400000
+};
+
+#define ATOMIC_MEMORY_SIMPLE_OPC_LIST(V) \
+ V(LDADD, 0x00000000), \
+ V(LDCLR, 0x00001000), \
+ V(LDEOR, 0x00002000), \
+ V(LDSET, 0x00003000), \
+ V(LDSMAX, 0x00004000), \
+ V(LDSMIN, 0x00005000), \
+ V(LDUMAX, 0x00006000), \
+ V(LDUMIN, 0x00007000)
+
+// Atomic memory.
+enum AtomicMemoryOp : uint32_t {
+ AtomicMemoryFixed = 0x38200000,
+ AtomicMemoryFMask = 0x3B200C00,
+ AtomicMemoryMask = 0xFFE0FC00,
+ SWPB = AtomicMemoryFixed | 0x00008000,
+ SWPAB = AtomicMemoryFixed | 0x00808000,
+ SWPLB = AtomicMemoryFixed | 0x00408000,
+ SWPALB = AtomicMemoryFixed | 0x00C08000,
+ SWPH = AtomicMemoryFixed | 0x40008000,
+ SWPAH = AtomicMemoryFixed | 0x40808000,
+ SWPLH = AtomicMemoryFixed | 0x40408000,
+ SWPALH = AtomicMemoryFixed | 0x40C08000,
+ SWP_w = AtomicMemoryFixed | 0x80008000,
+ SWPA_w = AtomicMemoryFixed | 0x80808000,
+ SWPL_w = AtomicMemoryFixed | 0x80408000,
+ SWPAL_w = AtomicMemoryFixed | 0x80C08000,
+ SWP_x = AtomicMemoryFixed | 0xC0008000,
+ SWPA_x = AtomicMemoryFixed | 0xC0808000,
+ SWPL_x = AtomicMemoryFixed | 0xC0408000,
+ SWPAL_x = AtomicMemoryFixed | 0xC0C08000,
+ LDAPRB = AtomicMemoryFixed | 0x0080C000,
+ LDAPRH = AtomicMemoryFixed | 0x4080C000,
+ LDAPR_w = AtomicMemoryFixed | 0x8080C000,
+ LDAPR_x = AtomicMemoryFixed | 0xC080C000,
+
+ AtomicMemorySimpleFMask = 0x3B208C00,
+ AtomicMemorySimpleOpMask = 0x00007000,
+#define ATOMIC_MEMORY_SIMPLE(N, OP) \
+ N##Op = OP, \
+ N##B = AtomicMemoryFixed | OP, \
+ N##AB = AtomicMemoryFixed | OP | 0x00800000, \
+ N##LB = AtomicMemoryFixed | OP | 0x00400000, \
+ N##ALB = AtomicMemoryFixed | OP | 0x00C00000, \
+ N##H = AtomicMemoryFixed | OP | 0x40000000, \
+ N##AH = AtomicMemoryFixed | OP | 0x40800000, \
+ N##LH = AtomicMemoryFixed | OP | 0x40400000, \
+ N##ALH = AtomicMemoryFixed | OP | 0x40C00000, \
+ N##_w = AtomicMemoryFixed | OP | 0x80000000, \
+ N##A_w = AtomicMemoryFixed | OP | 0x80800000, \
+ N##L_w = AtomicMemoryFixed | OP | 0x80400000, \
+ N##AL_w = AtomicMemoryFixed | OP | 0x80C00000, \
+ N##_x = AtomicMemoryFixed | OP | 0xC0000000, \
+ N##A_x = AtomicMemoryFixed | OP | 0xC0800000, \
+ N##L_x = AtomicMemoryFixed | OP | 0xC0400000, \
+ N##AL_x = AtomicMemoryFixed | OP | 0xC0C00000
+
+ ATOMIC_MEMORY_SIMPLE_OPC_LIST(ATOMIC_MEMORY_SIMPLE)
+#undef ATOMIC_MEMORY_SIMPLE
+};
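+
+// The macro expands each opcode in ATOMIC_MEMORY_SIMPLE_OPC_LIST into the
+// full byte/half/word/doubleword and acquire/release matrix. For example,
+// LDADDB = AtomicMemoryFixed | 0x0 = 0x38200000 and
+// LDADDAL_x = AtomicMemoryFixed | 0xC0C00000 = 0xF8E00000; the opcode itself
+// sits in bits 14:12 (AtomicMemorySimpleOpMask).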
+
+// Conditional compare.
+enum ConditionalCompareOp : uint32_t {
+ ConditionalCompareMask = 0x60000000,
+ CCMN = 0x20000000,
+ CCMP = 0x60000000
+};
+
+// Conditional compare register.
+enum ConditionalCompareRegisterOp : uint32_t {
+ ConditionalCompareRegisterFixed = 0x1A400000,
+ ConditionalCompareRegisterFMask = 0x1FE00800,
+ ConditionalCompareRegisterMask = 0xFFE00C10,
+ CCMN_w = ConditionalCompareRegisterFixed | CCMN,
+ CCMN_x = ConditionalCompareRegisterFixed | SixtyFourBits | CCMN,
+ CCMP_w = ConditionalCompareRegisterFixed | CCMP,
+ CCMP_x = ConditionalCompareRegisterFixed | SixtyFourBits | CCMP
+};
+
+// Conditional compare immediate.
+enum ConditionalCompareImmediateOp : uint32_t {
+ ConditionalCompareImmediateFixed = 0x1A400800,
+ ConditionalCompareImmediateFMask = 0x1FE00800,
+ ConditionalCompareImmediateMask = 0xFFE00C10,
+ CCMN_w_imm = ConditionalCompareImmediateFixed | CCMN,
+ CCMN_x_imm = ConditionalCompareImmediateFixed | SixtyFourBits | CCMN,
+ CCMP_w_imm = ConditionalCompareImmediateFixed | CCMP,
+ CCMP_x_imm = ConditionalCompareImmediateFixed | SixtyFourBits | CCMP
+};
+
+// Conditional select.
+enum ConditionalSelectOp : uint32_t {
+ ConditionalSelectFixed = 0x1A800000,
+ ConditionalSelectFMask = 0x1FE00000,
+ ConditionalSelectMask = 0xFFE00C00,
+ CSEL_w = ConditionalSelectFixed | 0x00000000,
+ CSEL_x = ConditionalSelectFixed | 0x80000000,
+ CSEL = CSEL_w,
+ CSINC_w = ConditionalSelectFixed | 0x00000400,
+ CSINC_x = ConditionalSelectFixed | 0x80000400,
+ CSINC = CSINC_w,
+ CSINV_w = ConditionalSelectFixed | 0x40000000,
+ CSINV_x = ConditionalSelectFixed | 0xC0000000,
+ CSINV = CSINV_w,
+ CSNEG_w = ConditionalSelectFixed | 0x40000400,
+ CSNEG_x = ConditionalSelectFixed | 0xC0000400,
+ CSNEG = CSNEG_w
+};
+
+// Data processing 1 source.
+enum DataProcessing1SourceOp : uint32_t {
+ DataProcessing1SourceFixed = 0x5AC00000,
+ DataProcessing1SourceFMask = 0x5FE00000,
+ DataProcessing1SourceMask = 0xFFFFFC00,
+ RBIT = DataProcessing1SourceFixed | 0x00000000,
+ RBIT_w = RBIT,
+ RBIT_x = RBIT | SixtyFourBits,
+ REV16 = DataProcessing1SourceFixed | 0x00000400,
+ REV16_w = REV16,
+ REV16_x = REV16 | SixtyFourBits,
+ REV = DataProcessing1SourceFixed | 0x00000800,
+ REV_w = REV,
+ REV32_x = REV | SixtyFourBits,
+ REV_x = DataProcessing1SourceFixed | SixtyFourBits | 0x00000C00,
+ CLZ = DataProcessing1SourceFixed | 0x00001000,
+ CLZ_w = CLZ,
+ CLZ_x = CLZ | SixtyFourBits,
+ CLS = DataProcessing1SourceFixed | 0x00001400,
+ CLS_w = CLS,
+ CLS_x = CLS | SixtyFourBits,
+
+ // Pointer authentication instructions in Armv8.3.
+ PACIA = DataProcessing1SourceFixed | 0x80010000,
+ PACIB = DataProcessing1SourceFixed | 0x80010400,
+ PACDA = DataProcessing1SourceFixed | 0x80010800,
+ PACDB = DataProcessing1SourceFixed | 0x80010C00,
+ AUTIA = DataProcessing1SourceFixed | 0x80011000,
+ AUTIB = DataProcessing1SourceFixed | 0x80011400,
+ AUTDA = DataProcessing1SourceFixed | 0x80011800,
+ AUTDB = DataProcessing1SourceFixed | 0x80011C00,
+ PACIZA = DataProcessing1SourceFixed | 0x80012000,
+ PACIZB = DataProcessing1SourceFixed | 0x80012400,
+ PACDZA = DataProcessing1SourceFixed | 0x80012800,
+ PACDZB = DataProcessing1SourceFixed | 0x80012C00,
+ AUTIZA = DataProcessing1SourceFixed | 0x80013000,
+ AUTIZB = DataProcessing1SourceFixed | 0x80013400,
+ AUTDZA = DataProcessing1SourceFixed | 0x80013800,
+ AUTDZB = DataProcessing1SourceFixed | 0x80013C00,
+ XPACI = DataProcessing1SourceFixed | 0x80014000,
+ XPACD = DataProcessing1SourceFixed | 0x80014400
+};
+
+// Data processing 2 source.
+enum DataProcessing2SourceOp : uint32_t {
+ DataProcessing2SourceFixed = 0x1AC00000,
+ DataProcessing2SourceFMask = 0x5FE00000,
+ DataProcessing2SourceMask = 0xFFE0FC00,
+ UDIV_w = DataProcessing2SourceFixed | 0x00000800,
+ UDIV_x = DataProcessing2SourceFixed | 0x80000800,
+ UDIV = UDIV_w,
+ SDIV_w = DataProcessing2SourceFixed | 0x00000C00,
+ SDIV_x = DataProcessing2SourceFixed | 0x80000C00,
+ SDIV = SDIV_w,
+ LSLV_w = DataProcessing2SourceFixed | 0x00002000,
+ LSLV_x = DataProcessing2SourceFixed | 0x80002000,
+ LSLV = LSLV_w,
+ LSRV_w = DataProcessing2SourceFixed | 0x00002400,
+ LSRV_x = DataProcessing2SourceFixed | 0x80002400,
+ LSRV = LSRV_w,
+ ASRV_w = DataProcessing2SourceFixed | 0x00002800,
+ ASRV_x = DataProcessing2SourceFixed | 0x80002800,
+ ASRV = ASRV_w,
+ RORV_w = DataProcessing2SourceFixed | 0x00002C00,
+ RORV_x = DataProcessing2SourceFixed | 0x80002C00,
+ RORV = RORV_w,
+ PACGA = DataProcessing2SourceFixed | SixtyFourBits | 0x00003000,
+ CRC32B = DataProcessing2SourceFixed | 0x00004000,
+ CRC32H = DataProcessing2SourceFixed | 0x00004400,
+ CRC32W = DataProcessing2SourceFixed | 0x00004800,
+ CRC32X = DataProcessing2SourceFixed | SixtyFourBits | 0x00004C00,
+ CRC32CB = DataProcessing2SourceFixed | 0x00005000,
+ CRC32CH = DataProcessing2SourceFixed | 0x00005400,
+ CRC32CW = DataProcessing2SourceFixed | 0x00005800,
+ CRC32CX = DataProcessing2SourceFixed | SixtyFourBits | 0x00005C00
+};
+
+// Data processing 3 source.
+enum DataProcessing3SourceOp : uint32_t {
+ DataProcessing3SourceFixed = 0x1B000000,
+ DataProcessing3SourceFMask = 0x1F000000,
+ DataProcessing3SourceMask = 0xFFE08000,
+ MADD_w = DataProcessing3SourceFixed | 0x00000000,
+ MADD_x = DataProcessing3SourceFixed | 0x80000000,
+ MADD = MADD_w,
+ MSUB_w = DataProcessing3SourceFixed | 0x00008000,
+ MSUB_x = DataProcessing3SourceFixed | 0x80008000,
+ MSUB = MSUB_w,
+ SMADDL_x = DataProcessing3SourceFixed | 0x80200000,
+ SMSUBL_x = DataProcessing3SourceFixed | 0x80208000,
+ SMULH_x = DataProcessing3SourceFixed | 0x80400000,
+ UMADDL_x = DataProcessing3SourceFixed | 0x80A00000,
+ UMSUBL_x = DataProcessing3SourceFixed | 0x80A08000,
+ UMULH_x = DataProcessing3SourceFixed | 0x80C00000
+};
+
+// Floating point compare.
+enum FPCompareOp : uint32_t {
+ FPCompareFixed = 0x1E202000,
+ FPCompareFMask = 0x5F203C00,
+ FPCompareMask = 0xFFE0FC1F,
+ FCMP_h = FPCompareFixed | FP16 | 0x00000000,
+ FCMP_s = FPCompareFixed | 0x00000000,
+ FCMP_d = FPCompareFixed | FP64 | 0x00000000,
+ FCMP = FCMP_s,
+ FCMP_h_zero = FPCompareFixed | FP16 | 0x00000008,
+ FCMP_s_zero = FPCompareFixed | 0x00000008,
+ FCMP_d_zero = FPCompareFixed | FP64 | 0x00000008,
+ FCMP_zero = FCMP_s_zero,
+ FCMPE_h = FPCompareFixed | FP16 | 0x00000010,
+ FCMPE_s = FPCompareFixed | 0x00000010,
+ FCMPE_d = FPCompareFixed | FP64 | 0x00000010,
+ FCMPE = FCMPE_s,
+ FCMPE_h_zero = FPCompareFixed | FP16 | 0x00000018,
+ FCMPE_s_zero = FPCompareFixed | 0x00000018,
+ FCMPE_d_zero = FPCompareFixed | FP64 | 0x00000018,
+ FCMPE_zero = FCMPE_s_zero
+};
+
+// Floating point conditional compare.
+enum FPConditionalCompareOp : uint32_t {
+ FPConditionalCompareFixed = 0x1E200400,
+ FPConditionalCompareFMask = 0x5F200C00,
+ FPConditionalCompareMask = 0xFFE00C10,
+ FCCMP_h = FPConditionalCompareFixed | FP16 | 0x00000000,
+ FCCMP_s = FPConditionalCompareFixed | 0x00000000,
+ FCCMP_d = FPConditionalCompareFixed | FP64 | 0x00000000,
+ FCCMP = FCCMP_s,
+ FCCMPE_h = FPConditionalCompareFixed | FP16 | 0x00000010,
+ FCCMPE_s = FPConditionalCompareFixed | 0x00000010,
+ FCCMPE_d = FPConditionalCompareFixed | FP64 | 0x00000010,
+ FCCMPE = FCCMPE_s
+};
+
+// Floating point conditional select.
+enum FPConditionalSelectOp : uint32_t {
+ FPConditionalSelectFixed = 0x1E200C00,
+ FPConditionalSelectFMask = 0x5F200C00,
+ FPConditionalSelectMask = 0xFFE00C00,
+ FCSEL_h = FPConditionalSelectFixed | FP16 | 0x00000000,
+ FCSEL_s = FPConditionalSelectFixed | 0x00000000,
+ FCSEL_d = FPConditionalSelectFixed | FP64 | 0x00000000,
+ FCSEL = FCSEL_s
+};
+
+// Floating point immediate.
+enum FPImmediateOp : uint32_t {
+ FPImmediateFixed = 0x1E201000,
+ FPImmediateFMask = 0x5F201C00,
+ FPImmediateMask = 0xFFE01C00,
+ FMOV_h_imm = FPImmediateFixed | FP16 | 0x00000000,
+ FMOV_s_imm = FPImmediateFixed | 0x00000000,
+ FMOV_d_imm = FPImmediateFixed | FP64 | 0x00000000
+};
+
+// Floating point data processing 1 source.
+enum FPDataProcessing1SourceOp : uint32_t {
+ FPDataProcessing1SourceFixed = 0x1E204000,
+ FPDataProcessing1SourceFMask = 0x5F207C00,
+ FPDataProcessing1SourceMask = 0xFFFFFC00,
+ FMOV_h = FPDataProcessing1SourceFixed | FP16 | 0x00000000,
+ FMOV_s = FPDataProcessing1SourceFixed | 0x00000000,
+ FMOV_d = FPDataProcessing1SourceFixed | FP64 | 0x00000000,
+ FMOV = FMOV_s,
+ FABS_h = FPDataProcessing1SourceFixed | FP16 | 0x00008000,
+ FABS_s = FPDataProcessing1SourceFixed | 0x00008000,
+ FABS_d = FPDataProcessing1SourceFixed | FP64 | 0x00008000,
+ FABS = FABS_s,
+ FNEG_h = FPDataProcessing1SourceFixed | FP16 | 0x00010000,
+ FNEG_s = FPDataProcessing1SourceFixed | 0x00010000,
+ FNEG_d = FPDataProcessing1SourceFixed | FP64 | 0x00010000,
+ FNEG = FNEG_s,
+ FSQRT_h = FPDataProcessing1SourceFixed | FP16 | 0x00018000,
+ FSQRT_s = FPDataProcessing1SourceFixed | 0x00018000,
+ FSQRT_d = FPDataProcessing1SourceFixed | FP64 | 0x00018000,
+ FSQRT = FSQRT_s,
+ FCVT_ds = FPDataProcessing1SourceFixed | 0x00028000,
+ FCVT_sd = FPDataProcessing1SourceFixed | FP64 | 0x00020000,
+ FCVT_hs = FPDataProcessing1SourceFixed | 0x00038000,
+ FCVT_hd = FPDataProcessing1SourceFixed | FP64 | 0x00038000,
+ FCVT_sh = FPDataProcessing1SourceFixed | 0x00C20000,
+ FCVT_dh = FPDataProcessing1SourceFixed | 0x00C28000,
+ FRINT32X_s = FPDataProcessing1SourceFixed | 0x00088000,
+ FRINT32X_d = FPDataProcessing1SourceFixed | FP64 | 0x00088000,
+ FRINT32X = FRINT32X_s,
+ FRINT32Z_s = FPDataProcessing1SourceFixed | 0x00080000,
+ FRINT32Z_d = FPDataProcessing1SourceFixed | FP64 | 0x00080000,
+ FRINT32Z = FRINT32Z_s,
+ FRINT64X_s = FPDataProcessing1SourceFixed | 0x00098000,
+ FRINT64X_d = FPDataProcessing1SourceFixed | FP64 | 0x00098000,
+ FRINT64X = FRINT64X_s,
+ FRINT64Z_s = FPDataProcessing1SourceFixed | 0x00090000,
+ FRINT64Z_d = FPDataProcessing1SourceFixed | FP64 | 0x00090000,
+ FRINT64Z = FRINT64Z_s,
+ FRINTN_h = FPDataProcessing1SourceFixed | FP16 | 0x00040000,
+ FRINTN_s = FPDataProcessing1SourceFixed | 0x00040000,
+ FRINTN_d = FPDataProcessing1SourceFixed | FP64 | 0x00040000,
+ FRINTN = FRINTN_s,
+ FRINTP_h = FPDataProcessing1SourceFixed | FP16 | 0x00048000,
+ FRINTP_s = FPDataProcessing1SourceFixed | 0x00048000,
+ FRINTP_d = FPDataProcessing1SourceFixed | FP64 | 0x00048000,
+ FRINTP = FRINTP_s,
+ FRINTM_h = FPDataProcessing1SourceFixed | FP16 | 0x00050000,
+ FRINTM_s = FPDataProcessing1SourceFixed | 0x00050000,
+ FRINTM_d = FPDataProcessing1SourceFixed | FP64 | 0x00050000,
+ FRINTM = FRINTM_s,
+ FRINTZ_h = FPDataProcessing1SourceFixed | FP16 | 0x00058000,
+ FRINTZ_s = FPDataProcessing1SourceFixed | 0x00058000,
+ FRINTZ_d = FPDataProcessing1SourceFixed | FP64 | 0x00058000,
+ FRINTZ = FRINTZ_s,
+ FRINTA_h = FPDataProcessing1SourceFixed | FP16 | 0x00060000,
+ FRINTA_s = FPDataProcessing1SourceFixed | 0x00060000,
+ FRINTA_d = FPDataProcessing1SourceFixed | FP64 | 0x00060000,
+ FRINTA = FRINTA_s,
+ FRINTX_h = FPDataProcessing1SourceFixed | FP16 | 0x00070000,
+ FRINTX_s = FPDataProcessing1SourceFixed | 0x00070000,
+ FRINTX_d = FPDataProcessing1SourceFixed | FP64 | 0x00070000,
+ FRINTX = FRINTX_s,
+ FRINTI_h = FPDataProcessing1SourceFixed | FP16 | 0x00078000,
+ FRINTI_s = FPDataProcessing1SourceFixed | 0x00078000,
+ FRINTI_d = FPDataProcessing1SourceFixed | FP64 | 0x00078000,
+ FRINTI = FRINTI_s
+};
+
+// Floating point data processing 2 source.
+enum FPDataProcessing2SourceOp : uint32_t {
+ FPDataProcessing2SourceFixed = 0x1E200800,
+ FPDataProcessing2SourceFMask = 0x5F200C00,
+ FPDataProcessing2SourceMask = 0xFFE0FC00,
+ FMUL = FPDataProcessing2SourceFixed | 0x00000000,
+ FMUL_h = FMUL | FP16,
+ FMUL_s = FMUL,
+ FMUL_d = FMUL | FP64,
+ FDIV = FPDataProcessing2SourceFixed | 0x00001000,
+ FDIV_h = FDIV | FP16,
+ FDIV_s = FDIV,
+ FDIV_d = FDIV | FP64,
+ FADD = FPDataProcessing2SourceFixed | 0x00002000,
+ FADD_h = FADD | FP16,
+ FADD_s = FADD,
+ FADD_d = FADD | FP64,
+ FSUB = FPDataProcessing2SourceFixed | 0x00003000,
+ FSUB_h = FSUB | FP16,
+ FSUB_s = FSUB,
+ FSUB_d = FSUB | FP64,
+ FMAX = FPDataProcessing2SourceFixed | 0x00004000,
+ FMAX_h = FMAX | FP16,
+ FMAX_s = FMAX,
+ FMAX_d = FMAX | FP64,
+ FMIN = FPDataProcessing2SourceFixed | 0x00005000,
+ FMIN_h = FMIN | FP16,
+ FMIN_s = FMIN,
+ FMIN_d = FMIN | FP64,
+ FMAXNM = FPDataProcessing2SourceFixed | 0x00006000,
+ FMAXNM_h = FMAXNM | FP16,
+ FMAXNM_s = FMAXNM,
+ FMAXNM_d = FMAXNM | FP64,
+ FMINNM = FPDataProcessing2SourceFixed | 0x00007000,
+ FMINNM_h = FMINNM | FP16,
+ FMINNM_s = FMINNM,
+ FMINNM_d = FMINNM | FP64,
+ FNMUL = FPDataProcessing2SourceFixed | 0x00008000,
+ FNMUL_h = FNMUL | FP16,
+ FNMUL_s = FNMUL,
+ FNMUL_d = FNMUL | FP64
+};
+
+// Floating point data processing 3 source.
+enum FPDataProcessing3SourceOp : uint32_t {
+ FPDataProcessing3SourceFixed = 0x1F000000,
+ FPDataProcessing3SourceFMask = 0x5F000000,
+ FPDataProcessing3SourceMask = 0xFFE08000,
+ FMADD_h = FPDataProcessing3SourceFixed | 0x00C00000,
+ FMSUB_h = FPDataProcessing3SourceFixed | 0x00C08000,
+ FNMADD_h = FPDataProcessing3SourceFixed | 0x00E00000,
+ FNMSUB_h = FPDataProcessing3SourceFixed | 0x00E08000,
+ FMADD_s = FPDataProcessing3SourceFixed | 0x00000000,
+ FMSUB_s = FPDataProcessing3SourceFixed | 0x00008000,
+ FNMADD_s = FPDataProcessing3SourceFixed | 0x00200000,
+ FNMSUB_s = FPDataProcessing3SourceFixed | 0x00208000,
+ FMADD_d = FPDataProcessing3SourceFixed | 0x00400000,
+ FMSUB_d = FPDataProcessing3SourceFixed | 0x00408000,
+ FNMADD_d = FPDataProcessing3SourceFixed | 0x00600000,
+ FNMSUB_d = FPDataProcessing3SourceFixed | 0x00608000
+};
+
+// Conversion between floating point and integer.
+enum FPIntegerConvertOp : uint32_t {
+ FPIntegerConvertFixed = 0x1E200000,
+ FPIntegerConvertFMask = 0x5F20FC00,
+ FPIntegerConvertMask = 0xFFFFFC00,
+ FCVTNS = FPIntegerConvertFixed | 0x00000000,
+ FCVTNS_wh = FCVTNS | FP16,
+ FCVTNS_xh = FCVTNS | SixtyFourBits | FP16,
+ FCVTNS_ws = FCVTNS,
+ FCVTNS_xs = FCVTNS | SixtyFourBits,
+ FCVTNS_wd = FCVTNS | FP64,
+ FCVTNS_xd = FCVTNS | SixtyFourBits | FP64,
+ FCVTNU = FPIntegerConvertFixed | 0x00010000,
+ FCVTNU_wh = FCVTNU | FP16,
+ FCVTNU_xh = FCVTNU | SixtyFourBits | FP16,
+ FCVTNU_ws = FCVTNU,
+ FCVTNU_xs = FCVTNU | SixtyFourBits,
+ FCVTNU_wd = FCVTNU | FP64,
+ FCVTNU_xd = FCVTNU | SixtyFourBits | FP64,
+ FCVTPS = FPIntegerConvertFixed | 0x00080000,
+ FCVTPS_wh = FCVTPS | FP16,
+ FCVTPS_xh = FCVTPS | SixtyFourBits | FP16,
+ FCVTPS_ws = FCVTPS,
+ FCVTPS_xs = FCVTPS | SixtyFourBits,
+ FCVTPS_wd = FCVTPS | FP64,
+ FCVTPS_xd = FCVTPS | SixtyFourBits | FP64,
+ FCVTPU = FPIntegerConvertFixed | 0x00090000,
+ FCVTPU_wh = FCVTPU | FP16,
+ FCVTPU_xh = FCVTPU | SixtyFourBits | FP16,
+ FCVTPU_ws = FCVTPU,
+ FCVTPU_xs = FCVTPU | SixtyFourBits,
+ FCVTPU_wd = FCVTPU | FP64,
+ FCVTPU_xd = FCVTPU | SixtyFourBits | FP64,
+ FCVTMS = FPIntegerConvertFixed | 0x00100000,
+ FCVTMS_wh = FCVTMS | FP16,
+ FCVTMS_xh = FCVTMS | SixtyFourBits | FP16,
+ FCVTMS_ws = FCVTMS,
+ FCVTMS_xs = FCVTMS | SixtyFourBits,
+ FCVTMS_wd = FCVTMS | FP64,
+ FCVTMS_xd = FCVTMS | SixtyFourBits | FP64,
+ FCVTMU = FPIntegerConvertFixed | 0x00110000,
+ FCVTMU_wh = FCVTMU | FP16,
+ FCVTMU_xh = FCVTMU | SixtyFourBits | FP16,
+ FCVTMU_ws = FCVTMU,
+ FCVTMU_xs = FCVTMU | SixtyFourBits,
+ FCVTMU_wd = FCVTMU | FP64,
+ FCVTMU_xd = FCVTMU | SixtyFourBits | FP64,
+ FCVTZS = FPIntegerConvertFixed | 0x00180000,
+ FCVTZS_wh = FCVTZS | FP16,
+ FCVTZS_xh = FCVTZS | SixtyFourBits | FP16,
+ FCVTZS_ws = FCVTZS,
+ FCVTZS_xs = FCVTZS | SixtyFourBits,
+ FCVTZS_wd = FCVTZS | FP64,
+ FCVTZS_xd = FCVTZS | SixtyFourBits | FP64,
+ FCVTZU = FPIntegerConvertFixed | 0x00190000,
+ FCVTZU_wh = FCVTZU | FP16,
+ FCVTZU_xh = FCVTZU | SixtyFourBits | FP16,
+ FCVTZU_ws = FCVTZU,
+ FCVTZU_xs = FCVTZU | SixtyFourBits,
+ FCVTZU_wd = FCVTZU | FP64,
+ FCVTZU_xd = FCVTZU | SixtyFourBits | FP64,
+ SCVTF = FPIntegerConvertFixed | 0x00020000,
+ SCVTF_hw = SCVTF | FP16,
+ SCVTF_hx = SCVTF | SixtyFourBits | FP16,
+ SCVTF_sw = SCVTF,
+ SCVTF_sx = SCVTF | SixtyFourBits,
+ SCVTF_dw = SCVTF | FP64,
+ SCVTF_dx = SCVTF | SixtyFourBits | FP64,
+ UCVTF = FPIntegerConvertFixed | 0x00030000,
+ UCVTF_hw = UCVTF | FP16,
+ UCVTF_hx = UCVTF | SixtyFourBits | FP16,
+ UCVTF_sw = UCVTF,
+ UCVTF_sx = UCVTF | SixtyFourBits,
+ UCVTF_dw = UCVTF | FP64,
+ UCVTF_dx = UCVTF | SixtyFourBits | FP64,
+ FCVTAS = FPIntegerConvertFixed | 0x00040000,
+ FCVTAS_wh = FCVTAS | FP16,
+ FCVTAS_xh = FCVTAS | SixtyFourBits | FP16,
+ FCVTAS_ws = FCVTAS,
+ FCVTAS_xs = FCVTAS | SixtyFourBits,
+ FCVTAS_wd = FCVTAS | FP64,
+ FCVTAS_xd = FCVTAS | SixtyFourBits | FP64,
+ FCVTAU = FPIntegerConvertFixed | 0x00050000,
+ FCVTAU_wh = FCVTAU | FP16,
+ FCVTAU_xh = FCVTAU | SixtyFourBits | FP16,
+ FCVTAU_ws = FCVTAU,
+ FCVTAU_xs = FCVTAU | SixtyFourBits,
+ FCVTAU_wd = FCVTAU | FP64,
+ FCVTAU_xd = FCVTAU | SixtyFourBits | FP64,
+ FMOV_wh = FPIntegerConvertFixed | 0x00060000 | FP16,
+ FMOV_hw = FPIntegerConvertFixed | 0x00070000 | FP16,
+ FMOV_xh = FMOV_wh | SixtyFourBits,
+ FMOV_hx = FMOV_hw | SixtyFourBits,
+ FMOV_ws = FPIntegerConvertFixed | 0x00060000,
+ FMOV_sw = FPIntegerConvertFixed | 0x00070000,
+ FMOV_xd = FMOV_ws | SixtyFourBits | FP64,
+ FMOV_dx = FMOV_sw | SixtyFourBits | FP64,
+ FMOV_d1_x = FPIntegerConvertFixed | SixtyFourBits | 0x008F0000,
+ FMOV_x_d1 = FPIntegerConvertFixed | SixtyFourBits | 0x008E0000,
+ FJCVTZS = FPIntegerConvertFixed | FP64 | 0x001E0000
+};
+
+// Conversion between fixed point and floating point.
+enum FPFixedPointConvertOp : uint32_t {
+ FPFixedPointConvertFixed = 0x1E000000,
+ FPFixedPointConvertFMask = 0x5F200000,
+ FPFixedPointConvertMask = 0xFFFF0000,
+ FCVTZS_fixed = FPFixedPointConvertFixed | 0x00180000,
+ FCVTZS_wh_fixed = FCVTZS_fixed | FP16,
+ FCVTZS_xh_fixed = FCVTZS_fixed | SixtyFourBits | FP16,
+ FCVTZS_ws_fixed = FCVTZS_fixed,
+ FCVTZS_xs_fixed = FCVTZS_fixed | SixtyFourBits,
+ FCVTZS_wd_fixed = FCVTZS_fixed | FP64,
+ FCVTZS_xd_fixed = FCVTZS_fixed | SixtyFourBits | FP64,
+ FCVTZU_fixed = FPFixedPointConvertFixed | 0x00190000,
+ FCVTZU_wh_fixed = FCVTZU_fixed | FP16,
+ FCVTZU_xh_fixed = FCVTZU_fixed | SixtyFourBits | FP16,
+ FCVTZU_ws_fixed = FCVTZU_fixed,
+ FCVTZU_xs_fixed = FCVTZU_fixed | SixtyFourBits,
+ FCVTZU_wd_fixed = FCVTZU_fixed | FP64,
+ FCVTZU_xd_fixed = FCVTZU_fixed | SixtyFourBits | FP64,
+ SCVTF_fixed = FPFixedPointConvertFixed | 0x00020000,
+ SCVTF_hw_fixed = SCVTF_fixed | FP16,
+ SCVTF_hx_fixed = SCVTF_fixed | SixtyFourBits | FP16,
+ SCVTF_sw_fixed = SCVTF_fixed,
+ SCVTF_sx_fixed = SCVTF_fixed | SixtyFourBits,
+ SCVTF_dw_fixed = SCVTF_fixed | FP64,
+ SCVTF_dx_fixed = SCVTF_fixed | SixtyFourBits | FP64,
+ UCVTF_fixed = FPFixedPointConvertFixed | 0x00030000,
+ UCVTF_hw_fixed = UCVTF_fixed | FP16,
+ UCVTF_hx_fixed = UCVTF_fixed | SixtyFourBits | FP16,
+ UCVTF_sw_fixed = UCVTF_fixed,
+ UCVTF_sx_fixed = UCVTF_fixed | SixtyFourBits,
+ UCVTF_dw_fixed = UCVTF_fixed | FP64,
+ UCVTF_dx_fixed = UCVTF_fixed | SixtyFourBits | FP64
+};
+
+// Crypto - two register SHA.
+enum Crypto2RegSHAOp : uint32_t {
+ Crypto2RegSHAFixed = 0x5E280800,
+ Crypto2RegSHAFMask = 0xFF3E0C00
+};
+
+// Crypto - three register SHA.
+enum Crypto3RegSHAOp : uint32_t {
+ Crypto3RegSHAFixed = 0x5E000000,
+ Crypto3RegSHAFMask = 0xFF208C00
+};
+
+// Crypto - AES.
+enum CryptoAESOp : uint32_t {
+ CryptoAESFixed = 0x4E280800,
+ CryptoAESFMask = 0xFF3E0C00
+};
+
+// NEON instructions with two register operands.
+enum NEON2RegMiscOp : uint32_t {
+ NEON2RegMiscFixed = 0x0E200800,
+ NEON2RegMiscFMask = 0x9F3E0C00,
+ NEON2RegMiscMask = 0xBF3FFC00,
+ NEON2RegMiscUBit = 0x20000000,
+ NEON_REV64 = NEON2RegMiscFixed | 0x00000000,
+ NEON_REV32 = NEON2RegMiscFixed | 0x20000000,
+ NEON_REV16 = NEON2RegMiscFixed | 0x00001000,
+ NEON_SADDLP = NEON2RegMiscFixed | 0x00002000,
+ NEON_UADDLP = NEON_SADDLP | NEON2RegMiscUBit,
+ NEON_SUQADD = NEON2RegMiscFixed | 0x00003000,
+ NEON_USQADD = NEON_SUQADD | NEON2RegMiscUBit,
+ NEON_CLS = NEON2RegMiscFixed | 0x00004000,
+ NEON_CLZ = NEON2RegMiscFixed | 0x20004000,
+ NEON_CNT = NEON2RegMiscFixed | 0x00005000,
+ NEON_RBIT_NOT = NEON2RegMiscFixed | 0x20005000,
+ NEON_SADALP = NEON2RegMiscFixed | 0x00006000,
+ NEON_UADALP = NEON_SADALP | NEON2RegMiscUBit,
+ NEON_SQABS = NEON2RegMiscFixed | 0x00007000,
+ NEON_SQNEG = NEON2RegMiscFixed | 0x20007000,
+ NEON_CMGT_zero = NEON2RegMiscFixed | 0x00008000,
+ NEON_CMGE_zero = NEON2RegMiscFixed | 0x20008000,
+ NEON_CMEQ_zero = NEON2RegMiscFixed | 0x00009000,
+ NEON_CMLE_zero = NEON2RegMiscFixed | 0x20009000,
+ NEON_CMLT_zero = NEON2RegMiscFixed | 0x0000A000,
+ NEON_ABS = NEON2RegMiscFixed | 0x0000B000,
+ NEON_NEG = NEON2RegMiscFixed | 0x2000B000,
+ NEON_XTN = NEON2RegMiscFixed | 0x00012000,
+ NEON_SQXTUN = NEON2RegMiscFixed | 0x20012000,
+ NEON_SHLL = NEON2RegMiscFixed | 0x20013000,
+ NEON_SQXTN = NEON2RegMiscFixed | 0x00014000,
+ NEON_UQXTN = NEON_SQXTN | NEON2RegMiscUBit,
+
+ NEON2RegMiscOpcode = 0x0001F000,
+ NEON_RBIT_NOT_opcode = NEON_RBIT_NOT & NEON2RegMiscOpcode,
+ NEON_NEG_opcode = NEON_NEG & NEON2RegMiscOpcode,
+ NEON_XTN_opcode = NEON_XTN & NEON2RegMiscOpcode,
+ NEON_UQXTN_opcode = NEON_UQXTN & NEON2RegMiscOpcode,
+
+ // These instructions use only one bit of the size field. The other bit is
+ // used to distinguish between instructions.
+ NEON2RegMiscFPMask = NEON2RegMiscMask | 0x00800000,
+ NEON_FABS = NEON2RegMiscFixed | 0x0080F000,
+ NEON_FNEG = NEON2RegMiscFixed | 0x2080F000,
+ NEON_FCVTN = NEON2RegMiscFixed | 0x00016000,
+ NEON_FCVTXN = NEON2RegMiscFixed | 0x20016000,
+ NEON_FCVTL = NEON2RegMiscFixed | 0x00017000,
+ NEON_FRINT32X = NEON2RegMiscFixed | 0x2001E000,
+ NEON_FRINT32Z = NEON2RegMiscFixed | 0x0001E000,
+ NEON_FRINT64X = NEON2RegMiscFixed | 0x2001F000,
+ NEON_FRINT64Z = NEON2RegMiscFixed | 0x0001F000,
+ NEON_FRINTN = NEON2RegMiscFixed | 0x00018000,
+ NEON_FRINTA = NEON2RegMiscFixed | 0x20018000,
+ NEON_FRINTP = NEON2RegMiscFixed | 0x00818000,
+ NEON_FRINTM = NEON2RegMiscFixed | 0x00019000,
+ NEON_FRINTX = NEON2RegMiscFixed | 0x20019000,
+ NEON_FRINTZ = NEON2RegMiscFixed | 0x00819000,
+ NEON_FRINTI = NEON2RegMiscFixed | 0x20819000,
+ NEON_FCVTNS = NEON2RegMiscFixed | 0x0001A000,
+ NEON_FCVTNU = NEON_FCVTNS | NEON2RegMiscUBit,
+ NEON_FCVTPS = NEON2RegMiscFixed | 0x0081A000,
+ NEON_FCVTPU = NEON_FCVTPS | NEON2RegMiscUBit,
+ NEON_FCVTMS = NEON2RegMiscFixed | 0x0001B000,
+ NEON_FCVTMU = NEON_FCVTMS | NEON2RegMiscUBit,
+ NEON_FCVTZS = NEON2RegMiscFixed | 0x0081B000,
+ NEON_FCVTZU = NEON_FCVTZS | NEON2RegMiscUBit,
+ NEON_FCVTAS = NEON2RegMiscFixed | 0x0001C000,
+ NEON_FCVTAU = NEON_FCVTAS | NEON2RegMiscUBit,
+ NEON_FSQRT = NEON2RegMiscFixed | 0x2081F000,
+ NEON_SCVTF = NEON2RegMiscFixed | 0x0001D000,
+ NEON_UCVTF = NEON_SCVTF | NEON2RegMiscUBit,
+ NEON_URSQRTE = NEON2RegMiscFixed | 0x2081C000,
+ NEON_URECPE = NEON2RegMiscFixed | 0x0081C000,
+ NEON_FRSQRTE = NEON2RegMiscFixed | 0x2081D000,
+ NEON_FRECPE = NEON2RegMiscFixed | 0x0081D000,
+ NEON_FCMGT_zero = NEON2RegMiscFixed | 0x0080C000,
+ NEON_FCMGE_zero = NEON2RegMiscFixed | 0x2080C000,
+ NEON_FCMEQ_zero = NEON2RegMiscFixed | 0x0080D000,
+ NEON_FCMLE_zero = NEON2RegMiscFixed | 0x2080D000,
+ NEON_FCMLT_zero = NEON2RegMiscFixed | 0x0080E000,
+
+ NEON_FCVTL_opcode = NEON_FCVTL & NEON2RegMiscOpcode,
+ NEON_FCVTN_opcode = NEON_FCVTN & NEON2RegMiscOpcode
+};
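+
+// For the FP forms above, size<1> (bit 23) is folded into the opcode check
+// (hence the extra 0x00800000 in NEON2RegMiscFPMask), and size<0> (bit 22)
+// selects single vs. double precision. Hand-worked example: NEON_FABS =
+// NEON2RegMiscFixed | 0x0080F000 = 0x0EA0F800, the base encoding of
+// "fabs v<d>.2s, v<n>.2s" (with Q and size<0> clear).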
+
+// NEON instructions with two register operands (FP16).
+enum NEON2RegMiscFP16Op : uint32_t {
+ NEON2RegMiscFP16Fixed = 0x0E780800,
+ NEON2RegMiscFP16FMask = 0x9F7E0C00,
+ NEON2RegMiscFP16Mask = 0xBFFFFC00,
+ NEON_FRINTN_H = NEON2RegMiscFP16Fixed | 0x00018000,
+ NEON_FRINTM_H = NEON2RegMiscFP16Fixed | 0x00019000,
+ NEON_FCVTNS_H = NEON2RegMiscFP16Fixed | 0x0001A000,
+ NEON_FCVTMS_H = NEON2RegMiscFP16Fixed | 0x0001B000,
+ NEON_FCVTAS_H = NEON2RegMiscFP16Fixed | 0x0001C000,
+ NEON_SCVTF_H = NEON2RegMiscFP16Fixed | 0x0001D000,
+ NEON_FCMGT_H_zero = NEON2RegMiscFP16Fixed | 0x0080C000,
+ NEON_FCMEQ_H_zero = NEON2RegMiscFP16Fixed | 0x0080D000,
+ NEON_FCMLT_H_zero = NEON2RegMiscFP16Fixed | 0x0080E000,
+ NEON_FABS_H = NEON2RegMiscFP16Fixed | 0x0080F000,
+ NEON_FRINTP_H = NEON2RegMiscFP16Fixed | 0x00818000,
+ NEON_FRINTZ_H = NEON2RegMiscFP16Fixed | 0x00819000,
+ NEON_FCVTPS_H = NEON2RegMiscFP16Fixed | 0x0081A000,
+ NEON_FCVTZS_H = NEON2RegMiscFP16Fixed | 0x0081B000,
+ NEON_FRECPE_H = NEON2RegMiscFP16Fixed | 0x0081D000,
+ NEON_FRINTA_H = NEON2RegMiscFP16Fixed | 0x20018000,
+ NEON_FRINTX_H = NEON2RegMiscFP16Fixed | 0x20019000,
+ NEON_FCVTNU_H = NEON2RegMiscFP16Fixed | 0x2001A000,
+ NEON_FCVTMU_H = NEON2RegMiscFP16Fixed | 0x2001B000,
+ NEON_FCVTAU_H = NEON2RegMiscFP16Fixed | 0x2001C000,
+ NEON_UCVTF_H = NEON2RegMiscFP16Fixed | 0x2001D000,
+ NEON_FCMGE_H_zero = NEON2RegMiscFP16Fixed | 0x2080C000,
+ NEON_FCMLE_H_zero = NEON2RegMiscFP16Fixed | 0x2080D000,
+ NEON_FNEG_H = NEON2RegMiscFP16Fixed | 0x2080F000,
+ NEON_FRINTI_H = NEON2RegMiscFP16Fixed | 0x20819000,
+ NEON_FCVTPU_H = NEON2RegMiscFP16Fixed | 0x2081A000,
+ NEON_FCVTZU_H = NEON2RegMiscFP16Fixed | 0x2081B000,
+ NEON_FRSQRTE_H = NEON2RegMiscFP16Fixed | 0x2081D000,
+ NEON_FSQRT_H = NEON2RegMiscFP16Fixed | 0x2081F000
+};
+
+// NEON instructions with three same-type operands.
+enum NEON3SameOp : uint32_t {
+ NEON3SameFixed = 0x0E200400,
+ NEON3SameFMask = 0x9F200400,
+ NEON3SameMask = 0xBF20FC00,
+ NEON3SameUBit = 0x20000000,
+ NEON_ADD = NEON3SameFixed | 0x00008000,
+ NEON_ADDP = NEON3SameFixed | 0x0000B800,
+ NEON_SHADD = NEON3SameFixed | 0x00000000,
+ NEON_SHSUB = NEON3SameFixed | 0x00002000,
+ NEON_SRHADD = NEON3SameFixed | 0x00001000,
+ NEON_CMEQ = NEON3SameFixed | NEON3SameUBit | 0x00008800,
+ NEON_CMGE = NEON3SameFixed | 0x00003800,
+ NEON_CMGT = NEON3SameFixed | 0x00003000,
+ NEON_CMHI = NEON3SameFixed | NEON3SameUBit | NEON_CMGT,
+ NEON_CMHS = NEON3SameFixed | NEON3SameUBit | NEON_CMGE,
+ NEON_CMTST = NEON3SameFixed | 0x00008800,
+ NEON_MLA = NEON3SameFixed | 0x00009000,
+ NEON_MLS = NEON3SameFixed | 0x20009000,
+ NEON_MUL = NEON3SameFixed | 0x00009800,
+ NEON_PMUL = NEON3SameFixed | 0x20009800,
+ NEON_SRSHL = NEON3SameFixed | 0x00005000,
+ NEON_SQSHL = NEON3SameFixed | 0x00004800,
+ NEON_SQRSHL = NEON3SameFixed | 0x00005800,
+ NEON_SSHL = NEON3SameFixed | 0x00004000,
+ NEON_SMAX = NEON3SameFixed | 0x00006000,
+ NEON_SMAXP = NEON3SameFixed | 0x0000A000,
+ NEON_SMIN = NEON3SameFixed | 0x00006800,
+ NEON_SMINP = NEON3SameFixed | 0x0000A800,
+ NEON_SABD = NEON3SameFixed | 0x00007000,
+ NEON_SABA = NEON3SameFixed | 0x00007800,
+ NEON_UABD = NEON3SameFixed | NEON3SameUBit | NEON_SABD,
+ NEON_UABA = NEON3SameFixed | NEON3SameUBit | NEON_SABA,
+ NEON_SQADD = NEON3SameFixed | 0x00000800,
+ NEON_SQSUB = NEON3SameFixed | 0x00002800,
+ NEON_SUB = NEON3SameFixed | NEON3SameUBit | 0x00008000,
+ NEON_UHADD = NEON3SameFixed | NEON3SameUBit | NEON_SHADD,
+ NEON_UHSUB = NEON3SameFixed | NEON3SameUBit | NEON_SHSUB,
+ NEON_URHADD = NEON3SameFixed | NEON3SameUBit | NEON_SRHADD,
+ NEON_UMAX = NEON3SameFixed | NEON3SameUBit | NEON_SMAX,
+ NEON_UMAXP = NEON3SameFixed | NEON3SameUBit | NEON_SMAXP,
+ NEON_UMIN = NEON3SameFixed | NEON3SameUBit | NEON_SMIN,
+ NEON_UMINP = NEON3SameFixed | NEON3SameUBit | NEON_SMINP,
+ NEON_URSHL = NEON3SameFixed | NEON3SameUBit | NEON_SRSHL,
+ NEON_UQADD = NEON3SameFixed | NEON3SameUBit | NEON_SQADD,
+ NEON_UQRSHL = NEON3SameFixed | NEON3SameUBit | NEON_SQRSHL,
+ NEON_UQSHL = NEON3SameFixed | NEON3SameUBit | NEON_SQSHL,
+ NEON_UQSUB = NEON3SameFixed | NEON3SameUBit | NEON_SQSUB,
+ NEON_USHL = NEON3SameFixed | NEON3SameUBit | NEON_SSHL,
+ NEON_SQDMULH = NEON3SameFixed | 0x0000B000,
+ NEON_SQRDMULH = NEON3SameFixed | 0x2000B000,
+
+ // NEON floating point instructions with three same-type operands.
+ NEON3SameFPFixed = NEON3SameFixed | 0x0000C000,
+ NEON3SameFPFMask = NEON3SameFMask | 0x0000C000,
+ NEON3SameFPMask = NEON3SameMask | 0x00800000,
+ NEON_FADD = NEON3SameFixed | 0x0000D000,
+ NEON_FSUB = NEON3SameFixed | 0x0080D000,
+ NEON_FMUL = NEON3SameFixed | 0x2000D800,
+ NEON_FDIV = NEON3SameFixed | 0x2000F800,
+ NEON_FMAX = NEON3SameFixed | 0x0000F000,
+ NEON_FMAXNM = NEON3SameFixed | 0x0000C000,
+ NEON_FMAXP = NEON3SameFixed | 0x2000F000,
+ NEON_FMAXNMP = NEON3SameFixed | 0x2000C000,
+ NEON_FMIN = NEON3SameFixed | 0x0080F000,
+ NEON_FMINNM = NEON3SameFixed | 0x0080C000,
+ NEON_FMINP = NEON3SameFixed | 0x2080F000,
+ NEON_FMINNMP = NEON3SameFixed | 0x2080C000,
+ NEON_FMLA = NEON3SameFixed | 0x0000C800,
+ NEON_FMLS = NEON3SameFixed | 0x0080C800,
+ NEON_FMULX = NEON3SameFixed | 0x0000D800,
+ NEON_FRECPS = NEON3SameFixed | 0x0000F800,
+ NEON_FRSQRTS = NEON3SameFixed | 0x0080F800,
+ NEON_FABD = NEON3SameFixed | 0x2080D000,
+ NEON_FADDP = NEON3SameFixed | 0x2000D000,
+ NEON_FCMEQ = NEON3SameFixed | 0x0000E000,
+ NEON_FCMGE = NEON3SameFixed | 0x2000E000,
+ NEON_FCMGT = NEON3SameFixed | 0x2080E000,
+ NEON_FACGE = NEON3SameFixed | 0x2000E800,
+ NEON_FACGT = NEON3SameFixed | 0x2080E800,
+
+ // NEON logical instructions with three same-type operands.
+ NEON3SameLogicalFixed = NEON3SameFixed | 0x00001800,
+ NEON3SameLogicalFMask = NEON3SameFMask | 0x0000F800,
+ NEON3SameLogicalMask = 0xBFE0FC00,
+ NEON3SameLogicalFormatMask = NEON_Q,
+ NEON_AND = NEON3SameLogicalFixed | 0x00000000,
+ NEON_ORR = NEON3SameLogicalFixed | 0x00A00000,
+ NEON_ORN = NEON3SameLogicalFixed | 0x00C00000,
+ NEON_EOR = NEON3SameLogicalFixed | 0x20000000,
+ NEON_BIC = NEON3SameLogicalFixed | 0x00400000,
+ NEON_BIF = NEON3SameLogicalFixed | 0x20C00000,
+ NEON_BIT = NEON3SameLogicalFixed | 0x20800000,
+ NEON_BSL = NEON3SameLogicalFixed | 0x20400000,
+
+ // FHM (FMLAL-like) instructions have an oddball encoding scheme under 3Same.
+ NEON3SameFHMMask = 0xBFE0FC00, // U size opcode
+ NEON_FMLAL = NEON3SameFixed | 0x0000E800, // 0 00 11101
+ NEON_FMLAL2 = NEON3SameFixed | 0x2000C800, // 1 00 11001
+ NEON_FMLSL = NEON3SameFixed | 0x0080E800, // 0 10 11101
+ NEON_FMLSL2 = NEON3SameFixed | 0x2080C800 // 1 10 11001
+};
+
+enum NEON3SameFP16 : uint32_t {
+ NEON3SameFP16Fixed = 0x0E400400,
+ NEON3SameFP16FMask = 0x9F60C400,
+ NEON3SameFP16Mask = 0xBFE0FC00,
+ NEON_FMAXNM_H = NEON3SameFP16Fixed | 0x00000000,
+ NEON_FMLA_H = NEON3SameFP16Fixed | 0x00000800,
+ NEON_FADD_H = NEON3SameFP16Fixed | 0x00001000,
+ NEON_FMULX_H = NEON3SameFP16Fixed | 0x00001800,
+ NEON_FCMEQ_H = NEON3SameFP16Fixed | 0x00002000,
+ NEON_FMAX_H = NEON3SameFP16Fixed | 0x00003000,
+ NEON_FRECPS_H = NEON3SameFP16Fixed | 0x00003800,
+ NEON_FMINNM_H = NEON3SameFP16Fixed | 0x00800000,
+ NEON_FMLS_H = NEON3SameFP16Fixed | 0x00800800,
+ NEON_FSUB_H = NEON3SameFP16Fixed | 0x00801000,
+ NEON_FMIN_H = NEON3SameFP16Fixed | 0x00803000,
+ NEON_FRSQRTS_H = NEON3SameFP16Fixed | 0x00803800,
+ NEON_FMAXNMP_H = NEON3SameFP16Fixed | 0x20000000,
+ NEON_FADDP_H = NEON3SameFP16Fixed | 0x20001000,
+ NEON_FMUL_H = NEON3SameFP16Fixed | 0x20001800,
+ NEON_FCMGE_H = NEON3SameFP16Fixed | 0x20002000,
+ NEON_FACGE_H = NEON3SameFP16Fixed | 0x20002800,
+ NEON_FMAXP_H = NEON3SameFP16Fixed | 0x20003000,
+ NEON_FDIV_H = NEON3SameFP16Fixed | 0x20003800,
+ NEON_FMINNMP_H = NEON3SameFP16Fixed | 0x20800000,
+ NEON_FABD_H = NEON3SameFP16Fixed | 0x20801000,
+ NEON_FCMGT_H = NEON3SameFP16Fixed | 0x20802000,
+ NEON_FACGT_H = NEON3SameFP16Fixed | 0x20802800,
+ NEON_FMINP_H = NEON3SameFP16Fixed | 0x20803000
+};
+
+// 'Extra' NEON instructions with three same-type operands.
+enum NEON3SameExtraOp : uint32_t {
+ NEON3SameExtraFixed = 0x0E008400,
+ NEON3SameExtraUBit = 0x20000000,
+ NEON3SameExtraFMask = 0x9E208400,
+ NEON3SameExtraMask = 0xBE20FC00,
+ NEON_SQRDMLAH = NEON3SameExtraFixed | NEON3SameExtraUBit,
+ NEON_SQRDMLSH = NEON3SameExtraFixed | NEON3SameExtraUBit | 0x00000800,
+ NEON_SDOT = NEON3SameExtraFixed | 0x00001000,
+ NEON_UDOT = NEON3SameExtraFixed | NEON3SameExtraUBit | 0x00001000,
+
+ /* v8.3 Complex Numbers */
+ NEON3SameExtraFCFixed = 0x2E00C400,
+ NEON3SameExtraFCFMask = 0xBF20C400,
+ // FCMLA fixes opcode<3:2>, and uses opcode<1:0> to encode <rotate>.
+ NEON3SameExtraFCMLAMask = NEON3SameExtraFCFMask | 0x00006000,
+ NEON_FCMLA = NEON3SameExtraFCFixed,
+ // FCADD fixes opcode<3:2, 0>, and uses opcode<1> to encode <rotate>.
+ NEON3SameExtraFCADDMask = NEON3SameExtraFCFMask | 0x00006800,
+ NEON_FCADD = NEON3SameExtraFCFixed | 0x00002000
+ // Other encodings under NEON3SameExtraFCFMask are UNALLOCATED.
+};
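+
+// In bit terms: the rotate sits in opcode<1:0> = bits 12:11 for FCMLA
+// (#0/#90/#180/#270 encoded as 0-3) and in opcode<1> = bit 12 for FCADD
+// (#90/#270), which is exactly the region the two masks above leave
+// unchecked relative to NEON3SameExtraFCFMask.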
+
+// NEON instructions with three different-type operands.
+enum NEON3DifferentOp : uint32_t {
+ NEON3DifferentFixed = 0x0E200000,
+ NEON3DifferentFMask = 0x9F200C00,
+ NEON3DifferentMask = 0xFF20FC00,
+ NEON_ADDHN = NEON3DifferentFixed | 0x00004000,
+ NEON_ADDHN2 = NEON_ADDHN | NEON_Q,
+ NEON_PMULL = NEON3DifferentFixed | 0x0000E000,
+ NEON_PMULL2 = NEON_PMULL | NEON_Q,
+ NEON_RADDHN = NEON3DifferentFixed | 0x20004000,
+ NEON_RADDHN2 = NEON_RADDHN | NEON_Q,
+ NEON_RSUBHN = NEON3DifferentFixed | 0x20006000,
+ NEON_RSUBHN2 = NEON_RSUBHN | NEON_Q,
+ NEON_SABAL = NEON3DifferentFixed | 0x00005000,
+ NEON_SABAL2 = NEON_SABAL | NEON_Q,
+ NEON_SABDL = NEON3DifferentFixed | 0x00007000,
+ NEON_SABDL2 = NEON_SABDL | NEON_Q,
+ NEON_SADDL = NEON3DifferentFixed | 0x00000000,
+ NEON_SADDL2 = NEON_SADDL | NEON_Q,
+ NEON_SADDW = NEON3DifferentFixed | 0x00001000,
+ NEON_SADDW2 = NEON_SADDW | NEON_Q,
+ NEON_SMLAL = NEON3DifferentFixed | 0x00008000,
+ NEON_SMLAL2 = NEON_SMLAL | NEON_Q,
+ NEON_SMLSL = NEON3DifferentFixed | 0x0000A000,
+ NEON_SMLSL2 = NEON_SMLSL | NEON_Q,
+ NEON_SMULL = NEON3DifferentFixed | 0x0000C000,
+ NEON_SMULL2 = NEON_SMULL | NEON_Q,
+ NEON_SSUBL = NEON3DifferentFixed | 0x00002000,
+ NEON_SSUBL2 = NEON_SSUBL | NEON_Q,
+ NEON_SSUBW = NEON3DifferentFixed | 0x00003000,
+ NEON_SSUBW2 = NEON_SSUBW | NEON_Q,
+ NEON_SQDMLAL = NEON3DifferentFixed | 0x00009000,
+ NEON_SQDMLAL2 = NEON_SQDMLAL | NEON_Q,
+ NEON_SQDMLSL = NEON3DifferentFixed | 0x0000B000,
+ NEON_SQDMLSL2 = NEON_SQDMLSL | NEON_Q,
+ NEON_SQDMULL = NEON3DifferentFixed | 0x0000D000,
+ NEON_SQDMULL2 = NEON_SQDMULL | NEON_Q,
+ NEON_SUBHN = NEON3DifferentFixed | 0x00006000,
+ NEON_SUBHN2 = NEON_SUBHN | NEON_Q,
+ NEON_UABAL = NEON_SABAL | NEON3SameUBit,
+ NEON_UABAL2 = NEON_UABAL | NEON_Q,
+ NEON_UABDL = NEON_SABDL | NEON3SameUBit,
+ NEON_UABDL2 = NEON_UABDL | NEON_Q,
+ NEON_UADDL = NEON_SADDL | NEON3SameUBit,
+ NEON_UADDL2 = NEON_UADDL | NEON_Q,
+ NEON_UADDW = NEON_SADDW | NEON3SameUBit,
+ NEON_UADDW2 = NEON_UADDW | NEON_Q,
+ NEON_UMLAL = NEON_SMLAL | NEON3SameUBit,
+ NEON_UMLAL2 = NEON_UMLAL | NEON_Q,
+ NEON_UMLSL = NEON_SMLSL | NEON3SameUBit,
+ NEON_UMLSL2 = NEON_UMLSL | NEON_Q,
+ NEON_UMULL = NEON_SMULL | NEON3SameUBit,
+ NEON_UMULL2 = NEON_UMULL | NEON_Q,
+ NEON_USUBL = NEON_SSUBL | NEON3SameUBit,
+ NEON_USUBL2 = NEON_USUBL | NEON_Q,
+ NEON_USUBW = NEON_SSUBW | NEON3SameUBit,
+ NEON_USUBW2 = NEON_USUBW | NEON_Q
+};
+
+// NEON instructions operating across vectors.
+enum NEONAcrossLanesOp : uint32_t {
+ NEONAcrossLanesFixed = 0x0E300800,
+ NEONAcrossLanesFMask = 0x9F3E0C00,
+ NEONAcrossLanesMask = 0xBF3FFC00,
+ NEON_ADDV = NEONAcrossLanesFixed | 0x0001B000,
+ NEON_SADDLV = NEONAcrossLanesFixed | 0x00003000,
+ NEON_UADDLV = NEONAcrossLanesFixed | 0x20003000,
+ NEON_SMAXV = NEONAcrossLanesFixed | 0x0000A000,
+ NEON_SMINV = NEONAcrossLanesFixed | 0x0001A000,
+ NEON_UMAXV = NEONAcrossLanesFixed | 0x2000A000,
+ NEON_UMINV = NEONAcrossLanesFixed | 0x2001A000,
+
+ NEONAcrossLanesFP16Fixed = NEONAcrossLanesFixed | 0x0000C000,
+ NEONAcrossLanesFP16FMask = NEONAcrossLanesFMask | 0x2000C000,
+ NEONAcrossLanesFP16Mask = NEONAcrossLanesMask | 0x20800000,
+ NEON_FMAXNMV_H = NEONAcrossLanesFP16Fixed | 0x00000000,
+ NEON_FMAXV_H = NEONAcrossLanesFP16Fixed | 0x00003000,
+ NEON_FMINNMV_H = NEONAcrossLanesFP16Fixed | 0x00800000,
+ NEON_FMINV_H = NEONAcrossLanesFP16Fixed | 0x00803000,
+
+ // NEON floating point across instructions.
+ NEONAcrossLanesFPFixed = NEONAcrossLanesFixed | 0x2000C000,
+ NEONAcrossLanesFPFMask = NEONAcrossLanesFMask | 0x2000C000,
+ NEONAcrossLanesFPMask = NEONAcrossLanesMask | 0x20800000,
+
+ NEON_FMAXV = NEONAcrossLanesFPFixed | 0x2000F000,
+ NEON_FMINV = NEONAcrossLanesFPFixed | 0x2080F000,
+ NEON_FMAXNMV = NEONAcrossLanesFPFixed | 0x2000C000,
+ NEON_FMINNMV = NEONAcrossLanesFPFixed | 0x2080C000
+};
+
+// NEON instructions with indexed element operand.
+enum NEONByIndexedElementOp : uint32_t {
+ NEONByIndexedElementFixed = 0x0F000000,
+ NEONByIndexedElementFMask = 0x9F000400,
+ NEONByIndexedElementMask = 0xBF00F400,
+ NEON_MUL_byelement = NEONByIndexedElementFixed | 0x00008000,
+ NEON_MLA_byelement = NEONByIndexedElementFixed | 0x20000000,
+ NEON_MLS_byelement = NEONByIndexedElementFixed | 0x20004000,
+ NEON_SMULL_byelement = NEONByIndexedElementFixed | 0x0000A000,
+ NEON_SMLAL_byelement = NEONByIndexedElementFixed | 0x00002000,
+ NEON_SMLSL_byelement = NEONByIndexedElementFixed | 0x00006000,
+ NEON_UMULL_byelement = NEONByIndexedElementFixed | 0x2000A000,
+ NEON_UMLAL_byelement = NEONByIndexedElementFixed | 0x20002000,
+ NEON_UMLSL_byelement = NEONByIndexedElementFixed | 0x20006000,
+ NEON_SQDMULL_byelement = NEONByIndexedElementFixed | 0x0000B000,
+ NEON_SQDMLAL_byelement = NEONByIndexedElementFixed | 0x00003000,
+ NEON_SQDMLSL_byelement = NEONByIndexedElementFixed | 0x00007000,
+ NEON_SQDMULH_byelement = NEONByIndexedElementFixed | 0x0000C000,
+ NEON_SQRDMULH_byelement = NEONByIndexedElementFixed | 0x0000D000,
+ NEON_SDOT_byelement = NEONByIndexedElementFixed | 0x0000E000,
+ NEON_SQRDMLAH_byelement = NEONByIndexedElementFixed | 0x2000D000,
+ NEON_UDOT_byelement = NEONByIndexedElementFixed | 0x2000E000,
+ NEON_SQRDMLSH_byelement = NEONByIndexedElementFixed | 0x2000F000,
+
+ NEON_FMLA_H_byelement = NEONByIndexedElementFixed | 0x00001000,
+ NEON_FMLS_H_byelement = NEONByIndexedElementFixed | 0x00005000,
+ NEON_FMUL_H_byelement = NEONByIndexedElementFixed | 0x00009000,
+ NEON_FMULX_H_byelement = NEONByIndexedElementFixed | 0x20009000,
+
+ // Floating point instructions.
+ NEONByIndexedElementFPFixed = NEONByIndexedElementFixed | 0x00800000,
+ NEONByIndexedElementFPMask = NEONByIndexedElementMask | 0x00800000,
+ NEON_FMLA_byelement = NEONByIndexedElementFPFixed | 0x00001000,
+ NEON_FMLS_byelement = NEONByIndexedElementFPFixed | 0x00005000,
+ NEON_FMUL_byelement = NEONByIndexedElementFPFixed | 0x00009000,
+ NEON_FMULX_byelement = NEONByIndexedElementFPFixed | 0x20009000,
+
+ // FMLAL-like instructions.
+ // For all cases: U = x, size = 10, opcode = xx00
+ NEONByIndexedElementFPLongFixed = NEONByIndexedElementFixed | 0x00800000,
+ NEONByIndexedElementFPLongFMask = NEONByIndexedElementFMask | 0x00C03000,
+ NEONByIndexedElementFPLongMask = 0xBFC0F400,
+ NEON_FMLAL_H_byelement = NEONByIndexedElementFixed | 0x00800000,
+ NEON_FMLAL2_H_byelement = NEONByIndexedElementFixed | 0x20808000,
+ NEON_FMLSL_H_byelement = NEONByIndexedElementFixed | 0x00804000,
+ NEON_FMLSL2_H_byelement = NEONByIndexedElementFixed | 0x2080C000,
+
+ // Complex instruction(s).
+ // This is necessary because the 'rot' encoding moves into the
+ // NEONByIndex..Mask space.
+ NEONByIndexedElementFPComplexMask = 0xBF009400,
+ NEON_FCMLA_byelement = NEONByIndexedElementFixed | 0x20001000
+};
+
+// NEON register copy.
+enum NEONCopyOp : uint32_t {
+ NEONCopyFixed = 0x0E000400,
+ NEONCopyFMask = 0x9FE08400,
+ NEONCopyMask = 0x3FE08400,
+ NEONCopyInsElementMask = NEONCopyMask | 0x40000000,
+ NEONCopyInsGeneralMask = NEONCopyMask | 0x40007800,
+ NEONCopyDupElementMask = NEONCopyMask | 0x20007800,
+ NEONCopyDupGeneralMask = NEONCopyDupElementMask,
+ NEONCopyUmovMask = NEONCopyMask | 0x20007800,
+ NEONCopySmovMask = NEONCopyMask | 0x20007800,
+ NEON_INS_ELEMENT = NEONCopyFixed | 0x60000000,
+ NEON_INS_GENERAL = NEONCopyFixed | 0x40001800,
+ NEON_DUP_ELEMENT = NEONCopyFixed | 0x00000000,
+ NEON_DUP_GENERAL = NEONCopyFixed | 0x00000800,
+ NEON_SMOV = NEONCopyFixed | 0x00002800,
+ NEON_UMOV = NEONCopyFixed | 0x00003800
+};
+
+// NEON extract.
+enum NEONExtractOp : uint32_t {
+ NEONExtractFixed = 0x2E000000,
+ NEONExtractFMask = 0xBF208400,
+ NEONExtractMask = 0xBFE08400,
+ NEON_EXT = NEONExtractFixed | 0x00000000
+};
+
+enum NEONLoadStoreMultiOp : uint32_t {
+ NEONLoadStoreMultiL = 0x00400000,
+ NEONLoadStoreMulti1_1v = 0x00007000,
+ NEONLoadStoreMulti1_2v = 0x0000A000,
+ NEONLoadStoreMulti1_3v = 0x00006000,
+ NEONLoadStoreMulti1_4v = 0x00002000,
+ NEONLoadStoreMulti2 = 0x00008000,
+ NEONLoadStoreMulti3 = 0x00004000,
+ NEONLoadStoreMulti4 = 0x00000000
+};
+
+// NEON load/store multiple structures.
+enum NEONLoadStoreMultiStructOp : uint32_t {
+ NEONLoadStoreMultiStructFixed = 0x0C000000,
+ NEONLoadStoreMultiStructFMask = 0xBFBF0000,
+ NEONLoadStoreMultiStructMask = 0xBFFFF000,
+ NEONLoadStoreMultiStructStore = NEONLoadStoreMultiStructFixed,
+ NEONLoadStoreMultiStructLoad = NEONLoadStoreMultiStructFixed |
+ NEONLoadStoreMultiL,
+ NEON_LD1_1v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_1v,
+ NEON_LD1_2v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_2v,
+ NEON_LD1_3v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_3v,
+ NEON_LD1_4v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_4v,
+ NEON_LD2 = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti2,
+ NEON_LD3 = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti3,
+ NEON_LD4 = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti4,
+ NEON_ST1_1v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_1v,
+ NEON_ST1_2v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_2v,
+ NEON_ST1_3v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_3v,
+ NEON_ST1_4v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_4v,
+ NEON_ST2 = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti2,
+ NEON_ST3 = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti3,
+ NEON_ST4 = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti4
+};
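+
+// Each entry is the L (load) bit plus a register-count opcode OR-ed onto the
+// fixed pattern. Hand-worked example: NEON_LD1_4v =
+// NEONLoadStoreMultiStructFixed | NEONLoadStoreMultiL | NEONLoadStoreMulti1_4v
+// = 0x0C402000, the base encoding of "ld1 {v0.8b-v3.8b}, [x0]".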
+
+// NEON load/store multiple structures with post-index addressing.
+enum NEONLoadStoreMultiStructPostIndexOp : uint32_t {
+ NEONLoadStoreMultiStructPostIndexFixed = 0x0C800000,
+ NEONLoadStoreMultiStructPostIndexFMask = 0xBFA00000,
+ NEONLoadStoreMultiStructPostIndexMask = 0xBFE0F000,
+ NEONLoadStoreMultiStructPostIndex = 0x00800000,
+ NEON_LD1_1v_post = NEON_LD1_1v | NEONLoadStoreMultiStructPostIndex,
+ NEON_LD1_2v_post = NEON_LD1_2v | NEONLoadStoreMultiStructPostIndex,
+ NEON_LD1_3v_post = NEON_LD1_3v | NEONLoadStoreMultiStructPostIndex,
+ NEON_LD1_4v_post = NEON_LD1_4v | NEONLoadStoreMultiStructPostIndex,
+ NEON_LD2_post = NEON_LD2 | NEONLoadStoreMultiStructPostIndex,
+ NEON_LD3_post = NEON_LD3 | NEONLoadStoreMultiStructPostIndex,
+ NEON_LD4_post = NEON_LD4 | NEONLoadStoreMultiStructPostIndex,
+ NEON_ST1_1v_post = NEON_ST1_1v | NEONLoadStoreMultiStructPostIndex,
+ NEON_ST1_2v_post = NEON_ST1_2v | NEONLoadStoreMultiStructPostIndex,
+ NEON_ST1_3v_post = NEON_ST1_3v | NEONLoadStoreMultiStructPostIndex,
+ NEON_ST1_4v_post = NEON_ST1_4v | NEONLoadStoreMultiStructPostIndex,
+ NEON_ST2_post = NEON_ST2 | NEONLoadStoreMultiStructPostIndex,
+ NEON_ST3_post = NEON_ST3 | NEONLoadStoreMultiStructPostIndex,
+ NEON_ST4_post = NEON_ST4 | NEONLoadStoreMultiStructPostIndex
+};
+
+enum NEONLoadStoreSingleOp : uint32_t {
+ NEONLoadStoreSingle1 = 0x00000000,
+ NEONLoadStoreSingle2 = 0x00200000,
+ NEONLoadStoreSingle3 = 0x00002000,
+ NEONLoadStoreSingle4 = 0x00202000,
+ NEONLoadStoreSingleL = 0x00400000,
+ NEONLoadStoreSingle_b = 0x00000000,
+ NEONLoadStoreSingle_h = 0x00004000,
+ NEONLoadStoreSingle_s = 0x00008000,
+ NEONLoadStoreSingle_d = 0x00008400,
+ NEONLoadStoreSingleAllLanes = 0x0000C000,
+ NEONLoadStoreSingleLenMask = 0x00202000
+};
+
+// NEON load/store single structure.
+enum NEONLoadStoreSingleStructOp : uint32_t {
+ NEONLoadStoreSingleStructFixed = 0x0D000000,
+ NEONLoadStoreSingleStructFMask = 0xBF9F0000,
+ NEONLoadStoreSingleStructMask = 0xBFFFE000,
+ NEONLoadStoreSingleStructStore = NEONLoadStoreSingleStructFixed,
+ NEONLoadStoreSingleStructLoad = NEONLoadStoreSingleStructFixed |
+ NEONLoadStoreSingleL,
+ NEONLoadStoreSingleStructLoad1 = NEONLoadStoreSingle1 |
+ NEONLoadStoreSingleStructLoad,
+ NEONLoadStoreSingleStructLoad2 = NEONLoadStoreSingle2 |
+ NEONLoadStoreSingleStructLoad,
+ NEONLoadStoreSingleStructLoad3 = NEONLoadStoreSingle3 |
+ NEONLoadStoreSingleStructLoad,
+ NEONLoadStoreSingleStructLoad4 = NEONLoadStoreSingle4 |
+ NEONLoadStoreSingleStructLoad,
+ NEONLoadStoreSingleStructStore1 = NEONLoadStoreSingle1 |
+ NEONLoadStoreSingleStructFixed,
+ NEONLoadStoreSingleStructStore2 = NEONLoadStoreSingle2 |
+ NEONLoadStoreSingleStructFixed,
+ NEONLoadStoreSingleStructStore3 = NEONLoadStoreSingle3 |
+ NEONLoadStoreSingleStructFixed,
+ NEONLoadStoreSingleStructStore4 = NEONLoadStoreSingle4 |
+ NEONLoadStoreSingleStructFixed,
+ NEON_LD1_b = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_b,
+ NEON_LD1_h = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_h,
+ NEON_LD1_s = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_s,
+ NEON_LD1_d = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_d,
+ NEON_LD1R = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingleAllLanes,
+ NEON_ST1_b = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_b,
+ NEON_ST1_h = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_h,
+ NEON_ST1_s = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_s,
+ NEON_ST1_d = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_d,
+
+ NEON_LD2_b = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_b,
+ NEON_LD2_h = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_h,
+ NEON_LD2_s = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_s,
+ NEON_LD2_d = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_d,
+ NEON_LD2R = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingleAllLanes,
+ NEON_ST2_b = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_b,
+ NEON_ST2_h = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_h,
+ NEON_ST2_s = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_s,
+ NEON_ST2_d = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_d,
+
+ NEON_LD3_b = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_b,
+ NEON_LD3_h = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_h,
+ NEON_LD3_s = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_s,
+ NEON_LD3_d = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_d,
+ NEON_LD3R = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingleAllLanes,
+ NEON_ST3_b = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_b,
+ NEON_ST3_h = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_h,
+ NEON_ST3_s = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_s,
+ NEON_ST3_d = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_d,
+
+ NEON_LD4_b = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_b,
+ NEON_LD4_h = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_h,
+ NEON_LD4_s = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_s,
+ NEON_LD4_d = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_d,
+ NEON_LD4R = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingleAllLanes,
+ NEON_ST4_b = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_b,
+ NEON_ST4_h = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_h,
+ NEON_ST4_s = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_s,
+ NEON_ST4_d = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_d
+};
+
+// NEON load/store single structure with post-index addressing.
+enum NEONLoadStoreSingleStructPostIndexOp : uint32_t {
+ NEONLoadStoreSingleStructPostIndexFixed = 0x0D800000,
+ NEONLoadStoreSingleStructPostIndexFMask = 0xBF800000,
+ NEONLoadStoreSingleStructPostIndexMask = 0xBFE0E000,
+ NEONLoadStoreSingleStructPostIndex = 0x00800000,
+ NEON_LD1_b_post = NEON_LD1_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD1_h_post = NEON_LD1_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD1_s_post = NEON_LD1_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD1_d_post = NEON_LD1_d | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD1R_post = NEON_LD1R | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST1_b_post = NEON_ST1_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST1_h_post = NEON_ST1_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST1_s_post = NEON_ST1_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST1_d_post = NEON_ST1_d | NEONLoadStoreSingleStructPostIndex,
+
+ NEON_LD2_b_post = NEON_LD2_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD2_h_post = NEON_LD2_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD2_s_post = NEON_LD2_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD2_d_post = NEON_LD2_d | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD2R_post = NEON_LD2R | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST2_b_post = NEON_ST2_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST2_h_post = NEON_ST2_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST2_s_post = NEON_ST2_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST2_d_post = NEON_ST2_d | NEONLoadStoreSingleStructPostIndex,
+
+ NEON_LD3_b_post = NEON_LD3_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD3_h_post = NEON_LD3_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD3_s_post = NEON_LD3_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD3_d_post = NEON_LD3_d | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD3R_post = NEON_LD3R | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST3_b_post = NEON_ST3_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST3_h_post = NEON_ST3_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST3_s_post = NEON_ST3_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST3_d_post = NEON_ST3_d | NEONLoadStoreSingleStructPostIndex,
+
+ NEON_LD4_b_post = NEON_LD4_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD4_h_post = NEON_LD4_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD4_s_post = NEON_LD4_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD4_d_post = NEON_LD4_d | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD4R_post = NEON_LD4R | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST4_b_post = NEON_ST4_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST4_h_post = NEON_ST4_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST4_s_post = NEON_ST4_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST4_d_post = NEON_ST4_d | NEONLoadStoreSingleStructPostIndex
+};
+
+// NEON modified immediate.
+enum NEONModifiedImmediateOp : uint32_t {
+ NEONModifiedImmediateFixed = 0x0F000400,
+ NEONModifiedImmediateFMask = 0x9FF80400,
+ NEONModifiedImmediateOpBit = 0x20000000,
+ NEONModifiedImmediate_FMOV = NEONModifiedImmediateFixed | 0x00000800,
+ NEONModifiedImmediate_MOVI = NEONModifiedImmediateFixed | 0x00000000,
+ NEONModifiedImmediate_MVNI = NEONModifiedImmediateFixed | 0x20000000,
+ NEONModifiedImmediate_ORR = NEONModifiedImmediateFixed | 0x00001000,
+ NEONModifiedImmediate_BIC = NEONModifiedImmediateFixed | 0x20001000
+};
+
+// NEON shift immediate.
+enum NEONShiftImmediateOp : uint32_t {
+ NEONShiftImmediateFixed = 0x0F000400,
+ NEONShiftImmediateFMask = 0x9F800400,
+ NEONShiftImmediateMask = 0xBF80FC00,
+ NEONShiftImmediateUBit = 0x20000000,
+ NEON_SHL = NEONShiftImmediateFixed | 0x00005000,
+ NEON_SSHLL = NEONShiftImmediateFixed | 0x0000A000,
+ NEON_USHLL = NEONShiftImmediateFixed | 0x2000A000,
+ NEON_SLI = NEONShiftImmediateFixed | 0x20005000,
+ NEON_SRI = NEONShiftImmediateFixed | 0x20004000,
+ NEON_SHRN = NEONShiftImmediateFixed | 0x00008000,
+ NEON_RSHRN = NEONShiftImmediateFixed | 0x00008800,
+ NEON_UQSHRN = NEONShiftImmediateFixed | 0x20009000,
+ NEON_UQRSHRN = NEONShiftImmediateFixed | 0x20009800,
+ NEON_SQSHRN = NEONShiftImmediateFixed | 0x00009000,
+ NEON_SQRSHRN = NEONShiftImmediateFixed | 0x00009800,
+ NEON_SQSHRUN = NEONShiftImmediateFixed | 0x20008000,
+ NEON_SQRSHRUN = NEONShiftImmediateFixed | 0x20008800,
+ NEON_SSHR = NEONShiftImmediateFixed | 0x00000000,
+ NEON_SRSHR = NEONShiftImmediateFixed | 0x00002000,
+ NEON_USHR = NEONShiftImmediateFixed | 0x20000000,
+ NEON_URSHR = NEONShiftImmediateFixed | 0x20002000,
+ NEON_SSRA = NEONShiftImmediateFixed | 0x00001000,
+ NEON_SRSRA = NEONShiftImmediateFixed | 0x00003000,
+ NEON_USRA = NEONShiftImmediateFixed | 0x20001000,
+ NEON_URSRA = NEONShiftImmediateFixed | 0x20003000,
+ NEON_SQSHLU = NEONShiftImmediateFixed | 0x20006000,
+ NEON_SCVTF_imm = NEONShiftImmediateFixed | 0x0000E000,
+ NEON_UCVTF_imm = NEONShiftImmediateFixed | 0x2000E000,
+ NEON_FCVTZS_imm = NEONShiftImmediateFixed | 0x0000F800,
+ NEON_FCVTZU_imm = NEONShiftImmediateFixed | 0x2000F800,
+ NEON_SQSHL_imm = NEONShiftImmediateFixed | 0x00007000,
+ NEON_UQSHL_imm = NEONShiftImmediateFixed | 0x20007000
+};
+
+// NEON table.
+enum NEONTableOp : uint32_t {
+ NEONTableFixed = 0x0E000000,
+ NEONTableFMask = 0xBF208C00,
+ NEONTableExt = 0x00001000,
+ NEONTableMask = 0xBF20FC00,
+ NEON_TBL_1v = NEONTableFixed | 0x00000000,
+ NEON_TBL_2v = NEONTableFixed | 0x00002000,
+ NEON_TBL_3v = NEONTableFixed | 0x00004000,
+ NEON_TBL_4v = NEONTableFixed | 0x00006000,
+ NEON_TBX_1v = NEON_TBL_1v | NEONTableExt,
+ NEON_TBX_2v = NEON_TBL_2v | NEONTableExt,
+ NEON_TBX_3v = NEON_TBL_3v | NEONTableExt,
+ NEON_TBX_4v = NEON_TBL_4v | NEONTableExt
+};
+
+// NEON perm.
+enum NEONPermOp : uint32_t {
+ NEONPermFixed = 0x0E000800,
+ NEONPermFMask = 0xBF208C00,
+ NEONPermMask = 0x3F20FC00,
+ NEON_UZP1 = NEONPermFixed | 0x00001000,
+ NEON_TRN1 = NEONPermFixed | 0x00002000,
+ NEON_ZIP1 = NEONPermFixed | 0x00003000,
+ NEON_UZP2 = NEONPermFixed | 0x00005000,
+ NEON_TRN2 = NEONPermFixed | 0x00006000,
+ NEON_ZIP2 = NEONPermFixed | 0x00007000
+};
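+
+// The following is an illustrative sketch, not upstream VIXL code: throughout
+// these enums, <Group>Fixed holds the value of the encoding's fixed bits,
+// <Group>FMask selects those fixed bits, and <Group>Mask also selects the
+// bits that distinguish instructions within the group. A decoder can
+// therefore classify a raw instruction word along these lines:
+//
+//   bool IsNEONPermGroup(uint32_t instr) {
+//     return (instr & NEONPermFMask) == NEONPermFixed;
+//   }
+//   bool IsNEONZip1(uint32_t instr) {
+//     return (instr & NEONPermMask) == NEON_ZIP1;
+//   }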
+
+// NEON scalar instructions with two register operands.
+enum NEONScalar2RegMiscOp : uint32_t {
+ NEONScalar2RegMiscFixed = 0x5E200800,
+ NEONScalar2RegMiscFMask = 0xDF3E0C00,
+ NEONScalar2RegMiscMask = NEON_Q | NEONScalar | NEON2RegMiscMask,
+ NEON_CMGT_zero_scalar = NEON_Q | NEONScalar | NEON_CMGT_zero,
+ NEON_CMEQ_zero_scalar = NEON_Q | NEONScalar | NEON_CMEQ_zero,
+ NEON_CMLT_zero_scalar = NEON_Q | NEONScalar | NEON_CMLT_zero,
+ NEON_CMGE_zero_scalar = NEON_Q | NEONScalar | NEON_CMGE_zero,
+ NEON_CMLE_zero_scalar = NEON_Q | NEONScalar | NEON_CMLE_zero,
+ NEON_ABS_scalar = NEON_Q | NEONScalar | NEON_ABS,
+ NEON_SQABS_scalar = NEON_Q | NEONScalar | NEON_SQABS,
+ NEON_NEG_scalar = NEON_Q | NEONScalar | NEON_NEG,
+ NEON_SQNEG_scalar = NEON_Q | NEONScalar | NEON_SQNEG,
+ NEON_SQXTN_scalar = NEON_Q | NEONScalar | NEON_SQXTN,
+ NEON_UQXTN_scalar = NEON_Q | NEONScalar | NEON_UQXTN,
+ NEON_SQXTUN_scalar = NEON_Q | NEONScalar | NEON_SQXTUN,
+ NEON_SUQADD_scalar = NEON_Q | NEONScalar | NEON_SUQADD,
+ NEON_USQADD_scalar = NEON_Q | NEONScalar | NEON_USQADD,
+
+ NEONScalar2RegMiscOpcode = NEON2RegMiscOpcode,
+ NEON_NEG_scalar_opcode = NEON_NEG_scalar & NEONScalar2RegMiscOpcode,
+
+ NEONScalar2RegMiscFPMask = NEONScalar2RegMiscMask | 0x00800000,
+ NEON_FRSQRTE_scalar = NEON_Q | NEONScalar | NEON_FRSQRTE,
+ NEON_FRECPE_scalar = NEON_Q | NEONScalar | NEON_FRECPE,
+ NEON_SCVTF_scalar = NEON_Q | NEONScalar | NEON_SCVTF,
+ NEON_UCVTF_scalar = NEON_Q | NEONScalar | NEON_UCVTF,
+ NEON_FCMGT_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGT_zero,
+ NEON_FCMEQ_zero_scalar = NEON_Q | NEONScalar | NEON_FCMEQ_zero,
+ NEON_FCMLT_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLT_zero,
+ NEON_FCMGE_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGE_zero,
+ NEON_FCMLE_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLE_zero,
+ NEON_FRECPX_scalar = NEONScalar2RegMiscFixed | 0x0081F000,
+ NEON_FCVTNS_scalar = NEON_Q | NEONScalar | NEON_FCVTNS,
+ NEON_FCVTNU_scalar = NEON_Q | NEONScalar | NEON_FCVTNU,
+ NEON_FCVTPS_scalar = NEON_Q | NEONScalar | NEON_FCVTPS,
+ NEON_FCVTPU_scalar = NEON_Q | NEONScalar | NEON_FCVTPU,
+ NEON_FCVTMS_scalar = NEON_Q | NEONScalar | NEON_FCVTMS,
+ NEON_FCVTMU_scalar = NEON_Q | NEONScalar | NEON_FCVTMU,
+ NEON_FCVTZS_scalar = NEON_Q | NEONScalar | NEON_FCVTZS,
+ NEON_FCVTZU_scalar = NEON_Q | NEONScalar | NEON_FCVTZU,
+ NEON_FCVTAS_scalar = NEON_Q | NEONScalar | NEON_FCVTAS,
+ NEON_FCVTAU_scalar = NEON_Q | NEONScalar | NEON_FCVTAU,
+ NEON_FCVTXN_scalar = NEON_Q | NEONScalar | NEON_FCVTXN
+};
+
+// NEON scalar instructions with two register operands (FP16).
+enum NEONScalar2RegMiscFP16Op : uint32_t {
+ NEONScalar2RegMiscFP16Fixed = 0x5E780800,
+ NEONScalar2RegMiscFP16FMask = 0xDF7E0C00,
+ NEONScalar2RegMiscFP16Mask = 0xFFFFFC00,
+ NEON_FCVTNS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTNS_H,
+ NEON_FCVTMS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTMS_H,
+ NEON_FCVTAS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTAS_H,
+ NEON_SCVTF_H_scalar = NEON_Q | NEONScalar | NEON_SCVTF_H,
+ NEON_FCMGT_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGT_H_zero,
+ NEON_FCMEQ_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMEQ_H_zero,
+ NEON_FCMLT_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLT_H_zero,
+ NEON_FCVTPS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTPS_H,
+ NEON_FCVTZS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTZS_H,
+ NEON_FRECPE_H_scalar = NEON_Q | NEONScalar | NEON_FRECPE_H,
+ NEON_FRECPX_H_scalar = NEONScalar2RegMiscFP16Fixed | 0x0081F000,
+ NEON_FCVTNU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTNU_H,
+ NEON_FCVTMU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTMU_H,
+ NEON_FCVTAU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTAU_H,
+ NEON_UCVTF_H_scalar = NEON_Q | NEONScalar | NEON_UCVTF_H,
+ NEON_FCMGE_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGE_H_zero,
+ NEON_FCMLE_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLE_H_zero,
+ NEON_FCVTPU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTPU_H,
+ NEON_FCVTZU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTZU_H,
+ NEON_FRSQRTE_H_scalar = NEON_Q | NEONScalar | NEON_FRSQRTE_H
+};
+
+// NEON scalar instructions with three same-type operands.
+enum NEONScalar3SameOp : uint32_t {
+ NEONScalar3SameFixed = 0x5E200400,
+ NEONScalar3SameFMask = 0xDF200400,
+ NEONScalar3SameMask = 0xFF20FC00,
+ NEON_ADD_scalar = NEON_Q | NEONScalar | NEON_ADD,
+ NEON_CMEQ_scalar = NEON_Q | NEONScalar | NEON_CMEQ,
+ NEON_CMGE_scalar = NEON_Q | NEONScalar | NEON_CMGE,
+ NEON_CMGT_scalar = NEON_Q | NEONScalar | NEON_CMGT,
+ NEON_CMHI_scalar = NEON_Q | NEONScalar | NEON_CMHI,
+ NEON_CMHS_scalar = NEON_Q | NEONScalar | NEON_CMHS,
+ NEON_CMTST_scalar = NEON_Q | NEONScalar | NEON_CMTST,
+ NEON_SUB_scalar = NEON_Q | NEONScalar | NEON_SUB,
+ NEON_UQADD_scalar = NEON_Q | NEONScalar | NEON_UQADD,
+ NEON_SQADD_scalar = NEON_Q | NEONScalar | NEON_SQADD,
+ NEON_UQSUB_scalar = NEON_Q | NEONScalar | NEON_UQSUB,
+ NEON_SQSUB_scalar = NEON_Q | NEONScalar | NEON_SQSUB,
+ NEON_USHL_scalar = NEON_Q | NEONScalar | NEON_USHL,
+ NEON_SSHL_scalar = NEON_Q | NEONScalar | NEON_SSHL,
+ NEON_UQSHL_scalar = NEON_Q | NEONScalar | NEON_UQSHL,
+ NEON_SQSHL_scalar = NEON_Q | NEONScalar | NEON_SQSHL,
+ NEON_URSHL_scalar = NEON_Q | NEONScalar | NEON_URSHL,
+ NEON_SRSHL_scalar = NEON_Q | NEONScalar | NEON_SRSHL,
+ NEON_UQRSHL_scalar = NEON_Q | NEONScalar | NEON_UQRSHL,
+ NEON_SQRSHL_scalar = NEON_Q | NEONScalar | NEON_SQRSHL,
+ NEON_SQDMULH_scalar = NEON_Q | NEONScalar | NEON_SQDMULH,
+ NEON_SQRDMULH_scalar = NEON_Q | NEONScalar | NEON_SQRDMULH,
+
+ // NEON floating point scalar instructions with three same-type operands.
+ NEONScalar3SameFPFixed = NEONScalar3SameFixed | 0x0000C000,
+ NEONScalar3SameFPFMask = NEONScalar3SameFMask | 0x0000C000,
+ NEONScalar3SameFPMask = NEONScalar3SameMask | 0x00800000,
+ NEON_FACGE_scalar = NEON_Q | NEONScalar | NEON_FACGE,
+ NEON_FACGT_scalar = NEON_Q | NEONScalar | NEON_FACGT,
+ NEON_FCMEQ_scalar = NEON_Q | NEONScalar | NEON_FCMEQ,
+ NEON_FCMGE_scalar = NEON_Q | NEONScalar | NEON_FCMGE,
+ NEON_FCMGT_scalar = NEON_Q | NEONScalar | NEON_FCMGT,
+ NEON_FMULX_scalar = NEON_Q | NEONScalar | NEON_FMULX,
+ NEON_FRECPS_scalar = NEON_Q | NEONScalar | NEON_FRECPS,
+ NEON_FRSQRTS_scalar = NEON_Q | NEONScalar | NEON_FRSQRTS,
+ NEON_FABD_scalar = NEON_Q | NEONScalar | NEON_FABD
+};
+
+// NEON scalar instructions with three different-type operands.
+enum NEONScalar3DiffOp : uint32_t {
+ NEONScalar3DiffFixed = 0x5E200000,
+ NEONScalar3DiffFMask = 0xDF200C00,
+ NEONScalar3DiffMask = NEON_Q | NEONScalar | NEON3DifferentMask,
+ NEON_SQDMLAL_scalar = NEON_Q | NEONScalar | NEON_SQDMLAL,
+ NEON_SQDMLSL_scalar = NEON_Q | NEONScalar | NEON_SQDMLSL,
+ NEON_SQDMULL_scalar = NEON_Q | NEONScalar | NEON_SQDMULL
+};
+
+// NEON scalar instructions with indexed element operand.
+enum NEONScalarByIndexedElementOp : uint32_t {
+ NEONScalarByIndexedElementFixed = 0x5F000000,
+ NEONScalarByIndexedElementFMask = 0xDF000400,
+ NEONScalarByIndexedElementMask = 0xFF00F400,
+ NEON_SQDMLAL_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMLAL_byelement,
+ NEON_SQDMLSL_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMLSL_byelement,
+ NEON_SQDMULL_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMULL_byelement,
+ NEON_SQDMULH_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMULH_byelement,
+ NEON_SQRDMULH_byelement_scalar
+ = NEON_Q | NEONScalar | NEON_SQRDMULH_byelement,
+ NEON_SQRDMLAH_byelement_scalar
+ = NEON_Q | NEONScalar | NEON_SQRDMLAH_byelement,
+ NEON_SQRDMLSH_byelement_scalar
+ = NEON_Q | NEONScalar | NEON_SQRDMLSH_byelement,
+ NEON_FMLA_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLA_H_byelement,
+ NEON_FMLS_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLS_H_byelement,
+ NEON_FMUL_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMUL_H_byelement,
+ NEON_FMULX_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMULX_H_byelement,
+
+ // Floating point instructions.
+ NEONScalarByIndexedElementFPFixed
+ = NEONScalarByIndexedElementFixed | 0x00800000,
+ NEONScalarByIndexedElementFPMask
+ = NEONScalarByIndexedElementMask | 0x00800000,
+ NEON_FMLA_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLA_byelement,
+ NEON_FMLS_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLS_byelement,
+ NEON_FMUL_byelement_scalar = NEON_Q | NEONScalar | NEON_FMUL_byelement,
+ NEON_FMULX_byelement_scalar = NEON_Q | NEONScalar | NEON_FMULX_byelement
+};
+
+// NEON scalar register copy.
+enum NEONScalarCopyOp : uint32_t {
+ NEONScalarCopyFixed = 0x5E000400,
+ NEONScalarCopyFMask = 0xDFE08400,
+ NEONScalarCopyMask = 0xFFE0FC00,
+ NEON_DUP_ELEMENT_scalar = NEON_Q | NEONScalar | NEON_DUP_ELEMENT
+};
+
+// NEON scalar pairwise instructions.
+enum NEONScalarPairwiseOp : uint32_t {
+ NEONScalarPairwiseFixed = 0x5E300800,
+ NEONScalarPairwiseFMask = 0xDF3E0C00,
+ NEONScalarPairwiseMask = 0xFFB1F800,
+ NEON_ADDP_scalar = NEONScalarPairwiseFixed | 0x0081B000,
+ NEON_FMAXNMP_h_scalar = NEONScalarPairwiseFixed | 0x0000C000,
+ NEON_FADDP_h_scalar = NEONScalarPairwiseFixed | 0x0000D000,
+ NEON_FMAXP_h_scalar = NEONScalarPairwiseFixed | 0x0000F000,
+ NEON_FMINNMP_h_scalar = NEONScalarPairwiseFixed | 0x0080C000,
+ NEON_FMINP_h_scalar = NEONScalarPairwiseFixed | 0x0080F000,
+ NEON_FMAXNMP_scalar = NEONScalarPairwiseFixed | 0x2000C000,
+ NEON_FMINNMP_scalar = NEONScalarPairwiseFixed | 0x2080C000,
+ NEON_FADDP_scalar = NEONScalarPairwiseFixed | 0x2000D000,
+ NEON_FMAXP_scalar = NEONScalarPairwiseFixed | 0x2000F000,
+ NEON_FMINP_scalar = NEONScalarPairwiseFixed | 0x2080F000
+};
+
+// NEON scalar shift immediate.
+enum NEONScalarShiftImmediateOp : uint32_t {
+ NEONScalarShiftImmediateFixed = 0x5F000400,
+ NEONScalarShiftImmediateFMask = 0xDF800400,
+ NEONScalarShiftImmediateMask = 0xFF80FC00,
+ NEON_SHL_scalar = NEON_Q | NEONScalar | NEON_SHL,
+ NEON_SLI_scalar = NEON_Q | NEONScalar | NEON_SLI,
+ NEON_SRI_scalar = NEON_Q | NEONScalar | NEON_SRI,
+ NEON_SSHR_scalar = NEON_Q | NEONScalar | NEON_SSHR,
+ NEON_USHR_scalar = NEON_Q | NEONScalar | NEON_USHR,
+ NEON_SRSHR_scalar = NEON_Q | NEONScalar | NEON_SRSHR,
+ NEON_URSHR_scalar = NEON_Q | NEONScalar | NEON_URSHR,
+ NEON_SSRA_scalar = NEON_Q | NEONScalar | NEON_SSRA,
+ NEON_USRA_scalar = NEON_Q | NEONScalar | NEON_USRA,
+ NEON_SRSRA_scalar = NEON_Q | NEONScalar | NEON_SRSRA,
+ NEON_URSRA_scalar = NEON_Q | NEONScalar | NEON_URSRA,
+ NEON_UQSHRN_scalar = NEON_Q | NEONScalar | NEON_UQSHRN,
+ NEON_UQRSHRN_scalar = NEON_Q | NEONScalar | NEON_UQRSHRN,
+ NEON_SQSHRN_scalar = NEON_Q | NEONScalar | NEON_SQSHRN,
+ NEON_SQRSHRN_scalar = NEON_Q | NEONScalar | NEON_SQRSHRN,
+ NEON_SQSHRUN_scalar = NEON_Q | NEONScalar | NEON_SQSHRUN,
+ NEON_SQRSHRUN_scalar = NEON_Q | NEONScalar | NEON_SQRSHRUN,
+ NEON_SQSHLU_scalar = NEON_Q | NEONScalar | NEON_SQSHLU,
+ NEON_SQSHL_imm_scalar = NEON_Q | NEONScalar | NEON_SQSHL_imm,
+ NEON_UQSHL_imm_scalar = NEON_Q | NEONScalar | NEON_UQSHL_imm,
+ NEON_SCVTF_imm_scalar = NEON_Q | NEONScalar | NEON_SCVTF_imm,
+ NEON_UCVTF_imm_scalar = NEON_Q | NEONScalar | NEON_UCVTF_imm,
+ NEON_FCVTZS_imm_scalar = NEON_Q | NEONScalar | NEON_FCVTZS_imm,
+ NEON_FCVTZU_imm_scalar = NEON_Q | NEONScalar | NEON_FCVTZU_imm
+};
+
+enum ReservedOp : uint32_t {
+ ReservedFixed = 0x00000000,
+ ReservedFMask = 0x1E000000,
+ ReservedMask = 0xFFFF0000,
+
+ UDF = ReservedFixed | 0x00000000
+};
+
+// Unimplemented and unallocated instructions. These are defined to make fixed
+// bit assertion easier.
+enum UnimplementedOp : uint32_t {
+ UnimplementedFixed = 0x00000000,
+ UnimplementedFMask = 0x00000000
+};
+
+enum UnallocatedOp : uint32_t {
+ UnallocatedFixed = 0x00000000,
+ UnallocatedFMask = 0x00000000
+};
+
+// Instruction bit pattern for an undefined instruction that will trigger a
+// SIGILL at runtime.
+//
+// There are a couple of strategies we could use here. No unencoded
+// instruction in the instruction set is guaranteed to remain unencoded, but
+// there are some instructions that are currently (as of 2018) unencoded and
+// that make good candidates.
+//
+// Ideally, unencoded instructions should be non-destructive to the register
+// state, and should be unencoded at all exception levels.
+//
+// At the trap the pc will hold the address of the offending instruction.
+//
+// Some candidates for unencoded instructions:
+//
+// 0xd4a00000 (essentially dcps0, a good one since it is nonsensical and may
+// remain unencoded in the future for that reason)
+// 0x33000000 (bfm variant)
+// 0xd67f0000 (br variant)
+// 0x5ac00c00 (rbit variant)
+//
+// This instruction is "dcps0"; it also has a 16-bit payload field if needed.
+static constexpr uint32_t UNDEFINED_INST_PATTERN = 0xd4a00000;
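+
+// An illustrative sketch (not upstream VIXL code): the pattern can be
+// recognised, ignoring the DCPS-style 16-bit payload field in bits [20:5],
+// with a helper along these lines. The payload mask is an assumption made
+// for this sketch.
+static constexpr uint32_t UNDEFINED_INST_PAYLOAD_MASK = 0x001FFFE0;
+static constexpr bool IsUndefinedInstPattern(uint32_t inst) {
+  return (inst & ~UNDEFINED_INST_PAYLOAD_MASK) == UNDEFINED_INST_PATTERN;
+}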
+
+} // namespace vixl
+
+#endif // VIXL_A64_CONSTANTS_A64_H_
diff --git a/js/src/jit/arm64/vixl/Cpu-Features-vixl.cpp b/js/src/jit/arm64/vixl/Cpu-Features-vixl.cpp
new file mode 100644
index 0000000000..f31c22fbf5
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Cpu-Features-vixl.cpp
@@ -0,0 +1,231 @@
+// Copyright 2018, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#include "jit/arm64/vixl/Cpu-Features-vixl.h"
+
+#include <ostream>
+
+#include "jit/arm64/vixl/Cpu-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+#define VIXL_USE_AARCH64_CPU_HELPERS
+
+namespace vixl {
+
+static uint64_t MakeFeatureMask(CPUFeatures::Feature feature) {
+ if (feature == CPUFeatures::kNone) {
+ return 0;
+ } else {
+ // Check that the shift is well-defined, and that the feature is valid.
+ VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures <=
+ (sizeof(uint64_t) * 8));
+ VIXL_ASSERT(feature < CPUFeatures::kNumberOfFeatures);
+ return UINT64_C(1) << feature;
+ }
+}
+
+CPUFeatures::CPUFeatures(Feature feature0,
+ Feature feature1,
+ Feature feature2,
+ Feature feature3)
+ : features_(0) {
+ Combine(feature0, feature1, feature2, feature3);
+}
+
+CPUFeatures CPUFeatures::All() {
+ CPUFeatures all;
+ // Check that the shift is well-defined.
+ VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures < (sizeof(uint64_t) * 8));
+ all.features_ = (UINT64_C(1) << kNumberOfFeatures) - 1;
+ return all;
+}
+
+CPUFeatures CPUFeatures::InferFromIDRegisters() {
+ // This function assumes that kIDRegisterEmulation is available.
+ CPUFeatures features(CPUFeatures::kIDRegisterEmulation);
+#ifdef VIXL_USE_AARCH64_CPU_HELPERS
+ // Note that the Linux kernel filters these values during emulation, so the
+ // results may not exactly match the expected hardware support.
+ features.Combine(CPU::InferCPUFeaturesFromIDRegisters());
+#endif
+ return features;
+}
+
+CPUFeatures CPUFeatures::InferFromOS(QueryIDRegistersOption option) {
+#ifdef VIXL_USE_AARCH64_CPU_HELPERS
+ return CPU::InferCPUFeaturesFromOS(option);
+#else
+ USE(option);
+ return CPUFeatures();
+#endif
+}
+
+void CPUFeatures::Combine(const CPUFeatures& other) {
+ features_ |= other.features_;
+}
+
+void CPUFeatures::Combine(Feature feature0,
+ Feature feature1,
+ Feature feature2,
+ Feature feature3) {
+ features_ |= MakeFeatureMask(feature0);
+ features_ |= MakeFeatureMask(feature1);
+ features_ |= MakeFeatureMask(feature2);
+ features_ |= MakeFeatureMask(feature3);
+}
+
+void CPUFeatures::Remove(const CPUFeatures& other) {
+ features_ &= ~other.features_;
+}
+
+void CPUFeatures::Remove(Feature feature0,
+ Feature feature1,
+ Feature feature2,
+ Feature feature3) {
+ features_ &= ~MakeFeatureMask(feature0);
+ features_ &= ~MakeFeatureMask(feature1);
+ features_ &= ~MakeFeatureMask(feature2);
+ features_ &= ~MakeFeatureMask(feature3);
+}
+
+CPUFeatures CPUFeatures::With(const CPUFeatures& other) const {
+ CPUFeatures f(*this);
+ f.Combine(other);
+ return f;
+}
+
+CPUFeatures CPUFeatures::With(Feature feature0,
+ Feature feature1,
+ Feature feature2,
+ Feature feature3) const {
+ CPUFeatures f(*this);
+ f.Combine(feature0, feature1, feature2, feature3);
+ return f;
+}
+
+CPUFeatures CPUFeatures::Without(const CPUFeatures& other) const {
+ CPUFeatures f(*this);
+ f.Remove(other);
+ return f;
+}
+
+CPUFeatures CPUFeatures::Without(Feature feature0,
+ Feature feature1,
+ Feature feature2,
+ Feature feature3) const {
+ CPUFeatures f(*this);
+ f.Remove(feature0, feature1, feature2, feature3);
+ return f;
+}
+
+bool CPUFeatures::Has(const CPUFeatures& other) const {
+ return (features_ & other.features_) == other.features_;
+}
+
+bool CPUFeatures::Has(Feature feature0,
+ Feature feature1,
+ Feature feature2,
+ Feature feature3) const {
+ uint64_t mask = MakeFeatureMask(feature0) | MakeFeatureMask(feature1) |
+ MakeFeatureMask(feature2) | MakeFeatureMask(feature3);
+ return (features_ & mask) == mask;
+}
+
+size_t CPUFeatures::Count() const { return CountSetBits(features_); }
+
+std::ostream& operator<<(std::ostream& os, CPUFeatures::Feature feature) {
+ // clang-format off
+ switch (feature) {
+#define VIXL_FORMAT_FEATURE(SYMBOL, NAME, CPUINFO) \
+ case CPUFeatures::SYMBOL: \
+ return os << NAME;
+VIXL_CPU_FEATURE_LIST(VIXL_FORMAT_FEATURE)
+#undef VIXL_FORMAT_FEATURE
+ case CPUFeatures::kNone:
+ return os << "none";
+ case CPUFeatures::kNumberOfFeatures:
+ VIXL_UNREACHABLE();
+ }
+ // clang-format on
+ VIXL_UNREACHABLE();
+ return os;
+}
+
+CPUFeatures::const_iterator CPUFeatures::begin() const {
+ if (features_ == 0) return const_iterator(this, kNone);
+
+ int feature_number = CountTrailingZeros(features_);
+ vixl::CPUFeatures::Feature feature =
+ static_cast<CPUFeatures::Feature>(feature_number);
+ return const_iterator(this, feature);
+}
+
+CPUFeatures::const_iterator CPUFeatures::end() const {
+ return const_iterator(this, kNone);
+}
+
+std::ostream& operator<<(std::ostream& os, const CPUFeatures& features) {
+ CPUFeatures::const_iterator it = features.begin();
+ while (it != features.end()) {
+ os << *it;
+ ++it;
+ if (it != features.end()) os << ", ";
+ }
+ return os;
+}
+
+bool CPUFeaturesConstIterator::operator==(
+ const CPUFeaturesConstIterator& other) const {
+ VIXL_ASSERT(IsValid());
+ return (cpu_features_ == other.cpu_features_) && (feature_ == other.feature_);
+}
+
+CPUFeatures::Feature CPUFeaturesConstIterator::operator++() { // Prefix
+ VIXL_ASSERT(IsValid());
+ do {
+ // Find the next feature. The order is unspecified.
+ feature_ = static_cast<CPUFeatures::Feature>(feature_ + 1);
+ if (feature_ == CPUFeatures::kNumberOfFeatures) {
+ feature_ = CPUFeatures::kNone;
+ VIXL_STATIC_ASSERT(CPUFeatures::kNone == -1);
+ }
+ VIXL_ASSERT(CPUFeatures::kNone <= feature_);
+ VIXL_ASSERT(feature_ < CPUFeatures::kNumberOfFeatures);
+ // cpu_features_->Has(kNone) is always true, so this will terminate even if
+ // the features list is empty.
+ } while (!cpu_features_->Has(feature_));
+ return feature_;
+}
+
+CPUFeatures::Feature CPUFeaturesConstIterator::operator++(int) { // Postfix
+ CPUFeatures::Feature result = feature_;
+ ++(*this);
+ return result;
+}
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Cpu-Features-vixl.h b/js/src/jit/arm64/vixl/Cpu-Features-vixl.h
new file mode 100644
index 0000000000..b980233bf2
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Cpu-Features-vixl.h
@@ -0,0 +1,397 @@
+// Copyright 2018, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_CPU_FEATURES_H
+#define VIXL_CPU_FEATURES_H
+
+#include <ostream>
+
+#include "jit/arm64/vixl/Globals-vixl.h"
+
+
+namespace vixl {
+
+
+// clang-format off
+#define VIXL_CPU_FEATURE_LIST(V) \
+ /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_* */ \
+ /* registers, so that the detailed feature registers can be read */ \
+ /* directly. */ \
+ V(kIDRegisterEmulation, "ID register emulation", "cpuid") \
+ \
+ V(kFP, "FP", "fp") \
+ V(kNEON, "NEON", "asimd") \
+ V(kCRC32, "CRC32", "crc32") \
+ /* Cryptographic support instructions. */ \
+ V(kAES, "AES", "aes") \
+ V(kSHA1, "SHA1", "sha1") \
+ V(kSHA2, "SHA2", "sha2") \
+ /* A form of PMULL{2} with a 128-bit (1Q) result. */ \
+ V(kPmull1Q, "Pmull1Q", "pmull") \
+ /* Atomic operations on memory: CAS, LDADD, STADD, SWP, etc. */ \
+ V(kAtomics, "Atomics", "atomics") \
+ /* Limited ordering regions: LDLAR, STLLR and their variants. */ \
+ V(kLORegions, "LORegions", NULL) \
+ /* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH. */ \
+ V(kRDM, "RDM", "asimdrdm") \
+ /* Scalable Vector Extension. */ \
+ V(kSVE, "SVE", "sve") \
+ /* SDOT and UDOT support (in NEON). */ \
+ V(kDotProduct, "DotProduct", "asimddp") \
+ /* Half-precision (FP16) support for FP and NEON, respectively. */ \
+ V(kFPHalf, "FPHalf", "fphp") \
+ V(kNEONHalf, "NEONHalf", "asimdhp") \
+ /* The RAS extension, including the ESB instruction. */ \
+ V(kRAS, "RAS", NULL) \
+ /* Data cache clean to the point of persistence: DC CVAP. */ \
+ V(kDCPoP, "DCPoP", "dcpop") \
+ /* Data cache clean to the point of deep persistence: DC CVADP. */ \
+ V(kDCCVADP, "DCCVADP", NULL) \
+ /* Cryptographic support instructions. */ \
+ V(kSHA3, "SHA3", "sha3") \
+ V(kSHA512, "SHA512", "sha512") \
+ V(kSM3, "SM3", "sm3") \
+ V(kSM4, "SM4", "sm4") \
+ /* Pointer authentication for addresses. */ \
+ V(kPAuth, "PAuth", NULL) \
+ /* Pointer authentication for addresses uses QARMA. */ \
+ V(kPAuthQARMA, "PAuthQARMA", NULL) \
+ /* Generic authentication (using the PACGA instruction). */ \
+ V(kPAuthGeneric, "PAuthGeneric", NULL) \
+ /* Generic authentication uses QARMA. */ \
+ V(kPAuthGenericQARMA, "PAuthGenericQARMA", NULL) \
+ /* JavaScript-style FP -> integer conversion instruction: FJCVTZS. */ \
+ V(kJSCVT, "JSCVT", "jscvt") \
+ /* Complex number support for NEON: FCMLA and FCADD. */ \
+ V(kFcma, "Fcma", "fcma") \
+ /* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \
+ V(kRCpc, "RCpc", "lrcpc") \
+ V(kRCpcImm, "RCpc (imm)", "ilrcpc") \
+ /* Flag manipulation instructions: SETF{8,16}, CFINV, RMIF. */ \
+ V(kFlagM, "FlagM", "flagm") \
+ /* Unaligned single-copy atomicity. */ \
+ V(kUSCAT, "USCAT", "uscat") \
+ /* FP16 fused multiply-add or -subtract long: FMLAL{2}, FMLSL{2}. */ \
+ V(kFHM, "FHM", "asimdfhm") \
+ /* Data-independent timing (for selected instructions). */ \
+ V(kDIT, "DIT", "dit") \
+ /* Branch target identification. */ \
+ V(kBTI, "BTI", NULL) \
+ /* Flag manipulation instructions: {AX,XA}FLAG */ \
+ V(kAXFlag, "AXFlag", NULL) \
+  /* Random number generation extension. */                                  \
+ V(kRNG, "RNG", NULL) \
+ /* Floating-point round to {32,64}-bit integer. */ \
+ V(kFrintToFixedSizedInt,"Frint (bounded)", NULL)
+// clang-format on
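+
+// VIXL_CPU_FEATURE_LIST is an "X macro"; instantiating it with a one-line
+// macro stamps out one entry per feature. As an illustrative sketch (not
+// upstream code), a table of the /proc/cpuinfo tokens could be generated like
+// this (entries with no cpuinfo name become NULL):
+//
+//   #define VIXL_CPUINFO_NAME(SYMBOL, NAME, CPUINFO) CPUINFO,
+//   static const char* const kCpuInfoNames[] = {
+//       VIXL_CPU_FEATURE_LIST(VIXL_CPUINFO_NAME)};
+//   #undef VIXL_CPUINFO_NAME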
+
+
+class CPUFeaturesConstIterator;
+
+// A representation of the set of features known to be supported by the target
+// device. Each feature is represented by a simple boolean flag.
+//
+// - When the Assembler is asked to assemble an instruction, it asserts (in
+// debug mode) that the necessary features are available.
+//
+// - TODO: The MacroAssembler relies on the Assembler's assertions, but in
+// some cases it may be useful for macros to generate a fall-back sequence
+// in case features are not available.
+//
+// - The Simulator assumes by default that all features are available, but it
+// is possible to configure it to fail if the simulated code uses features
+// that are not enabled.
+//
+// The Simulator also offers pseudo-instructions to allow features to be
+// enabled and disabled dynamically. This is useful when you want to ensure
+// that some features are constrained to certain areas of code.
+//
+// - The base Disassembler knows nothing about CPU features, but the
+// PrintDisassembler can be configured to annotate its output with warnings
+// about unavailable features. The Simulator uses this feature when
+// instruction trace is enabled.
+//
+// - The Decoder-based components -- the Simulator and PrintDisassembler --
+// rely on a CPUFeaturesAuditor visitor. This visitor keeps a list of
+// features actually encountered so that a large block of code can be
+// examined (either directly or through simulation), and the required
+// features analysed later.
+//
+// Expected usage:
+//
+// // By default, VIXL uses CPUFeatures::AArch64LegacyBaseline(), for
+//   // compatibility with older versions of VIXL.
+// MacroAssembler masm;
+//
+// // Generate code only for the current CPU.
+// masm.SetCPUFeatures(CPUFeatures::InferFromOS());
+//
+// // Turn off feature checking entirely.
+// masm.SetCPUFeatures(CPUFeatures::All());
+//
+// Feature set manipulation:
+//
+// CPUFeatures f; // The default constructor gives an empty set.
+// // Individual features can be added (or removed).
+//   f.Combine(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kAES);
+// f.Remove(CPUFeatures::kNEON);
+//
+// // Some helpers exist for extensions that provide several features.
+// f.Remove(CPUFeatures::All());
+// f.Combine(CPUFeatures::AArch64LegacyBaseline());
+//
+// // Chained construction is also possible.
+// CPUFeatures g =
+// f.With(CPUFeatures::kPmull1Q).Without(CPUFeatures::kCRC32);
+//
+// // Features can be queried. Where multiple features are given, they are
+// // combined with logical AND.
+//   if (f.Has(CPUFeatures::kNEON)) { ... }
+//   if (f.Has(CPUFeatures::kFP, CPUFeatures::kNEON)) { ... }
+//   if (f.Has(g)) { ... }
+//   // If the empty set is requested, the result is always 'true'.
+//   VIXL_ASSERT(f.Has(CPUFeatures()));
+//
+// // For debug and reporting purposes, features can be enumerated (or
+// // printed directly):
+// std::cout << CPUFeatures::kNEON; // Prints something like "NEON".
+// std::cout << f; // Prints something like "FP, NEON, CRC32".
+class CPUFeatures {
+ public:
+ // clang-format off
+ // Individual features.
+ // These should be treated as opaque tokens. User code should not rely on
+ // specific numeric values or ordering.
+ enum Feature {
+ // Refer to VIXL_CPU_FEATURE_LIST (above) for the list of feature names that
+ // this class supports.
+
+ kNone = -1,
+#define VIXL_DECLARE_FEATURE(SYMBOL, NAME, CPUINFO) SYMBOL,
+ VIXL_CPU_FEATURE_LIST(VIXL_DECLARE_FEATURE)
+#undef VIXL_DECLARE_FEATURE
+ kNumberOfFeatures
+ };
+ // clang-format on
+
+ // By default, construct with no features enabled.
+ CPUFeatures() : features_(0) {}
+
+ // Construct with some features already enabled.
+ CPUFeatures(Feature feature0,
+ Feature feature1 = kNone,
+ Feature feature2 = kNone,
+ Feature feature3 = kNone);
+
+ // Construct with all features enabled. This can be used to disable feature
+ // checking: `Has(...)` returns true regardless of the argument.
+ static CPUFeatures All();
+
+ // Construct an empty CPUFeatures. This is equivalent to the default
+ // constructor, but is provided for symmetry and convenience.
+ static CPUFeatures None() { return CPUFeatures(); }
+
+  // The presence of these features was assumed by versions of VIXL before this
+ // API was added, so using this set by default ensures API compatibility.
+ static CPUFeatures AArch64LegacyBaseline() {
+ return CPUFeatures(kFP, kNEON, kCRC32);
+ }
+
+ // Construct a new CPUFeatures object using ID registers. This assumes that
+ // kIDRegisterEmulation is present.
+ static CPUFeatures InferFromIDRegisters();
+
+ enum QueryIDRegistersOption {
+ kDontQueryIDRegisters,
+ kQueryIDRegistersIfAvailable
+ };
+
+ // Construct a new CPUFeatures object based on what the OS reports.
+ static CPUFeatures InferFromOS(
+ QueryIDRegistersOption option = kQueryIDRegistersIfAvailable);
+
+ // Combine another CPUFeatures object into this one. Features that already
+ // exist in this set are left unchanged.
+ void Combine(const CPUFeatures& other);
+
+ // Combine specific features into this set. Features that already exist in
+ // this set are left unchanged.
+ void Combine(Feature feature0,
+ Feature feature1 = kNone,
+ Feature feature2 = kNone,
+ Feature feature3 = kNone);
+
+ // Remove features in another CPUFeatures object from this one.
+ void Remove(const CPUFeatures& other);
+
+ // Remove specific features from this set.
+ void Remove(Feature feature0,
+ Feature feature1 = kNone,
+ Feature feature2 = kNone,
+ Feature feature3 = kNone);
+
+ // Chaining helpers for convenient construction.
+ CPUFeatures With(const CPUFeatures& other) const;
+ CPUFeatures With(Feature feature0,
+ Feature feature1 = kNone,
+ Feature feature2 = kNone,
+ Feature feature3 = kNone) const;
+ CPUFeatures Without(const CPUFeatures& other) const;
+ CPUFeatures Without(Feature feature0,
+ Feature feature1 = kNone,
+ Feature feature2 = kNone,
+ Feature feature3 = kNone) const;
+
+ // Query features.
+ // Note that an empty query (like `Has(kNone)`) always returns true.
+ bool Has(const CPUFeatures& other) const;
+ bool Has(Feature feature0,
+ Feature feature1 = kNone,
+ Feature feature2 = kNone,
+ Feature feature3 = kNone) const;
+
+ // Return the number of enabled features.
+ size_t Count() const;
+ bool HasNoFeatures() const { return Count() == 0; }
+
+ // Check for equivalence.
+ bool operator==(const CPUFeatures& other) const {
+ return Has(other) && other.Has(*this);
+ }
+ bool operator!=(const CPUFeatures& other) const { return !(*this == other); }
+
+ typedef CPUFeaturesConstIterator const_iterator;
+
+ const_iterator begin() const;
+ const_iterator end() const;
+
+ private:
+ // Each bit represents a feature. This field will be replaced as needed if
+ // features are added.
+ uint64_t features_;
+
+ friend std::ostream& operator<<(std::ostream& os,
+ const vixl::CPUFeatures& features);
+};
+
+std::ostream& operator<<(std::ostream& os, vixl::CPUFeatures::Feature feature);
+std::ostream& operator<<(std::ostream& os, const vixl::CPUFeatures& features);
+
+// This is not a proper C++ iterator type, but it simulates enough of
+// ForwardIterator that simple loops can be written.
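+//
+// For example (an illustrative sketch; `features` is assumed to be a
+// populated CPUFeatures):
+//
+//   for (CPUFeatures::const_iterator it = features.begin();
+//        it != features.end(); ++it) {
+//     std::cout << *it << "\n";
+//   }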
+class CPUFeaturesConstIterator {
+ public:
+ CPUFeaturesConstIterator(const CPUFeatures* cpu_features = NULL,
+ CPUFeatures::Feature start = CPUFeatures::kNone)
+ : cpu_features_(cpu_features), feature_(start) {
+ VIXL_ASSERT(IsValid());
+ }
+
+ bool operator==(const CPUFeaturesConstIterator& other) const;
+ bool operator!=(const CPUFeaturesConstIterator& other) const {
+ return !(*this == other);
+ }
+ CPUFeatures::Feature operator++();
+ CPUFeatures::Feature operator++(int);
+
+ CPUFeatures::Feature operator*() const {
+ VIXL_ASSERT(IsValid());
+ return feature_;
+ }
+
+ // For proper support of C++'s simplest "Iterator" concept, this class would
+ // have to define member types (such as CPUFeaturesIterator::pointer) to make
+ // it appear as if it iterates over Feature objects in memory. That is, we'd
+ // need CPUFeatures::iterator to behave like std::vector<Feature>::iterator.
+ // This is at least partially possible -- the std::vector<bool> specialisation
+ // does something similar -- but it doesn't seem worthwhile for a
+ // special-purpose debug helper, so they are omitted here.
+ private:
+ const CPUFeatures* cpu_features_;
+ CPUFeatures::Feature feature_;
+
+ bool IsValid() const {
+ return ((cpu_features_ == NULL) && (feature_ == CPUFeatures::kNone)) ||
+ cpu_features_->Has(feature_);
+ }
+};
+
+// A convenience scope for temporarily modifying a CPU features object. This
+// allows features to be enabled for short sequences.
+//
+// Expected usage:
+//
+// {
+// CPUFeaturesScope cpu(&masm, CPUFeatures::kCRC32);
+// // This scope can now use CRC32, as well as anything else that was enabled
+// // before the scope.
+//
+// ...
+//
+// // At the end of the scope, the original CPU features are restored.
+// }
+class CPUFeaturesScope {
+ public:
+ // Start a CPUFeaturesScope on any object that implements
+ // `CPUFeatures* GetCPUFeatures()`.
+ template <typename T>
+ explicit CPUFeaturesScope(T* cpu_features_wrapper,
+ CPUFeatures::Feature feature0 = CPUFeatures::kNone,
+ CPUFeatures::Feature feature1 = CPUFeatures::kNone,
+ CPUFeatures::Feature feature2 = CPUFeatures::kNone,
+ CPUFeatures::Feature feature3 = CPUFeatures::kNone)
+ : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
+ old_features_(*cpu_features_) {
+ cpu_features_->Combine(feature0, feature1, feature2, feature3);
+ }
+
+ template <typename T>
+ CPUFeaturesScope(T* cpu_features_wrapper, const CPUFeatures& other)
+ : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
+ old_features_(*cpu_features_) {
+ cpu_features_->Combine(other);
+ }
+
+ ~CPUFeaturesScope() { *cpu_features_ = old_features_; }
+
+ // For advanced usage, the CPUFeatures object can be accessed directly.
+ // The scope will restore the original state when it ends.
+
+ CPUFeatures* GetCPUFeatures() const { return cpu_features_; }
+
+ void SetCPUFeatures(const CPUFeatures& cpu_features) {
+ *cpu_features_ = cpu_features;
+ }
+
+ private:
+ CPUFeatures* const cpu_features_;
+ const CPUFeatures old_features_;
+};
+
+
+} // namespace vixl
+
+#endif // VIXL_CPU_FEATURES_H
diff --git a/js/src/jit/arm64/vixl/Cpu-vixl.cpp b/js/src/jit/arm64/vixl/Cpu-vixl.cpp
new file mode 100644
index 0000000000..12244e73e4
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Cpu-vixl.cpp
@@ -0,0 +1,256 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Cpu-vixl.h"
+
+#include "jstypes.h"
+
+#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
+#include <sys/auxv.h>
+#define VIXL_USE_LINUX_HWCAP 1
+#endif
+
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+
+namespace vixl {
+
+
+const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
+const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
+const IDRegister::Field AA64PFR0::kSVE(32);
+const IDRegister::Field AA64PFR0::kDIT(48);
+
+const IDRegister::Field AA64PFR1::kBT(0);
+
+const IDRegister::Field AA64ISAR0::kAES(4);
+const IDRegister::Field AA64ISAR0::kSHA1(8);
+const IDRegister::Field AA64ISAR0::kSHA2(12);
+const IDRegister::Field AA64ISAR0::kCRC32(16);
+const IDRegister::Field AA64ISAR0::kAtomic(20);
+const IDRegister::Field AA64ISAR0::kRDM(28);
+const IDRegister::Field AA64ISAR0::kSHA3(32);
+const IDRegister::Field AA64ISAR0::kSM3(36);
+const IDRegister::Field AA64ISAR0::kSM4(40);
+const IDRegister::Field AA64ISAR0::kDP(44);
+const IDRegister::Field AA64ISAR0::kFHM(48);
+const IDRegister::Field AA64ISAR0::kTS(52);
+
+const IDRegister::Field AA64ISAR1::kDPB(0);
+const IDRegister::Field AA64ISAR1::kAPA(4);
+const IDRegister::Field AA64ISAR1::kAPI(8);
+const IDRegister::Field AA64ISAR1::kJSCVT(12);
+const IDRegister::Field AA64ISAR1::kFCMA(16);
+const IDRegister::Field AA64ISAR1::kLRCPC(20);
+const IDRegister::Field AA64ISAR1::kGPA(24);
+const IDRegister::Field AA64ISAR1::kGPI(28);
+const IDRegister::Field AA64ISAR1::kFRINTTS(32);
+const IDRegister::Field AA64ISAR1::kSB(36);
+const IDRegister::Field AA64ISAR1::kSPECRES(40);
+
+const IDRegister::Field AA64MMFR1::kLO(16);
+
+CPUFeatures AA64PFR0::GetCPUFeatures() const {
+ CPUFeatures f;
+ if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
+ if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
+ if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
+ if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
+ if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
+ if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
+ return f;
+}
+
+CPUFeatures AA64PFR1::GetCPUFeatures() const {
+ CPUFeatures f;
+ if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
+ return f;
+}
+
+CPUFeatures AA64ISAR0::GetCPUFeatures() const {
+ CPUFeatures f;
+ if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
+ if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
+ if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
+ if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
+ if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
+ if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
+ if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
+ if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
+ if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
+ if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
+ if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
+ if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
+ if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
+ if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
+ if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
+ return f;
+}
+
+CPUFeatures AA64ISAR1::GetCPUFeatures() const {
+ CPUFeatures f;
+ if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
+ if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
+ if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
+ if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
+ if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
+ if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
+
+ if (Get(kAPI) >= 1) f.Combine(CPUFeatures::kPAuth);
+ if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuth, CPUFeatures::kPAuthQARMA);
+ if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
+ if (Get(kGPA) >= 1) {
+ f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
+ }
+ return f;
+}
+
+CPUFeatures AA64MMFR1::GetCPUFeatures() const {
+ CPUFeatures f;
+ if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
+ return f;
+}
+
+int IDRegister::Get(IDRegister::Field field) const {
+ int msb = field.GetMsb();
+ int lsb = field.GetLsb();
+ VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
+ (sizeof(int) * kBitsPerByte));
+ switch (field.GetType()) {
+ case Field::kSigned:
+ return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
+ case Field::kUnsigned:
+ return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
+ }
+ VIXL_UNREACHABLE();
+ return 0;
+}
+
+CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
+ CPUFeatures f;
+#define VIXL_COMBINE_ID_REG(NAME) f.Combine(Read##NAME().GetCPUFeatures());
+ VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
+#undef VIXL_COMBINE_ID_REG
+ return f;
+}
+
+CPUFeatures CPU::InferCPUFeaturesFromOS(
+ CPUFeatures::QueryIDRegistersOption option) {
+ CPUFeatures features;
+
+#if VIXL_USE_LINUX_HWCAP
+ // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
+ // than explicit bits, but explicit bits allow us to identify features that
+ // the toolchain doesn't know about.
+ static const CPUFeatures::Feature kFeatureBits[] = {
+ // Bits 0-7
+ CPUFeatures::kFP,
+ CPUFeatures::kNEON,
+ CPUFeatures::kNone, // "EVTSTRM", which VIXL doesn't track.
+ CPUFeatures::kAES,
+ CPUFeatures::kPmull1Q,
+ CPUFeatures::kSHA1,
+ CPUFeatures::kSHA2,
+ CPUFeatures::kCRC32,
+ // Bits 8-15
+ CPUFeatures::kAtomics,
+ CPUFeatures::kFPHalf,
+ CPUFeatures::kNEONHalf,
+ CPUFeatures::kIDRegisterEmulation,
+ CPUFeatures::kRDM,
+ CPUFeatures::kJSCVT,
+ CPUFeatures::kFcma,
+ CPUFeatures::kRCpc,
+ // Bits 16-23
+ CPUFeatures::kDCPoP,
+ CPUFeatures::kSHA3,
+ CPUFeatures::kSM3,
+ CPUFeatures::kSM4,
+ CPUFeatures::kDotProduct,
+ CPUFeatures::kSHA512,
+ CPUFeatures::kSVE,
+ CPUFeatures::kFHM,
+ // Bits 24-27
+ CPUFeatures::kDIT,
+ CPUFeatures::kUSCAT,
+ CPUFeatures::kRCpcImm,
+ CPUFeatures::kFlagM
+ // Bits 28-31 are unassigned.
+ };
+ static const size_t kFeatureBitCount =
+ sizeof(kFeatureBits) / sizeof(kFeatureBits[0]);
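+
+  // As a hedged cross-check sketch (not upstream code): on Linux the low
+  // AT_HWCAP bits line up with this table, so the explicit indices above can
+  // be compared against <asm/hwcap.h> where it is available, for example:
+  //
+  //   static_assert(HWCAP_FP == (1UL << 0), "bit 0 is FP");
+  //   static_assert(HWCAP_AES == (1UL << 3), "bit 3 is AES");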
+
+ // Mozilla change: Set the default for the simulator.
+#ifdef JS_SIMULATOR_ARM64
+ unsigned long auxv = ~(0UL); // Enable all features for the Simulator.
+#else
+ unsigned long auxv = getauxval(AT_HWCAP); // NOLINT(runtime/int)
+#endif
+
+ VIXL_STATIC_ASSERT(kFeatureBitCount < (sizeof(auxv) * kBitsPerByte));
+ for (size_t i = 0; i < kFeatureBitCount; i++) {
+ if (auxv & (1UL << i)) features.Combine(kFeatureBits[i]);
+ }
+#elif defined(XP_MACOSX)
+ // Apple processors have kJSCVT, kDotProduct, and kAtomics features.
+ features.Combine(CPUFeatures::kJSCVT, CPUFeatures::kDotProduct,
+ CPUFeatures::kAtomics);
+#endif // VIXL_USE_LINUX_HWCAP
+
+ if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
+ (features.Has(CPUFeatures::kIDRegisterEmulation))) {
+ features.Combine(InferCPUFeaturesFromIDRegisters());
+ }
+ return features;
+}
+
+
+#ifdef __aarch64__
+#define VIXL_READ_ID_REG(NAME) \
+ NAME CPU::Read##NAME() { \
+ uint64_t value = 0; \
+ __asm__("mrs %0, ID_" #NAME "_EL1" : "=r"(value)); \
+ return NAME(value); \
+ }
+#else // __aarch64__
+#define VIXL_READ_ID_REG(NAME) \
+ NAME CPU::Read##NAME() { \
+ /* TODO: Use VIXL_UNREACHABLE once it works in release builds. */ \
+ VIXL_ABORT(); \
+ }
+#endif // __aarch64__
+
+VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)
+
+#undef VIXL_READ_ID_REG
+
+
+// Initialise to smallest possible cache size.
+unsigned CPU::dcache_line_size_ = 1;
+unsigned CPU::icache_line_size_ = 1;
+
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Cpu-vixl.h b/js/src/jit/arm64/vixl/Cpu-vixl.h
new file mode 100644
index 0000000000..4db51aad6b
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Cpu-vixl.h
@@ -0,0 +1,241 @@
+// Copyright 2014, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_CPU_AARCH64_H
+#define VIXL_CPU_AARCH64_H
+
+#include "jit/arm64/vixl/Cpu-Features-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+
+#include "jit/arm64/vixl/Instructions-vixl.h"
+
+#ifndef VIXL_INCLUDE_TARGET_AARCH64
+// The supporting .cc file is only compiled when the A64 target is selected.
+// Throw an explicit error now to avoid a harder-to-debug linker error later.
+//
+// These helpers _could_ work on any AArch64 host, even when generating AArch32
+// code, but we don't support this because the available features may differ
+// between AArch32 and AArch64 on the same platform, so basing AArch32 code
+// generation on aarch64::CPU features is probably broken.
+#error cpu-aarch64.h requires VIXL_INCLUDE_TARGET_AARCH64 (scons target=a64).
+#endif
+
+namespace vixl {
+
+// A CPU ID register, for use with CPUFeatures::kIDRegisterEmulation. Fields
+// specific to each register are described in relevant subclasses.
+class IDRegister {
+ protected:
+ explicit IDRegister(uint64_t value = 0) : value_(value) {}
+
+ class Field {
+ public:
+ enum Type { kUnsigned, kSigned };
+
+ explicit Field(int lsb, Type type = kUnsigned) : lsb_(lsb), type_(type) {}
+
+ static const int kMaxWidthInBits = 4;
+
+ int GetWidthInBits() const {
+ // All current ID fields have four bits.
+ return kMaxWidthInBits;
+ }
+ int GetLsb() const { return lsb_; }
+ int GetMsb() const { return lsb_ + GetWidthInBits() - 1; }
+ Type GetType() const { return type_; }
+
+ private:
+ int lsb_;
+ Type type_;
+ };
+
+ public:
+ // Extract the specified field, performing sign-extension for signed fields.
+ // This allows us to implement the 'value >= number' detection mechanism
+ // recommended by the Arm ARM, for both signed and unsigned fields.
+ int Get(Field field) const;
+
+ private:
+ uint64_t value_;
+};
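+
+// Illustrative note (not part of the upstream header): signed fields encode
+// "not implemented" as 0xf, which Get() sign-extends to -1, so feature
+// detection reduces to a '>= 0' comparison. A hypothetical probe of the
+// ID_AA64PFR0_EL1 value might look like:
+//
+//   AA64PFR0 pfr0(id_aa64pfr0_el1_value);  // value obtained elsewhere
+//   CPUFeatures features = pfr0.GetCPUFeatures();
+//
+// where GetCPUFeatures() applies the comparisons internally for each field.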
+
+class AA64PFR0 : public IDRegister {
+ public:
+ explicit AA64PFR0(uint64_t value) : IDRegister(value) {}
+
+ CPUFeatures GetCPUFeatures() const;
+
+ private:
+ static const Field kFP;
+ static const Field kAdvSIMD;
+ static const Field kSVE;
+ static const Field kDIT;
+};
+
+class AA64PFR1 : public IDRegister {
+ public:
+ explicit AA64PFR1(uint64_t value) : IDRegister(value) {}
+
+ CPUFeatures GetCPUFeatures() const;
+
+ private:
+ static const Field kBT;
+};
+
+class AA64ISAR0 : public IDRegister {
+ public:
+ explicit AA64ISAR0(uint64_t value) : IDRegister(value) {}
+
+ CPUFeatures GetCPUFeatures() const;
+
+ private:
+ static const Field kAES;
+ static const Field kSHA1;
+ static const Field kSHA2;
+ static const Field kCRC32;
+ static const Field kAtomic;
+ static const Field kRDM;
+ static const Field kSHA3;
+ static const Field kSM3;
+ static const Field kSM4;
+ static const Field kDP;
+ static const Field kFHM;
+ static const Field kTS;
+};
+
+class AA64ISAR1 : public IDRegister {
+ public:
+ explicit AA64ISAR1(uint64_t value) : IDRegister(value) {}
+
+ CPUFeatures GetCPUFeatures() const;
+
+ private:
+ static const Field kDPB;
+ static const Field kAPA;
+ static const Field kAPI;
+ static const Field kJSCVT;
+ static const Field kFCMA;
+ static const Field kLRCPC;
+ static const Field kGPA;
+ static const Field kGPI;
+ static const Field kFRINTTS;
+ static const Field kSB;
+ static const Field kSPECRES;
+};
+
+class AA64MMFR1 : public IDRegister {
+ public:
+ explicit AA64MMFR1(uint64_t value) : IDRegister(value) {}
+
+ CPUFeatures GetCPUFeatures() const;
+
+ private:
+ static const Field kLO;
+};
+
+class CPU {
+ public:
+ // Initialise CPU support.
+ static void SetUp();
+
+ // Ensures the data at a given address and with a given size is the same for
+ // the I and D caches. I and D caches are not automatically coherent on ARM
+ // so this operation is required before any dynamically generated code can
+ // safely run.
+ static void EnsureIAndDCacheCoherency(void* address, size_t length);
+
+ // Flush the local instruction pipeline, forcing a reload of any instructions
+ // beyond this barrier from the icache.
+ static void FlushExecutionContext();
+
+ // Read and interpret the ID registers. This requires
+ // CPUFeatures::kIDRegisterEmulation, and therefore cannot be called on
+ // non-AArch64 platforms.
+ static CPUFeatures InferCPUFeaturesFromIDRegisters();
+
+ // Read and interpret CPUFeatures reported by the OS. Failed queries (or
+ // unsupported platforms) return an empty list. Note that this is
+ // indistinguishable from a successful query on a platform that advertises no
+ // features.
+ //
+ // Non-AArch64 hosts are considered to be unsupported platforms, and this
+ // function returns an empty list.
+ static CPUFeatures InferCPUFeaturesFromOS(
+ CPUFeatures::QueryIDRegistersOption option =
+ CPUFeatures::kQueryIDRegistersIfAvailable);
+
+ // Handle tagged pointers.
+ template <typename T>
+ static T SetPointerTag(T pointer, uint64_t tag) {
+ VIXL_ASSERT(IsUintN(kAddressTagWidth, tag));
+
+ // Use C-style casts to get static_cast behaviour for integral types (T),
+ // and reinterpret_cast behaviour for other types.
+
+ uint64_t raw = (uint64_t)pointer;
+ VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(raw));
+
+ raw = (raw & ~kAddressTagMask) | (tag << kAddressTagOffset);
+ return (T)raw;
+ }
+
+ template <typename T>
+ static uint64_t GetPointerTag(T pointer) {
+ // Use C-style casts to get static_cast behaviour for integral types (T),
+ // and reinterpret_cast behaviour for other types.
+
+ uint64_t raw = (uint64_t)pointer;
+ VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(raw));
+
+ return (raw & kAddressTagMask) >> kAddressTagOffset;
+ }
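+
+  // Illustrative usage sketch (not part of the upstream interface). With
+  // AArch64 top-byte-ignore addressing, a tag survives the round trip through
+  // the two helpers above; constants such as kAddressTagWidth come from
+  // Instructions-vixl.h.
+  //
+  //   int dummy = 0;
+  //   int* tagged = CPU::SetPointerTag(&dummy, 0x5a);
+  //   VIXL_ASSERT(CPU::GetPointerTag(tagged) == 0x5a);
+  //
+  // Dereferencing a tagged pointer is only safe where the host really does
+  // ignore the top address byte.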
+
+ private:
+#define VIXL_AARCH64_ID_REG_LIST(V) \
+ V(AA64PFR0) \
+ V(AA64PFR1) \
+ V(AA64ISAR0) \
+ V(AA64ISAR1) \
+ V(AA64MMFR1)
+
+#define VIXL_READ_ID_REG(NAME) static NAME Read##NAME();
+ // On native AArch64 platforms, read the named CPU ID registers. These require
+ // CPUFeatures::kIDRegisterEmulation, and should not be called on non-AArch64
+ // platforms.
+ VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)
+#undef VIXL_READ_ID_REG
+
+ // Return the content of the cache type register.
+ static uint32_t GetCacheType();
+
+ // I and D cache line size in bytes.
+ static unsigned icache_line_size_;
+ static unsigned dcache_line_size_;
+};
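+
+// Minimal usage sketch (illustrative, not part of the upstream header;
+// assumes `code` points to a buffer of `size` bytes that has just been
+// filled with generated instructions and is mapped executable):
+//
+//   CPU::SetUp();                                // once, at startup
+//   CPU::EnsureIAndDCacheCoherency(code, size);  // after emitting code
+//   // Only now is it safe to branch into the buffer.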
+
+} // namespace vixl
+
+#endif // VIXL_CPU_AARCH64_H
diff --git a/js/src/jit/arm64/vixl/Debugger-vixl.cpp b/js/src/jit/arm64/vixl/Debugger-vixl.cpp
new file mode 100644
index 0000000000..fa3e15601e
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Debugger-vixl.cpp
@@ -0,0 +1,1535 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+// OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jstypes.h"
+
+#ifdef JS_SIMULATOR_ARM64
+
+#include "jit/arm64/vixl/Debugger-vixl.h"
+
+#include "mozilla/Vector.h"
+
+#include "js/AllocPolicy.h"
+
+namespace vixl {
+
+// List of commands supported by the debugger.
+#define DEBUG_COMMAND_LIST(C) \
+C(HelpCommand) \
+C(ContinueCommand) \
+C(StepCommand) \
+C(DisasmCommand) \
+C(PrintCommand) \
+C(ExamineCommand)
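+
+// DEBUG_COMMAND_LIST is an X-macro: callers pass a one-argument macro that is
+// applied to every command class. As a purely illustrative example,
+//
+//   #define PRINT_NAME(Command) printf(#Command "\n");
+//   DEBUG_COMMAND_LIST(PRINT_NAME)
+//   #undef PRINT_NAME
+//
+// prints the name of each command. DebugCommand::Parse and HelpCommand::Run
+// below use the same expansion to dispatch on aliases and to print help.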
+
+// Debugger command lines are broken up into tokens of different types to
+// make later processing easier.
+class Token {
+ public:
+ virtual ~Token() {}
+
+ // Token type.
+ virtual bool IsRegister() const { return false; }
+ virtual bool IsFPRegister() const { return false; }
+ virtual bool IsIdentifier() const { return false; }
+ virtual bool IsAddress() const { return false; }
+ virtual bool IsInteger() const { return false; }
+ virtual bool IsFormat() const { return false; }
+ virtual bool IsUnknown() const { return false; }
+ // Token properties.
+ virtual bool CanAddressMemory() const { return false; }
+ virtual uint8_t* ToAddress(Debugger* debugger) const = 0;
+ virtual void Print(FILE* out = stdout) const = 0;
+
+ static Token* Tokenize(const char* arg);
+};
+
+typedef mozilla::Vector<Token*, 0, js::SystemAllocPolicy> TokenVector;
+
+// Tokens often hold one value.
+template<typename T> class ValueToken : public Token {
+ public:
+ explicit ValueToken(T value) : value_(value) {}
+ ValueToken() {}
+
+ T value() const { return value_; }
+
+ virtual uint8_t* ToAddress(Debugger* debugger) const override {
+ USE(debugger);
+ VIXL_ABORT();
+ }
+
+ protected:
+ T value_;
+};
+
+// Integer registers (X or W) and their aliases.
+// Format: wn or xn with 0 <= n < 32 or a name in the aliases list.
+class RegisterToken : public ValueToken<const Register> {
+ public:
+ explicit RegisterToken(const Register reg)
+ : ValueToken<const Register>(reg) {}
+
+ virtual bool IsRegister() const override { return true; }
+ virtual bool CanAddressMemory() const override { return value().Is64Bits(); }
+ virtual uint8_t* ToAddress(Debugger* debugger) const override;
+ virtual void Print(FILE* out = stdout) const override;
+ const char* Name() const;
+
+ static Token* Tokenize(const char* arg);
+ static RegisterToken* Cast(Token* tok) {
+ VIXL_ASSERT(tok->IsRegister());
+ return reinterpret_cast<RegisterToken*>(tok);
+ }
+
+ private:
+ static const int kMaxAliasNumber = 4;
+ static const char* kXAliases[kNumberOfRegisters][kMaxAliasNumber];
+ static const char* kWAliases[kNumberOfRegisters][kMaxAliasNumber];
+};
+
+// Floating point registers (D or S).
+// Format: sn or dn with 0 <= n < 32.
+class FPRegisterToken : public ValueToken<const FPRegister> {
+ public:
+ explicit FPRegisterToken(const FPRegister fpreg)
+ : ValueToken<const FPRegister>(fpreg) {}
+
+ virtual bool IsFPRegister() const override { return true; }
+ virtual void Print(FILE* out = stdout) const override;
+
+ static Token* Tokenize(const char* arg);
+ static FPRegisterToken* Cast(Token* tok) {
+ VIXL_ASSERT(tok->IsFPRegister());
+ return reinterpret_cast<FPRegisterToken*>(tok);
+ }
+};
+
+
+// Non-register identifiers.
+// Format: Alphanumeric string starting with a letter.
+class IdentifierToken : public ValueToken<char*> {
+ public:
+ explicit IdentifierToken(const char* name) {
+ size_t size = strlen(name) + 1;
+ value_ = js_pod_malloc<char>(size);
+ strncpy(value_, name, size);
+ }
+ virtual ~IdentifierToken() { js_free(value_); }
+
+ virtual bool IsIdentifier() const override { return true; }
+ virtual bool CanAddressMemory() const override { return strcmp(value(), "pc") == 0; }
+ virtual uint8_t* ToAddress(Debugger* debugger) const override;
+ virtual void Print(FILE* out = stdout) const override;
+
+ static Token* Tokenize(const char* arg);
+ static IdentifierToken* Cast(Token* tok) {
+ VIXL_ASSERT(tok->IsIdentifier());
+ return reinterpret_cast<IdentifierToken*>(tok);
+ }
+};
+
+// 64-bit address literal.
+// Format: 0x... with up to 16 hexadecimal digits.
+class AddressToken : public ValueToken<uint8_t*> {
+ public:
+ explicit AddressToken(uint8_t* address) : ValueToken<uint8_t*>(address) {}
+
+ virtual bool IsAddress() const override { return true; }
+ virtual bool CanAddressMemory() const override { return true; }
+ virtual uint8_t* ToAddress(Debugger* debugger) const override;
+ virtual void Print(FILE* out = stdout) const override;
+
+ static Token* Tokenize(const char* arg);
+ static AddressToken* Cast(Token* tok) {
+ VIXL_ASSERT(tok->IsAddress());
+ return reinterpret_cast<AddressToken*>(tok);
+ }
+};
+
+
+// 64-bit decimal integer literal.
+// Format: n.
+class IntegerToken : public ValueToken<int64_t> {
+ public:
+ explicit IntegerToken(int64_t value) : ValueToken<int64_t>(value) {}
+
+ virtual bool IsInteger() const override { return true; }
+ virtual void Print(FILE* out = stdout) const override;
+
+ static Token* Tokenize(const char* arg);
+ static IntegerToken* Cast(Token* tok) {
+ VIXL_ASSERT(tok->IsInteger());
+ return reinterpret_cast<IntegerToken*>(tok);
+ }
+};
+
+// Literal describing how to print a chunk of data (up to 64 bits).
+// Format: .ln
+// where l (letter) is one of
+// * x: hexadecimal
+// * s: signed integer
+// * u: unsigned integer
+// * f: floating point
+// * i: instruction
+// and n (size) is one of 8, 16, 32 and 64. n should be omitted for
+// instructions.
+class FormatToken : public Token {
+ public:
+ FormatToken() {}
+
+ virtual bool IsFormat() const override { return true; }
+ virtual int SizeOf() const = 0;
+ virtual char type_code() const = 0;
+ virtual void PrintData(void* data, FILE* out = stdout) const = 0;
+ virtual void Print(FILE* out = stdout) const override = 0;
+
+ virtual uint8_t* ToAddress(Debugger* debugger) const override {
+ USE(debugger);
+ VIXL_ABORT();
+ }
+
+ static Token* Tokenize(const char* arg);
+ static FormatToken* Cast(Token* tok) {
+ VIXL_ASSERT(tok->IsFormat());
+ return reinterpret_cast<FormatToken*>(tok);
+ }
+};
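+
+// Illustrative sketch (hypothetical values): FormatToken::Tokenize, defined
+// later in this file, turns a format suffix such as "x64" into a typed
+// printer; the leading '.' is stripped by DebugCommand::Parse before the
+// suffix reaches Tokenize.
+//
+//   FormatToken* fmt = FormatToken::Cast(FormatToken::Tokenize("x64"));
+//   uint64_t value = 0x1234;
+//   fmt->PrintData(&value);  // prints "0000000000001234"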
+
+
+template<typename T> class Format : public FormatToken {
+ public:
+ Format(const char* fmt, char type_code) : fmt_(fmt), type_code_(type_code) {}
+
+ virtual int SizeOf() const override { return sizeof(T); }
+ virtual char type_code() const override { return type_code_; }
+ virtual void PrintData(void* data, FILE* out = stdout) const override {
+ T value;
+ memcpy(&value, data, sizeof(value));
+ fprintf(out, fmt_, value);
+ }
+ virtual void Print(FILE* out = stdout) const override;
+
+ private:
+ const char* fmt_;
+ char type_code_;
+};
+
+// Tokens which don't fit any of the above.
+class UnknownToken : public Token {
+ public:
+ explicit UnknownToken(const char* arg) {
+ size_t size = strlen(arg) + 1;
+ unknown_ = js_pod_malloc<char>(size);
+ strncpy(unknown_, arg, size);
+ }
+ virtual ~UnknownToken() { js_free(unknown_); }
+ virtual uint8_t* ToAddress(Debugger* debugger) const override {
+ USE(debugger);
+ VIXL_ABORT();
+ }
+
+ virtual bool IsUnknown() const override { return true; }
+ virtual void Print(FILE* out = stdout) const override;
+
+ private:
+ char* unknown_;
+};
+
+
+// All debugger commands must subclass DebugCommand and implement Run, Print
+// and Build. Commands must also define kHelp and kAliases.
+class DebugCommand {
+ public:
+ explicit DebugCommand(Token* name) : name_(IdentifierToken::Cast(name)) {}
+ DebugCommand() : name_(NULL) {}
+ virtual ~DebugCommand() { js_delete(name_); }
+
+ const char* name() { return name_->value(); }
+ // Run the command on the given debugger. The command returns true if
+ // execution should move to the next instruction.
+ virtual bool Run(Debugger * debugger) = 0;
+ virtual void Print(FILE* out = stdout);
+
+ static bool Match(const char* name, const char** aliases);
+ static DebugCommand* Parse(char* line);
+ static void PrintHelp(const char** aliases,
+ const char* args,
+ const char* help);
+
+ private:
+ IdentifierToken* name_;
+};
+
+// For all commands below, see their respective kHelp and kAliases defined
+// later in this file.
+class HelpCommand : public DebugCommand {
+ public:
+ explicit HelpCommand(Token* name) : DebugCommand(name) {}
+
+ virtual bool Run(Debugger* debugger) override;
+
+ static DebugCommand* Build(TokenVector&& args);
+
+ static const char* kHelp;
+ static const char* kAliases[];
+ static const char* kArguments;
+};
+
+
+class ContinueCommand : public DebugCommand {
+ public:
+ explicit ContinueCommand(Token* name) : DebugCommand(name) {}
+
+ virtual bool Run(Debugger* debugger) override;
+
+ static DebugCommand* Build(TokenVector&& args);
+
+ static const char* kHelp;
+ static const char* kAliases[];
+ static const char* kArguments;
+};
+
+
+class StepCommand : public DebugCommand {
+ public:
+ StepCommand(Token* name, IntegerToken* count)
+ : DebugCommand(name), count_(count) {}
+ virtual ~StepCommand() { js_delete(count_); }
+
+ int64_t count() { return count_->value(); }
+ virtual bool Run(Debugger* debugger) override;
+ virtual void Print(FILE* out = stdout) override;
+
+ static DebugCommand* Build(TokenVector&& args);
+
+ static const char* kHelp;
+ static const char* kAliases[];
+ static const char* kArguments;
+
+ private:
+ IntegerToken* count_;
+};
+
+class DisasmCommand : public DebugCommand {
+ public:
+ static DebugCommand* Build(TokenVector&& args);
+
+ static const char* kHelp;
+ static const char* kAliases[];
+ static const char* kArguments;
+};
+
+
+class PrintCommand : public DebugCommand {
+ public:
+ PrintCommand(Token* name, Token* target, FormatToken* format)
+ : DebugCommand(name), target_(target), format_(format) {}
+ virtual ~PrintCommand() {
+ js_delete(target_);
+ js_delete(format_);
+ }
+
+ Token* target() { return target_; }
+ FormatToken* format() { return format_; }
+ virtual bool Run(Debugger* debugger) override;
+ virtual void Print(FILE* out = stdout) override;
+
+ static DebugCommand* Build(TokenVector&& args);
+
+ static const char* kHelp;
+ static const char* kAliases[];
+ static const char* kArguments;
+
+ private:
+ Token* target_;
+ FormatToken* format_;
+};
+
+class ExamineCommand : public DebugCommand {
+ public:
+ ExamineCommand(Token* name,
+ Token* target,
+ FormatToken* format,
+ IntegerToken* count)
+ : DebugCommand(name), target_(target), format_(format), count_(count) {}
+ virtual ~ExamineCommand() {
+ js_delete(target_);
+ js_delete(format_);
+ js_delete(count_);
+ }
+
+ Token* target() { return target_; }
+ FormatToken* format() { return format_; }
+ IntegerToken* count() { return count_; }
+ virtual bool Run(Debugger* debugger) override;
+ virtual void Print(FILE* out = stdout) override;
+
+ static DebugCommand* Build(TokenVector&& args);
+
+ static const char* kHelp;
+ static const char* kAliases[];
+ static const char* kArguments;
+
+ private:
+ Token* target_;
+ FormatToken* format_;
+ IntegerToken* count_;
+};
+
+// Commands whose name does not match any of the known commands.
+class UnknownCommand : public DebugCommand {
+ public:
+ explicit UnknownCommand(TokenVector&& args) : args_(std::move(args)) {}
+ virtual ~UnknownCommand();
+
+ virtual bool Run(Debugger* debugger) override;
+
+ private:
+ TokenVector args_;
+};
+
+// Commands whose name matches a known command but whose syntax is invalid.
+class InvalidCommand : public DebugCommand {
+ public:
+ InvalidCommand(TokenVector&& args, int index, const char* cause)
+ : args_(std::move(args)), index_(index), cause_(cause) {}
+ virtual ~InvalidCommand();
+
+ virtual bool Run(Debugger* debugger) override;
+
+ private:
+ TokenVector args_;
+ int index_;
+ const char* cause_;
+};
+
+const char* HelpCommand::kAliases[] = { "help", NULL };
+const char* HelpCommand::kArguments = NULL;
+const char* HelpCommand::kHelp = " Print this help.";
+
+const char* ContinueCommand::kAliases[] = { "continue", "c", NULL };
+const char* ContinueCommand::kArguments = NULL;
+const char* ContinueCommand::kHelp = " Resume execution.";
+
+const char* StepCommand::kAliases[] = { "stepi", "si", NULL };
+const char* StepCommand::kArguments = "[n = 1]";
+const char* StepCommand::kHelp = " Execute n next instruction(s).";
+
+const char* DisasmCommand::kAliases[] = { "disasm", "di", NULL };
+const char* DisasmCommand::kArguments = "[n = 10]";
+const char* DisasmCommand::kHelp =
+ " Disassemble n instruction(s) at pc.\n"
+ " This command is equivalent to x pc.i [n = 10]."
+;
+
+const char* PrintCommand::kAliases[] = { "print", "p", NULL };
+const char* PrintCommand::kArguments = "<entity>[.format]";
+const char* PrintCommand::kHelp =
+ " Print the given entity according to the given format.\n"
+ " The format parameter only affects individual registers; it is ignored\n"
+ " for other entities.\n"
+ " <entity> can be one of the following:\n"
+ " * A register name (such as x0, s1, ...).\n"
+ " * 'regs', to print all integer (W and X) registers.\n"
+ " * 'fpregs' to print all floating-point (S and D) registers.\n"
+ " * 'sysregs' to print all system registers (including NZCV).\n"
+ " * 'pc' to print the current program counter.\n"
+;
+
+const char* ExamineCommand::kAliases[] = { "m", "mem", "x", NULL };
+const char* ExamineCommand::kArguments = "<addr>[.format] [n = 10]";
+const char* ExamineCommand::kHelp =
+ " Examine memory. Print n items of memory at address <addr> according to\n"
+ " the given [.format].\n"
+ " Addr can be an immediate address, a register name or pc.\n"
+ " Format is made of a type letter: 'x' (hexadecimal), 's' (signed), 'u'\n"
+ " (unsigned), 'f' (floating point), i (instruction) and a size in bits\n"
+ " when appropriate (8, 16, 32, 64)\n"
+ " E.g 'x sp.x64' will print 10 64-bit words from the stack in\n"
+ " hexadecimal format."
+;
+
+const char* RegisterToken::kXAliases[kNumberOfRegisters][kMaxAliasNumber] = {
+ { "x0", NULL },
+ { "x1", NULL },
+ { "x2", NULL },
+ { "x3", NULL },
+ { "x4", NULL },
+ { "x5", NULL },
+ { "x6", NULL },
+ { "x7", NULL },
+ { "x8", NULL },
+ { "x9", NULL },
+ { "x10", NULL },
+ { "x11", NULL },
+ { "x12", NULL },
+ { "x13", NULL },
+ { "x14", NULL },
+ { "x15", NULL },
+ { "ip0", "x16", NULL },
+ { "ip1", "x17", NULL },
+ { "x18", "pr", NULL },
+ { "x19", NULL },
+ { "x20", NULL },
+ { "x21", NULL },
+ { "x22", NULL },
+ { "x23", NULL },
+ { "x24", NULL },
+ { "x25", NULL },
+ { "x26", NULL },
+ { "x27", NULL },
+ { "x28", NULL },
+ { "fp", "x29", NULL },
+ { "lr", "x30", NULL },
+ { "sp", NULL}
+};
+
+const char* RegisterToken::kWAliases[kNumberOfRegisters][kMaxAliasNumber] = {
+ { "w0", NULL },
+ { "w1", NULL },
+ { "w2", NULL },
+ { "w3", NULL },
+ { "w4", NULL },
+ { "w5", NULL },
+ { "w6", NULL },
+ { "w7", NULL },
+ { "w8", NULL },
+ { "w9", NULL },
+ { "w10", NULL },
+ { "w11", NULL },
+ { "w12", NULL },
+ { "w13", NULL },
+ { "w14", NULL },
+ { "w15", NULL },
+ { "w16", NULL },
+ { "w17", NULL },
+ { "w18", NULL },
+ { "w19", NULL },
+ { "w20", NULL },
+ { "w21", NULL },
+ { "w22", NULL },
+ { "w23", NULL },
+ { "w24", NULL },
+ { "w25", NULL },
+ { "w26", NULL },
+ { "w27", NULL },
+ { "w28", NULL },
+ { "w29", NULL },
+ { "w30", NULL },
+ { "wsp", NULL }
+};
+
+
+Debugger::Debugger(Decoder* decoder, FILE* stream)
+ : Simulator(decoder, stream),
+ debug_parameters_(DBG_INACTIVE),
+ pending_request_(false),
+ steps_(0),
+ last_command_(NULL) {
+ disasm_ = js_new<PrintDisassembler>(stdout);
+ printer_ = js_new<Decoder>();
+ printer_->AppendVisitor(disasm_);
+}
+
+
+Debugger::~Debugger() {
+ js_delete(disasm_);
+ js_delete(printer_);
+}
+
+
+void Debugger::Run() {
+ pc_modified_ = false;
+ while (pc_ != kEndOfSimAddress) {
+ if (pending_request()) RunDebuggerShell();
+ ExecuteInstruction();
+ LogAllWrittenRegisters();
+ }
+}
+
+
+void Debugger::PrintInstructions(const void* address, int64_t count) {
+ if (count == 0) {
+ return;
+ }
+
+ const Instruction* from = Instruction::CastConst(address);
+ if (count < 0) {
+ count = -count;
+ from -= (count - 1) * kInstructionSize;
+ }
+ const Instruction* to = from + count * kInstructionSize;
+
+ for (const Instruction* current = from;
+ current < to;
+ current = current->NextInstruction()) {
+ printer_->Decode(current);
+ }
+}
+
+
+void Debugger::PrintMemory(const uint8_t* address,
+ const FormatToken* format,
+ int64_t count) {
+ if (count == 0) {
+ return;
+ }
+
+ const uint8_t* from = address;
+ int size = format->SizeOf();
+ if (count < 0) {
+ count = -count;
+ from -= (count - 1) * size;
+ }
+ const uint8_t* to = from + count * size;
+
+ for (const uint8_t* current = from; current < to; current += size) {
+ if (((current - from) % 8) == 0) {
+ printf("\n%p: ", current);
+ }
+
+ uint64_t data = Memory::Read<uint64_t>(current);
+ format->PrintData(&data);
+ printf(" ");
+ }
+ printf("\n\n");
+}
+
+
+void Debugger::PrintRegister(const Register& target_reg,
+ const char* name,
+ const FormatToken* format) {
+ const uint64_t reg_size = target_reg.size();
+ const uint64_t format_size = format->SizeOf() * 8;
+ const uint64_t count = reg_size / format_size;
+ const uint64_t mask = 0xffffffffffffffff >> (64 - format_size);
+ const uint64_t reg_value = reg<uint64_t>(target_reg.code(),
+ Reg31IsStackPointer);
+ VIXL_ASSERT(count > 0);
+
+ printf("%s = ", name);
+ for (uint64_t i = 1; i <= count; i++) {
+ uint64_t data = reg_value >> (reg_size - (i * format_size));
+ data &= mask;
+ format->PrintData(&data);
+ printf(" ");
+ }
+ printf("\n");
+}
+
+
+// TODO(all): fix this for vector registers.
+void Debugger::PrintFPRegister(const FPRegister& target_fpreg,
+ const FormatToken* format) {
+ const unsigned fpreg_size = target_fpreg.size();
+ const uint64_t format_size = format->SizeOf() * 8;
+ const uint64_t count = fpreg_size / format_size;
+ const uint64_t mask = 0xffffffffffffffff >> (64 - format_size);
+ const uint64_t fpreg_value = vreg<uint64_t>(fpreg_size, target_fpreg.code());
+ VIXL_ASSERT(count > 0);
+
+ if (target_fpreg.Is32Bits()) {
+ printf("s%u = ", target_fpreg.code());
+ } else {
+ printf("d%u = ", target_fpreg.code());
+ }
+ for (uint64_t i = 1; i <= count; i++) {
+ uint64_t data = fpreg_value >> (fpreg_size - (i * format_size));
+ data &= mask;
+ format->PrintData(&data);
+ printf(" ");
+ }
+ printf("\n");
+}
+
+
+void Debugger::VisitException(const Instruction* instr) {
+ switch (instr->Mask(ExceptionMask)) {
+ case BRK:
+ DoBreakpoint(instr);
+ return;
+ case HLT:
+ VIXL_FALLTHROUGH();
+ default: Simulator::VisitException(instr);
+ }
+}
+
+
+// Read a command. A command will be at most kMaxDebugShellLine characters
+// long and ends with '\n\0'.
+// TODO: Should this be a utility function?
+char* Debugger::ReadCommandLine(const char* prompt, char* buffer, int length) {
+ int fgets_calls = 0;
+ char* end = NULL;
+
+ printf("%s", prompt);
+ fflush(stdout);
+
+ do {
+ if (fgets(buffer, length, stdin) == NULL) {
+ printf(" ** Error while reading command. **\n");
+ return NULL;
+ }
+
+ fgets_calls++;
+ end = strchr(buffer, '\n');
+ } while (end == NULL);
+
+ if (fgets_calls != 1) {
+ printf(" ** Command too long. **\n");
+ return NULL;
+ }
+
+ // Remove the newline from the end of the command.
+ VIXL_ASSERT(end[1] == '\0');
+ VIXL_ASSERT((end - buffer) < (length - 1));
+ end[0] = '\0';
+
+ return buffer;
+}
+
+
+void Debugger::RunDebuggerShell() {
+ if (IsDebuggerRunning()) {
+ if (steps_ > 0) {
+ // Finish stepping first.
+ --steps_;
+ return;
+ }
+
+ printf("Next: ");
+ PrintInstructions(pc());
+ bool done = false;
+ while (!done) {
+ char buffer[kMaxDebugShellLine];
+ char* line = ReadCommandLine("vixl> ", buffer, kMaxDebugShellLine);
+
+ if (line == NULL) continue; // An error occurred.
+
+ DebugCommand* command = DebugCommand::Parse(line);
+ if (command != NULL) {
+ last_command_ = command;
+ }
+
+ if (last_command_ != NULL) {
+ done = last_command_->Run(this);
+ } else {
+ printf("No previous command to run!\n");
+ }
+ }
+
+ if ((debug_parameters_ & DBG_BREAK) != 0) {
+ // The break request has now been handled, move to next instruction.
+ debug_parameters_ &= ~DBG_BREAK;
+ increment_pc();
+ }
+ }
+}
+
+
+void Debugger::DoBreakpoint(const Instruction* instr) {
+ VIXL_ASSERT(instr->Mask(ExceptionMask) == BRK);
+
+ printf("Hit breakpoint at pc=%p.\n", reinterpret_cast<const void*>(instr));
+ set_debug_parameters(debug_parameters() | DBG_BREAK | DBG_ACTIVE);
+ // Make the shell point to the brk instruction.
+ set_pc(instr);
+}
+
+
+static bool StringToUInt64(uint64_t* value, const char* line, int base = 10) {
+ char* endptr = NULL;
+ errno = 0; // Reset errors.
+ uint64_t parsed = strtoul(line, &endptr, base);
+
+ if (errno == ERANGE) {
+ // Overflow.
+ return false;
+ }
+
+ if (endptr == line) {
+ // No digits were parsed.
+ return false;
+ }
+
+ if (*endptr != '\0') {
+ // Non-digit characters present at the end.
+ return false;
+ }
+
+ *value = parsed;
+ return true;
+}
+
+
+static bool StringToInt64(int64_t* value, const char* line, int base = 10) {
+ char* endptr = NULL;
+ errno = 0; // Reset errors.
+ int64_t parsed = strtol(line, &endptr, base);
+
+ if (errno == ERANGE) {
+    // Overflow or underflow.
+ return false;
+ }
+
+ if (endptr == line) {
+ // No digits were parsed.
+ return false;
+ }
+
+ if (*endptr != '\0') {
+ // Non-digit characters present at the end.
+ return false;
+ }
+
+ *value = parsed;
+ return true;
+}
+
+
+Token* Token::Tokenize(const char* arg) {
+ if ((arg == NULL) || (*arg == '\0')) {
+ return NULL;
+ }
+
+ // The order is important. For example Identifier::Tokenize would consider
+ // any register to be a valid identifier.
+
+ Token* token = RegisterToken::Tokenize(arg);
+ if (token != NULL) {
+ return token;
+ }
+
+ token = FPRegisterToken::Tokenize(arg);
+ if (token != NULL) {
+ return token;
+ }
+
+ token = IdentifierToken::Tokenize(arg);
+ if (token != NULL) {
+ return token;
+ }
+
+ token = AddressToken::Tokenize(arg);
+ if (token != NULL) {
+ return token;
+ }
+
+ token = IntegerToken::Tokenize(arg);
+ if (token != NULL) {
+ return token;
+ }
+
+ return js_new<UnknownToken>(arg);
+}
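+
+// For example: "x0" and "lr" are claimed by RegisterToken, "s1" by
+// FPRegisterToken, "pc" by IdentifierToken, "0x1000" by AddressToken and
+// "42" by IntegerToken; anything else ends up as an UnknownToken.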
+
+
+uint8_t* RegisterToken::ToAddress(Debugger* debugger) const {
+ VIXL_ASSERT(CanAddressMemory());
+ uint64_t reg_value = debugger->xreg(value().code(), Reg31IsStackPointer);
+ uint8_t* address = NULL;
+ memcpy(&address, &reg_value, sizeof(address));
+ return address;
+}
+
+
+void RegisterToken::Print(FILE* out) const {
+ VIXL_ASSERT(value().IsValid());
+ fprintf(out, "[Register %s]", Name());
+}
+
+
+const char* RegisterToken::Name() const {
+ if (value().Is32Bits()) {
+ return kWAliases[value().code()][0];
+ } else {
+ return kXAliases[value().code()][0];
+ }
+}
+
+
+Token* RegisterToken::Tokenize(const char* arg) {
+ for (unsigned i = 0; i < kNumberOfRegisters; i++) {
+ // Is it a X register or alias?
+ for (const char** current = kXAliases[i]; *current != NULL; current++) {
+ if (strcmp(arg, *current) == 0) {
+ return js_new<RegisterToken>(Register::XRegFromCode(i));
+ }
+ }
+
+ // Is it a W register or alias?
+ for (const char** current = kWAliases[i]; *current != NULL; current++) {
+ if (strcmp(arg, *current) == 0) {
+ return js_new<RegisterToken>(Register::WRegFromCode(i));
+ }
+ }
+ }
+
+ return NULL;
+}
+
+
+void FPRegisterToken::Print(FILE* out) const {
+ VIXL_ASSERT(value().IsValid());
+ char prefix = value().Is32Bits() ? 's' : 'd';
+ fprintf(out, "[FPRegister %c%" PRIu32 "]", prefix, value().code());
+}
+
+
+Token* FPRegisterToken::Tokenize(const char* arg) {
+ if (strlen(arg) < 2) {
+ return NULL;
+ }
+
+ switch (*arg) {
+ case 's':
+ case 'd':
+ const char* cursor = arg + 1;
+ uint64_t code = 0;
+ if (!StringToUInt64(&code, cursor)) {
+ return NULL;
+ }
+
+      if (code >= kNumberOfFPRegisters) {
+ return NULL;
+ }
+
+ VRegister fpreg = NoVReg;
+ switch (*arg) {
+ case 's':
+ fpreg = VRegister::SRegFromCode(static_cast<unsigned>(code));
+ break;
+ case 'd':
+ fpreg = VRegister::DRegFromCode(static_cast<unsigned>(code));
+ break;
+ default: VIXL_UNREACHABLE();
+ }
+
+ return js_new<FPRegisterToken>(fpreg);
+ }
+
+ return NULL;
+}
+
+
+uint8_t* IdentifierToken::ToAddress(Debugger* debugger) const {
+ VIXL_ASSERT(CanAddressMemory());
+ const Instruction* pc_value = debugger->pc();
+ uint8_t* address = NULL;
+ memcpy(&address, &pc_value, sizeof(address));
+ return address;
+}
+
+void IdentifierToken::Print(FILE* out) const {
+ fprintf(out, "[Identifier %s]", value());
+}
+
+
+Token* IdentifierToken::Tokenize(const char* arg) {
+ if (!isalpha(arg[0])) {
+ return NULL;
+ }
+
+ const char* cursor = arg + 1;
+ while ((*cursor != '\0') && isalnum(*cursor)) {
+ ++cursor;
+ }
+
+ if (*cursor == '\0') {
+ return js_new<IdentifierToken>(arg);
+ }
+
+ return NULL;
+}
+
+
+uint8_t* AddressToken::ToAddress(Debugger* debugger) const {
+ USE(debugger);
+ return value();
+}
+
+
+void AddressToken::Print(FILE* out) const {
+ fprintf(out, "[Address %p]", value());
+}
+
+
+Token* AddressToken::Tokenize(const char* arg) {
+ if ((strlen(arg) < 3) || (arg[0] != '0') || (arg[1] != 'x')) {
+ return NULL;
+ }
+
+ uint64_t ptr = 0;
+ if (!StringToUInt64(&ptr, arg, 16)) {
+ return NULL;
+ }
+
+ uint8_t* address = reinterpret_cast<uint8_t*>(ptr);
+ return js_new<AddressToken>(address);
+}
+
+
+void IntegerToken::Print(FILE* out) const {
+ fprintf(out, "[Integer %" PRId64 "]", value());
+}
+
+
+Token* IntegerToken::Tokenize(const char* arg) {
+ int64_t value = 0;
+ if (!StringToInt64(&value, arg)) {
+ return NULL;
+ }
+
+ return js_new<IntegerToken>(value);
+}
+
+
+Token* FormatToken::Tokenize(const char* arg) {
+ size_t length = strlen(arg);
+ switch (arg[0]) {
+ case 'x':
+ case 's':
+ case 'u':
+ case 'f':
+ if (length == 1) return NULL;
+ break;
+ case 'i':
+ if (length == 1) return js_new<Format<uint32_t>>("%08" PRIx32, 'i');
+ VIXL_FALLTHROUGH();
+ default: return NULL;
+ }
+
+ char* endptr = NULL;
+ errno = 0; // Reset errors.
+ uint64_t count = strtoul(arg + 1, &endptr, 10);
+
+ if (errno != 0) {
+ // Overflow, etc.
+ return NULL;
+ }
+
+ if (endptr == arg) {
+ // No digits were parsed.
+ return NULL;
+ }
+
+ if (*endptr != '\0') {
+ // There are unexpected (non-digit) characters after the number.
+ return NULL;
+ }
+
+ switch (arg[0]) {
+ case 'x':
+ switch (count) {
+ case 8: return js_new<Format<uint8_t>>("%02" PRIx8, 'x');
+ case 16: return js_new<Format<uint16_t>>("%04" PRIx16, 'x');
+ case 32: return js_new<Format<uint32_t>>("%08" PRIx32, 'x');
+ case 64: return js_new<Format<uint64_t>>("%016" PRIx64, 'x');
+ default: return NULL;
+ }
+ case 's':
+ switch (count) {
+ case 8: return js_new<Format<int8_t>>("%4" PRId8, 's');
+ case 16: return js_new<Format<int16_t>>("%6" PRId16, 's');
+ case 32: return js_new<Format<int32_t>>("%11" PRId32, 's');
+ case 64: return js_new<Format<int64_t>>("%20" PRId64, 's');
+ default: return NULL;
+ }
+ case 'u':
+ switch (count) {
+ case 8: return js_new<Format<uint8_t>>("%3" PRIu8, 'u');
+ case 16: return js_new<Format<uint16_t>>("%5" PRIu16, 'u');
+ case 32: return js_new<Format<uint32_t>>("%10" PRIu32, 'u');
+ case 64: return js_new<Format<uint64_t>>("%20" PRIu64, 'u');
+ default: return NULL;
+ }
+ case 'f':
+ switch (count) {
+ case 32: return js_new<Format<float>>("%13g", 'f');
+ case 64: return js_new<Format<double>>("%13g", 'f');
+ default: return NULL;
+ }
+ default:
+ VIXL_UNREACHABLE();
+ return NULL;
+ }
+}
+
+
+template<typename T>
+void Format<T>::Print(FILE* out) const {
+ unsigned size = sizeof(T) * 8;
+ fprintf(out, "[Format %c%u - %s]", type_code_, size, fmt_);
+}
+
+
+void UnknownToken::Print(FILE* out) const {
+ fprintf(out, "[Unknown %s]", unknown_);
+}
+
+
+void DebugCommand::Print(FILE* out) {
+ fprintf(out, "%s", name());
+}
+
+
+bool DebugCommand::Match(const char* name, const char** aliases) {
+ for (const char** current = aliases; *current != NULL; current++) {
+ if (strcmp(name, *current) == 0) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+DebugCommand* DebugCommand::Parse(char* line) {
+ TokenVector args;
+
+ for (char* chunk = strtok(line, " \t");
+ chunk != NULL;
+ chunk = strtok(NULL, " \t")) {
+ char* dot = strchr(chunk, '.');
+ if (dot != NULL) {
+ // 'Token.format'.
+ Token* format = FormatToken::Tokenize(dot + 1);
+ if (format != NULL) {
+ *dot = '\0';
+ (void)args.append(Token::Tokenize(chunk));
+ (void)args.append(format);
+ } else {
+ // Error while parsing the format, push the UnknownToken so an error
+ // can be accurately reported.
+ (void)args.append(Token::Tokenize(chunk));
+ }
+ } else {
+ (void)args.append(Token::Tokenize(chunk));
+ }
+ }
+
+ if (args.empty()) {
+ return NULL;
+ }
+
+ if (!args[0]->IsIdentifier()) {
+ return js_new<InvalidCommand>(std::move(args), 0, "command name is not valid");
+ }
+
+ const char* name = IdentifierToken::Cast(args[0])->value();
+ #define RETURN_IF_MATCH(Command) \
+ if (Match(name, Command::kAliases)) { \
+ return Command::Build(std::move(args)); \
+ }
+ DEBUG_COMMAND_LIST(RETURN_IF_MATCH);
+ #undef RETURN_IF_MATCH
+
+ return js_new<UnknownCommand>(std::move(args));
+}
+
+
+void DebugCommand::PrintHelp(const char** aliases,
+ const char* args,
+ const char* help) {
+ VIXL_ASSERT(aliases[0] != NULL);
+ VIXL_ASSERT(help != NULL);
+
+ printf("\n----\n\n");
+ for (const char** current = aliases; *current != NULL; current++) {
+ if (args != NULL) {
+ printf("%s %s\n", *current, args);
+ } else {
+ printf("%s\n", *current);
+ }
+ }
+ printf("\n%s\n", help);
+}
+
+
+bool HelpCommand::Run(Debugger* debugger) {
+ VIXL_ASSERT(debugger->IsDebuggerRunning());
+ USE(debugger);
+
+ #define PRINT_HELP(Command) \
+ DebugCommand::PrintHelp(Command::kAliases, \
+ Command::kArguments, \
+ Command::kHelp);
+ DEBUG_COMMAND_LIST(PRINT_HELP);
+ #undef PRINT_HELP
+ printf("\n----\n\n");
+
+ return false;
+}
+
+
+DebugCommand* HelpCommand::Build(TokenVector&& args) {
+ if (args.length() != 1) {
+ return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+ }
+
+ return js_new<HelpCommand>(args[0]);
+}
+
+
+bool ContinueCommand::Run(Debugger* debugger) {
+ VIXL_ASSERT(debugger->IsDebuggerRunning());
+
+ debugger->set_debug_parameters(debugger->debug_parameters() & ~DBG_ACTIVE);
+ return true;
+}
+
+
+DebugCommand* ContinueCommand::Build(TokenVector&& args) {
+ if (args.length() != 1) {
+ return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+ }
+
+ return js_new<ContinueCommand>(args[0]);
+}
+
+
+bool StepCommand::Run(Debugger* debugger) {
+ VIXL_ASSERT(debugger->IsDebuggerRunning());
+
+ int64_t steps = count();
+ if (steps < 0) {
+ printf(" ** invalid value for steps: %" PRId64 " (<0) **\n", steps);
+ } else if (steps > 1) {
+ debugger->set_steps(steps - 1);
+ }
+
+ return true;
+}
+
+
+void StepCommand::Print(FILE* out) {
+ fprintf(out, "%s %" PRId64 "", name(), count());
+}
+
+
+DebugCommand* StepCommand::Build(TokenVector&& args) {
+ IntegerToken* count = NULL;
+ switch (args.length()) {
+ case 1: { // step [1]
+ count = js_new<IntegerToken>(1);
+ break;
+ }
+ case 2: { // step n
+ Token* first = args[1];
+ if (!first->IsInteger()) {
+ return js_new<InvalidCommand>(std::move(args), 1, "expects int");
+ }
+ count = IntegerToken::Cast(first);
+ break;
+ }
+ default:
+ return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+ }
+
+ return js_new<StepCommand>(args[0], count);
+}
+
+
+DebugCommand* DisasmCommand::Build(TokenVector&& args) {
+ IntegerToken* count = NULL;
+ switch (args.length()) {
+ case 1: { // disasm [10]
+ count = js_new<IntegerToken>(10);
+ break;
+ }
+ case 2: { // disasm n
+ Token* first = args[1];
+ if (!first->IsInteger()) {
+ return js_new<InvalidCommand>(std::move(args), 1, "expects int");
+ }
+
+ count = IntegerToken::Cast(first);
+ break;
+ }
+ default:
+ return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+ }
+
+ Token* target = js_new<IdentifierToken>("pc");
+ FormatToken* format = js_new<Format<uint32_t>>("%08" PRIx32, 'i');
+ return js_new<ExamineCommand>(args[0], target, format, count);
+}
+
+
+void PrintCommand::Print(FILE* out) {
+ fprintf(out, "%s ", name());
+ target()->Print(out);
+ if (format() != NULL) format()->Print(out);
+}
+
+
+bool PrintCommand::Run(Debugger* debugger) {
+ VIXL_ASSERT(debugger->IsDebuggerRunning());
+
+ Token* tok = target();
+ if (tok->IsIdentifier()) {
+ char* identifier = IdentifierToken::Cast(tok)->value();
+ if (strcmp(identifier, "regs") == 0) {
+ debugger->PrintRegisters();
+ } else if (strcmp(identifier, "fpregs") == 0) {
+ debugger->PrintVRegisters();
+ } else if (strcmp(identifier, "sysregs") == 0) {
+ debugger->PrintSystemRegisters();
+ } else if (strcmp(identifier, "pc") == 0) {
+ printf("pc = %16p\n", reinterpret_cast<const void*>(debugger->pc()));
+ } else {
+ printf(" ** Unknown identifier to print: %s **\n", identifier);
+ }
+
+ return false;
+ }
+
+ FormatToken* format_tok = format();
+ VIXL_ASSERT(format_tok != NULL);
+ if (format_tok->type_code() == 'i') {
+ // TODO(all): Add support for instruction disassembly.
+ printf(" ** unsupported format: instructions **\n");
+ return false;
+ }
+
+ if (tok->IsRegister()) {
+ RegisterToken* reg_tok = RegisterToken::Cast(tok);
+ Register reg = reg_tok->value();
+ debugger->PrintRegister(reg, reg_tok->Name(), format_tok);
+ return false;
+ }
+
+ if (tok->IsFPRegister()) {
+ FPRegister fpreg = FPRegisterToken::Cast(tok)->value();
+ debugger->PrintFPRegister(fpreg, format_tok);
+ return false;
+ }
+
+ VIXL_UNREACHABLE();
+ return false;
+}
+
+
+DebugCommand* PrintCommand::Build(TokenVector&& args) {
+ if (args.length() < 2) {
+ return js_new<InvalidCommand>(std::move(args), -1, "too few arguments");
+ }
+
+ Token* target = args[1];
+ if (!target->IsRegister() &&
+ !target->IsFPRegister() &&
+ !target->IsIdentifier()) {
+ return js_new<InvalidCommand>(std::move(args), 1, "expects reg or identifier");
+ }
+
+ FormatToken* format = NULL;
+ int target_size = 0;
+ if (target->IsRegister()) {
+ Register reg = RegisterToken::Cast(target)->value();
+ target_size = reg.SizeInBytes();
+ } else if (target->IsFPRegister()) {
+ FPRegister fpreg = FPRegisterToken::Cast(target)->value();
+ target_size = fpreg.SizeInBytes();
+ }
+ // If the target is an identifier there must be no format. This is checked
+ // in the switch statement below.
+
+ switch (args.length()) {
+ case 2: {
+ if (target->IsRegister()) {
+ switch (target_size) {
+ case 4: format = js_new<Format<uint32_t>>("%08" PRIx32, 'x'); break;
+ case 8: format = js_new<Format<uint64_t>>("%016" PRIx64, 'x'); break;
+ default: VIXL_UNREACHABLE();
+ }
+ } else if (target->IsFPRegister()) {
+ switch (target_size) {
+ case 4: format = js_new<Format<float>>("%8g", 'f'); break;
+ case 8: format = js_new<Format<double>>("%8g", 'f'); break;
+ default: VIXL_UNREACHABLE();
+ }
+ }
+ break;
+ }
+ case 3: {
+ if (target->IsIdentifier()) {
+ return js_new<InvalidCommand>(std::move(args), 2,
+ "format is only allowed with registers");
+ }
+
+ Token* second = args[2];
+ if (!second->IsFormat()) {
+ return js_new<InvalidCommand>(std::move(args), 2, "expects format");
+ }
+ format = FormatToken::Cast(second);
+
+ if (format->SizeOf() > target_size) {
+ return js_new<InvalidCommand>(std::move(args), 2, "format too wide");
+ }
+
+ break;
+ }
+ default:
+ return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+ }
+
+ return js_new<PrintCommand>(args[0], target, format);
+}
+
+
+bool ExamineCommand::Run(Debugger* debugger) {
+ VIXL_ASSERT(debugger->IsDebuggerRunning());
+
+ uint8_t* address = target()->ToAddress(debugger);
+ int64_t amount = count()->value();
+ if (format()->type_code() == 'i') {
+ debugger->PrintInstructions(address, amount);
+ } else {
+ debugger->PrintMemory(address, format(), amount);
+ }
+
+ return false;
+}
+
+
+void ExamineCommand::Print(FILE* out) {
+ fprintf(out, "%s ", name());
+ format()->Print(out);
+ target()->Print(out);
+}
+
+
+DebugCommand* ExamineCommand::Build(TokenVector&& args) {
+ if (args.length() < 2) {
+ return js_new<InvalidCommand>(std::move(args), -1, "too few arguments");
+ }
+
+ Token* target = args[1];
+ if (!target->CanAddressMemory()) {
+ return js_new<InvalidCommand>(std::move(args), 1, "expects address");
+ }
+
+ FormatToken* format = NULL;
+ IntegerToken* count = NULL;
+
+ switch (args.length()) {
+ case 2: { // mem addr[.x64] [10]
+ format = js_new<Format<uint64_t>>("%016" PRIx64, 'x');
+ count = js_new<IntegerToken>(10);
+ break;
+ }
+ case 3: { // mem addr.format [10]
+ // mem addr[.x64] n
+ Token* second = args[2];
+ if (second->IsFormat()) {
+ format = FormatToken::Cast(second);
+ count = js_new<IntegerToken>(10);
+ break;
+ } else if (second->IsInteger()) {
+ format = js_new<Format<uint64_t>>("%016" PRIx64, 'x');
+        count = IntegerToken::Cast(second);
+        break;
+ } else {
+ return js_new<InvalidCommand>(std::move(args), 2, "expects format or integer");
+ }
+ VIXL_UNREACHABLE();
+ break;
+ }
+ case 4: { // mem addr.format n
+ Token* second = args[2];
+ Token* third = args[3];
+ if (!second->IsFormat() || !third->IsInteger()) {
+ return js_new<InvalidCommand>(std::move(args), -1, "expects addr[.format] [n]");
+ }
+ format = FormatToken::Cast(second);
+ count = IntegerToken::Cast(third);
+ break;
+ }
+ default:
+ return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+ }
+
+ return js_new<ExamineCommand>(args[0], target, format, count);
+}
+
+
+UnknownCommand::~UnknownCommand() {
+ const size_t size = args_.length();
+ for (size_t i = 0; i < size; ++i) {
+ js_delete(args_[i]);
+ }
+}
+
+
+bool UnknownCommand::Run(Debugger* debugger) {
+ VIXL_ASSERT(debugger->IsDebuggerRunning());
+ USE(debugger);
+
+ printf(" ** Unknown Command:");
+ const size_t size = args_.length();
+ for (size_t i = 0; i < size; ++i) {
+ printf(" ");
+ args_[i]->Print(stdout);
+ }
+ printf(" **\n");
+
+ return false;
+}
+
+
+InvalidCommand::~InvalidCommand() {
+ const size_t size = args_.length();
+ for (size_t i = 0; i < size; ++i) {
+ js_delete(args_[i]);
+ }
+}
+
+
+bool InvalidCommand::Run(Debugger* debugger) {
+ VIXL_ASSERT(debugger->IsDebuggerRunning());
+ USE(debugger);
+
+ printf(" ** Invalid Command:");
+ const size_t size = args_.length();
+ for (size_t i = 0; i < size; ++i) {
+ printf(" ");
+ if (i == static_cast<size_t>(index_)) {
+ printf(">>");
+ args_[i]->Print(stdout);
+ printf("<<");
+ } else {
+ args_[i]->Print(stdout);
+ }
+ }
+ printf(" **\n");
+ printf(" ** %s\n", cause_);
+
+ return false;
+}
+
+} // namespace vixl
+
+#endif // JS_SIMULATOR_ARM64
diff --git a/js/src/jit/arm64/vixl/Debugger-vixl.h b/js/src/jit/arm64/vixl/Debugger-vixl.h
new file mode 100644
index 0000000000..7236bf1e5e
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Debugger-vixl.h
@@ -0,0 +1,117 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifdef JS_SIMULATOR_ARM64
+
+#ifndef VIXL_A64_DEBUGGER_A64_H_
+#define VIXL_A64_DEBUGGER_A64_H_
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+
+#include "jit/arm64/vixl/Constants-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Simulator-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+namespace vixl {
+
+// Flags that represent the debugger state.
+enum DebugParameters {
+ DBG_INACTIVE = 0,
+ DBG_ACTIVE = 1 << 0, // The debugger is active.
+ DBG_BREAK = 1 << 1 // The debugger is at a breakpoint.
+};
+
+// Forward declarations.
+class DebugCommand;
+class Token;
+class FormatToken;
+
+class Debugger : public Simulator {
+ public:
+ explicit Debugger(Decoder* decoder, FILE* stream = stdout);
+ ~Debugger();
+
+ virtual void Run() override;
+ virtual void VisitException(const Instruction* instr) override;
+
+ int debug_parameters() const { return debug_parameters_; }
+ void set_debug_parameters(int parameters) {
+ debug_parameters_ = parameters;
+
+ update_pending_request();
+ }
+
+  // Number of instructions to execute before the debugger shell is given
+ // back control.
+ int64_t steps() const { return steps_; }
+ void set_steps(int64_t value) {
+ VIXL_ASSERT(value > 1);
+ steps_ = value;
+ }
+
+ bool IsDebuggerRunning() const {
+ return (debug_parameters_ & DBG_ACTIVE) != 0;
+ }
+
+ bool pending_request() const { return pending_request_; }
+ void update_pending_request() {
+ pending_request_ = IsDebuggerRunning();
+ }
+
+ void PrintInstructions(const void* address, int64_t count = 1);
+ void PrintMemory(const uint8_t* address,
+ const FormatToken* format,
+ int64_t count = 1);
+ void PrintRegister(const Register& target_reg,
+ const char* name,
+ const FormatToken* format);
+ void PrintFPRegister(const FPRegister& target_fpreg,
+ const FormatToken* format);
+
+ private:
+ char* ReadCommandLine(const char* prompt, char* buffer, int length);
+ void RunDebuggerShell();
+ void DoBreakpoint(const Instruction* instr);
+
+ int debug_parameters_;
+ bool pending_request_;
+ int64_t steps_;
+ DebugCommand* last_command_;
+ PrintDisassembler* disasm_;
+ Decoder* printer_;
+
+  // Maximum length of a command line accepted by the debugger shell.
+ static const int kMaxDebugShellLine = 256;
+};
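+
+// Minimal usage sketch (illustrative; the usual Simulator setup from
+// Simulator-vixl.h, such as a stack and an entry point, is omitted here):
+//
+//   Decoder decoder;
+//   Debugger debugger(&decoder);
+//   debugger.set_debug_parameters(DBG_ACTIVE);
+//   debugger.Run();  // drops into the "vixl>" shell before each instruction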
+
+} // namespace vixl
+
+#endif // VIXL_A64_DEBUGGER_A64_H_
+
+#endif // JS_SIMULATOR_ARM64
diff --git a/js/src/jit/arm64/vixl/Decoder-vixl.cpp b/js/src/jit/arm64/vixl/Decoder-vixl.cpp
new file mode 100644
index 0000000000..884654ec8e
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Decoder-vixl.cpp
@@ -0,0 +1,899 @@
+// Copyright 2014, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Decoder-vixl.h"
+
+#include <algorithm>
+
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+namespace vixl {
+
+void Decoder::DecodeInstruction(const Instruction *instr) {
+ if (instr->Bits(28, 27) == 0) {
+ VisitUnallocated(instr);
+ } else {
+ switch (instr->Bits(27, 24)) {
+ // 0: PC relative addressing.
+ case 0x0: DecodePCRelAddressing(instr); break;
+
+ // 1: Add/sub immediate.
+ case 0x1: DecodeAddSubImmediate(instr); break;
+
+ // A: Logical shifted register.
+ // Add/sub with carry.
+ // Conditional compare register.
+ // Conditional compare immediate.
+ // Conditional select.
+ // Data processing 1 source.
+ // Data processing 2 source.
+ // B: Add/sub shifted register.
+ // Add/sub extended register.
+ // Data processing 3 source.
+ case 0xA:
+ case 0xB: DecodeDataProcessing(instr); break;
+
+ // 2: Logical immediate.
+ // Move wide immediate.
+ case 0x2: DecodeLogical(instr); break;
+
+ // 3: Bitfield.
+ // Extract.
+ case 0x3: DecodeBitfieldExtract(instr); break;
+
+ // 4: Unconditional branch immediate.
+ // Exception generation.
+ // Compare and branch immediate.
+ // 5: Compare and branch immediate.
+ // Conditional branch.
+ // System.
+ // 6,7: Unconditional branch.
+ // Test and branch immediate.
+ case 0x4:
+ case 0x5:
+ case 0x6:
+ case 0x7: DecodeBranchSystemException(instr); break;
+
+ // 8,9: Load/store register pair post-index.
+ // Load register literal.
+ // Load/store register unscaled immediate.
+ // Load/store register immediate post-index.
+ // Load/store register immediate pre-index.
+ // Load/store register offset.
+ // Load/store exclusive.
+ // C,D: Load/store register pair offset.
+ // Load/store register pair pre-index.
+ // Load/store register unsigned immediate.
+ // Advanced SIMD.
+ case 0x8:
+ case 0x9:
+ case 0xC:
+ case 0xD: DecodeLoadStore(instr); break;
+
+ // E: FP fixed point conversion.
+ // FP integer conversion.
+ // FP data processing 1 source.
+ // FP compare.
+ // FP immediate.
+ // FP data processing 2 source.
+ // FP conditional compare.
+ // FP conditional select.
+ // Advanced SIMD.
+ // F: FP data processing 3 source.
+ // Advanced SIMD.
+ case 0xE:
+ case 0xF: DecodeFP(instr); break;
+ }
+ }
+}
+
+void Decoder::AppendVisitor(DecoderVisitor* new_visitor) {
+ MOZ_ALWAYS_TRUE(visitors_.append(new_visitor));
+}
+
+
+void Decoder::PrependVisitor(DecoderVisitor* new_visitor) {
+ MOZ_ALWAYS_TRUE(visitors_.insert(visitors_.begin(), new_visitor));
+}
+
+
+void Decoder::InsertVisitorBefore(DecoderVisitor* new_visitor,
+ DecoderVisitor* registered_visitor) {
+ for (auto it = visitors_.begin(); it != visitors_.end(); it++) {
+ if (*it == registered_visitor) {
+ MOZ_ALWAYS_TRUE(visitors_.insert(it, new_visitor));
+ return;
+ }
+ }
+ // We reached the end of the list without finding registered_visitor.
+ MOZ_ALWAYS_TRUE(visitors_.append(new_visitor));
+}
+
+
+void Decoder::InsertVisitorAfter(DecoderVisitor* new_visitor,
+ DecoderVisitor* registered_visitor) {
+ for (auto it = visitors_.begin(); it != visitors_.end(); it++) {
+ if (*it == registered_visitor) {
+ it++;
+ MOZ_ALWAYS_TRUE(visitors_.insert(it, new_visitor));
+ return;
+ }
+ }
+ // We reached the end of the list without finding registered_visitor.
+ MOZ_ALWAYS_TRUE(visitors_.append(new_visitor));
+}
+
+
+void Decoder::RemoveVisitor(DecoderVisitor* visitor) {
+ visitors_.erase(std::remove(visitors_.begin(), visitors_.end(), visitor),
+ visitors_.end());
+}
+
+
+void Decoder::DecodePCRelAddressing(const Instruction* instr) {
+ VIXL_ASSERT(instr->Bits(27, 24) == 0x0);
+ // We know bit 28 is set, as <b28:b27> = 0 is filtered out at the top level
+ // decode.
+ VIXL_ASSERT(instr->Bit(28) == 0x1);
+ VisitPCRelAddressing(instr);
+}
+
+
+void Decoder::DecodeBranchSystemException(const Instruction* instr) {
+ VIXL_ASSERT((instr->Bits(27, 24) == 0x4) ||
+ (instr->Bits(27, 24) == 0x5) ||
+ (instr->Bits(27, 24) == 0x6) ||
+ (instr->Bits(27, 24) == 0x7) );
+
+ switch (instr->Bits(31, 29)) {
+ case 0:
+ case 4: {
+ VisitUnconditionalBranch(instr);
+ break;
+ }
+ case 1:
+ case 5: {
+ if (instr->Bit(25) == 0) {
+ VisitCompareBranch(instr);
+ } else {
+ VisitTestBranch(instr);
+ }
+ break;
+ }
+ case 2: {
+ if (instr->Bit(25) == 0) {
+ if ((instr->Bit(24) == 0x1) ||
+ (instr->Mask(0x01000010) == 0x00000010)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitConditionalBranch(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ break;
+ }
+ case 6: {
+ if (instr->Bit(25) == 0) {
+ if (instr->Bit(24) == 0) {
+ if ((instr->Bits(4, 2) != 0) ||
+ (instr->Mask(0x00E0001D) == 0x00200001) ||
+ (instr->Mask(0x00E0001D) == 0x00400001) ||
+ (instr->Mask(0x00E0001E) == 0x00200002) ||
+ (instr->Mask(0x00E0001E) == 0x00400002) ||
+ (instr->Mask(0x00E0001C) == 0x00600000) ||
+ (instr->Mask(0x00E0001C) == 0x00800000) ||
+ (instr->Mask(0x00E0001F) == 0x00A00000) ||
+ (instr->Mask(0x00C0001C) == 0x00C00000)) {
+ if (instr->InstructionBits() == UNDEFINED_INST_PATTERN) {
+ VisitException(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ VisitException(instr);
+ }
+ } else {
+ if (instr->Bits(23, 22) == 0) {
+ const Instr masked_003FF0E0 = instr->Mask(0x003FF0E0);
+ if ((instr->Bits(21, 19) == 0x4) ||
+ (masked_003FF0E0 == 0x00033000) ||
+ (masked_003FF0E0 == 0x003FF020) ||
+ (masked_003FF0E0 == 0x003FF060) ||
+ (masked_003FF0E0 == 0x003FF0E0) ||
+ (instr->Mask(0x00388000) == 0x00008000) ||
+ (instr->Mask(0x0038E000) == 0x00000000) ||
+ (instr->Mask(0x0039E000) == 0x00002000) ||
+ (instr->Mask(0x003AE000) == 0x00002000) ||
+ (instr->Mask(0x003CE000) == 0x00042000) ||
+ (instr->Mask(0x003FFFC0) == 0x000320C0) ||
+ (instr->Mask(0x003FF100) == 0x00032100) ||
+ // (instr->Mask(0x003FF200) == 0x00032200) || // match CSDB
+ (instr->Mask(0x003FF400) == 0x00032400) ||
+ (instr->Mask(0x003FF800) == 0x00032800) ||
+ (instr->Mask(0x0038F000) == 0x00005000) ||
+ (instr->Mask(0x0038E000) == 0x00006000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitSystem(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ if ((instr->Bit(24) == 0x1) ||
+ (instr->Bits(20, 16) != 0x1F) ||
+ (instr->Bits(15, 10) != 0) ||
+ (instr->Bits(4, 0) != 0) ||
+ (instr->Bits(24, 21) == 0x3) ||
+ (instr->Bits(24, 22) == 0x3)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitUnconditionalBranchToRegister(instr);
+ }
+ }
+ break;
+ }
+ case 3:
+ case 7: {
+ VisitUnallocated(instr);
+ break;
+ }
+ }
+}
+
+
+void Decoder::DecodeLoadStore(const Instruction* instr) {
+ VIXL_ASSERT((instr->Bits(27, 24) == 0x8) ||
+ (instr->Bits(27, 24) == 0x9) ||
+ (instr->Bits(27, 24) == 0xC) ||
+ (instr->Bits(27, 24) == 0xD) );
+ // TODO(all): rearrange the tree to integrate this branch.
+ if ((instr->Bit(28) == 0) && (instr->Bit(29) == 0) && (instr->Bit(26) == 1)) {
+ DecodeNEONLoadStore(instr);
+ return;
+ }
+
+ if (instr->Bit(24) == 0) {
+ if (instr->Bit(28) == 0) {
+ if (instr->Bit(29) == 0) {
+ if (instr->Bit(26) == 0) {
+ VisitLoadStoreExclusive(instr);
+ } else {
+ VIXL_UNREACHABLE();
+ }
+ } else {
+ if ((instr->Bits(31, 30) == 0x3) ||
+ (instr->Mask(0xC4400000) == 0x40000000)) {
+ VisitUnallocated(instr);
+ } else {
+ if (instr->Bit(23) == 0) {
+ if (instr->Mask(0xC4400000) == 0xC0400000) {
+ VisitUnallocated(instr);
+ } else {
+ VisitLoadStorePairNonTemporal(instr);
+ }
+ } else {
+ VisitLoadStorePairPostIndex(instr);
+ }
+ }
+ }
+ } else {
+ if (instr->Bit(29) == 0) {
+ if (instr->Mask(0xC4000000) == 0xC4000000) {
+ VisitUnallocated(instr);
+ } else {
+ VisitLoadLiteral(instr);
+ }
+ } else {
+ if ((instr->Mask(0x44800000) == 0x44800000) ||
+ (instr->Mask(0x84800000) == 0x84800000)) {
+ VisitUnallocated(instr);
+ } else {
+ if (instr->Bit(21) == 0) {
+ switch (instr->Bits(11, 10)) {
+ case 0: {
+ VisitLoadStoreUnscaledOffset(instr);
+ break;
+ }
+ case 1: {
+ if (instr->Mask(0xC4C00000) == 0xC0800000) {
+ VisitUnallocated(instr);
+ } else {
+ VisitLoadStorePostIndex(instr);
+ }
+ break;
+ }
+ case 2: {
+ // TODO: VisitLoadStoreRegisterOffsetUnpriv.
+ VisitUnimplemented(instr);
+ break;
+ }
+ case 3: {
+ if (instr->Mask(0xC4C00000) == 0xC0800000) {
+ VisitUnallocated(instr);
+ } else {
+ VisitLoadStorePreIndex(instr);
+ }
+ break;
+ }
+ }
+ } else {
+ if (instr->Bits(11, 10) == 0x2) {
+ if (instr->Bit(14) == 0) {
+ VisitUnallocated(instr);
+ } else {
+ VisitLoadStoreRegisterOffset(instr);
+ }
+ } else {
+ if (instr->Bits(11, 10) == 0x0) {
+ if (instr->Bit(25) == 0) {
+ if (instr->Bit(26) == 0) {
+ if ((instr->Bit(15) == 1) &&
+ ((instr->Bits(14, 12) == 0x1) ||
+ (instr->Bit(13) == 1) ||
+ (instr->Bits(14, 12) == 0x5) ||
+ ((instr->Bits(14, 12) == 0x4) &&
+ ((instr->Bit(23) == 0) ||
+ (instr->Bits(23, 22) == 0x3))))) {
+ VisitUnallocated(instr);
+ } else {
+ VisitAtomicMemory(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ }
+ }
+ }
+ }
+ } else {
+ if (instr->Bit(28) == 0) {
+ if (instr->Bit(29) == 0) {
+ VisitUnallocated(instr);
+ } else {
+ if ((instr->Bits(31, 30) == 0x3) ||
+ (instr->Mask(0xC4400000) == 0x40000000)) {
+ VisitUnallocated(instr);
+ } else {
+ if (instr->Bit(23) == 0) {
+ VisitLoadStorePairOffset(instr);
+ } else {
+ VisitLoadStorePairPreIndex(instr);
+ }
+ }
+ }
+ } else {
+ if (instr->Bit(29) == 0) {
+ VisitUnallocated(instr);
+ } else {
+ if ((instr->Mask(0x84C00000) == 0x80C00000) ||
+ (instr->Mask(0x44800000) == 0x44800000) ||
+ (instr->Mask(0x84800000) == 0x84800000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitLoadStoreUnsignedOffset(instr);
+ }
+ }
+ }
+ }
+}
+
+
+void Decoder::DecodeLogical(const Instruction* instr) {
+ VIXL_ASSERT(instr->Bits(27, 24) == 0x2);
+
+ if (instr->Mask(0x80400000) == 0x00400000) {
+ VisitUnallocated(instr);
+ } else {
+ if (instr->Bit(23) == 0) {
+ VisitLogicalImmediate(instr);
+ } else {
+ if (instr->Bits(30, 29) == 0x1) {
+ VisitUnallocated(instr);
+ } else {
+ VisitMoveWideImmediate(instr);
+ }
+ }
+ }
+}
+
+
+void Decoder::DecodeBitfieldExtract(const Instruction* instr) {
+ VIXL_ASSERT(instr->Bits(27, 24) == 0x3);
+
+ if ((instr->Mask(0x80400000) == 0x80000000) ||
+ (instr->Mask(0x80400000) == 0x00400000) ||
+ (instr->Mask(0x80008000) == 0x00008000)) {
+ VisitUnallocated(instr);
+ } else if (instr->Bit(23) == 0) {
+ if ((instr->Mask(0x80200000) == 0x00200000) ||
+ (instr->Mask(0x60000000) == 0x60000000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitBitfield(instr);
+ }
+ } else {
+ if ((instr->Mask(0x60200000) == 0x00200000) ||
+ (instr->Mask(0x60000000) != 0x00000000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitExtract(instr);
+ }
+ }
+}
+
+
+void Decoder::DecodeAddSubImmediate(const Instruction* instr) {
+ VIXL_ASSERT(instr->Bits(27, 24) == 0x1);
+ if (instr->Bit(23) == 1) {
+ VisitUnallocated(instr);
+ } else {
+ VisitAddSubImmediate(instr);
+ }
+}
+
+
+void Decoder::DecodeDataProcessing(const Instruction* instr) {
+ VIXL_ASSERT((instr->Bits(27, 24) == 0xA) ||
+ (instr->Bits(27, 24) == 0xB));
+
+ if (instr->Bit(24) == 0) {
+ if (instr->Bit(28) == 0) {
+ if (instr->Mask(0x80008000) == 0x00008000) {
+ VisitUnallocated(instr);
+ } else {
+ VisitLogicalShifted(instr);
+ }
+ } else {
+ switch (instr->Bits(23, 21)) {
+ case 0: {
+ if (instr->Mask(0x0000FC00) != 0) {
+ VisitUnallocated(instr);
+ } else {
+ VisitAddSubWithCarry(instr);
+ }
+ break;
+ }
+ case 2: {
+ if ((instr->Bit(29) == 0) ||
+ (instr->Mask(0x00000410) != 0)) {
+ VisitUnallocated(instr);
+ } else {
+ if (instr->Bit(11) == 0) {
+ VisitConditionalCompareRegister(instr);
+ } else {
+ VisitConditionalCompareImmediate(instr);
+ }
+ }
+ break;
+ }
+ case 4: {
+ if (instr->Mask(0x20000800) != 0x00000000) {
+ VisitUnallocated(instr);
+ } else {
+ VisitConditionalSelect(instr);
+ }
+ break;
+ }
+ case 6: {
+ if (instr->Bit(29) == 0x1) {
+ VisitUnallocated(instr);
+ VIXL_FALLTHROUGH();
+ } else {
+ if (instr->Bit(30) == 0) {
+ if ((instr->Bit(15) == 0x1) ||
+ (instr->Bits(15, 11) == 0) ||
+ (instr->Bits(15, 12) == 0x1) ||
+ (instr->Bits(15, 12) == 0x3) ||
+ (instr->Bits(15, 13) == 0x3) ||
+ (instr->Mask(0x8000EC00) == 0x00004C00) ||
+ (instr->Mask(0x8000E800) == 0x80004000) ||
+ (instr->Mask(0x8000E400) == 0x80004000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitDataProcessing2Source(instr);
+ }
+ } else {
+ if ((instr->Bit(13) == 1) ||
+ (instr->Bits(20, 16) != 0) ||
+ (instr->Bits(15, 14) != 0) ||
+ (instr->Mask(0xA01FFC00) == 0x00000C00) ||
+ (instr->Mask(0x201FF800) == 0x00001800)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitDataProcessing1Source(instr);
+ }
+ }
+ break;
+ }
+ }
+ case 1:
+ case 3:
+ case 5:
+ case 7: VisitUnallocated(instr); break;
+ }
+ }
+ } else {
+ if (instr->Bit(28) == 0) {
+ if (instr->Bit(21) == 0) {
+ if ((instr->Bits(23, 22) == 0x3) ||
+ (instr->Mask(0x80008000) == 0x00008000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitAddSubShifted(instr);
+ }
+ } else {
+ if ((instr->Mask(0x00C00000) != 0x00000000) ||
+ (instr->Mask(0x00001400) == 0x00001400) ||
+ (instr->Mask(0x00001800) == 0x00001800)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitAddSubExtended(instr);
+ }
+ }
+ } else {
+ if ((instr->Bit(30) == 0x1) ||
+ (instr->Bits(30, 29) == 0x1) ||
+ (instr->Mask(0xE0600000) == 0x00200000) ||
+ (instr->Mask(0xE0608000) == 0x00400000) ||
+ (instr->Mask(0x60608000) == 0x00408000) ||
+ (instr->Mask(0x60E00000) == 0x00E00000) ||
+ (instr->Mask(0x60E00000) == 0x00800000) ||
+ (instr->Mask(0x60E00000) == 0x00600000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitDataProcessing3Source(instr);
+ }
+ }
+ }
+}
+
+
+void Decoder::DecodeFP(const Instruction* instr) {
+ VIXL_ASSERT((instr->Bits(27, 24) == 0xE) ||
+ (instr->Bits(27, 24) == 0xF));
+ if (instr->Bit(28) == 0) {
+ DecodeNEONVectorDataProcessing(instr);
+ } else {
+ if (instr->Bits(31, 30) == 0x3) {
+ VisitUnallocated(instr);
+ } else if (instr->Bits(31, 30) == 0x1) {
+ DecodeNEONScalarDataProcessing(instr);
+ } else {
+ if (instr->Bit(29) == 0) {
+ if (instr->Bit(24) == 0) {
+ if (instr->Bit(21) == 0) {
+ if ((instr->Bit(23) == 1) ||
+ (instr->Bit(18) == 1) ||
+ (instr->Mask(0x80008000) == 0x00000000) ||
+ (instr->Mask(0x000E0000) == 0x00000000) ||
+ (instr->Mask(0x000E0000) == 0x000A0000) ||
+ (instr->Mask(0x00160000) == 0x00000000) ||
+ (instr->Mask(0x00160000) == 0x00120000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitFPFixedPointConvert(instr);
+ }
+ } else {
+ if (instr->Bits(15, 10) == 32) {
+ VisitUnallocated(instr);
+ } else if (instr->Bits(15, 10) == 0) {
+ if ((instr->Bits(23, 22) == 0x3) ||
+ (instr->Mask(0x000E0000) == 0x000A0000) ||
+ (instr->Mask(0x000E0000) == 0x000C0000) ||
+ (instr->Mask(0x00160000) == 0x00120000) ||
+ (instr->Mask(0x00160000) == 0x00140000) ||
+ (instr->Mask(0x20C40000) == 0x00800000) ||
+ (instr->Mask(0x20C60000) == 0x00840000) ||
+ (instr->Mask(0xA0C60000) == 0x80060000) ||
+ (instr->Mask(0xA0C60000) == 0x00860000) ||
+ (instr->Mask(0xA0C60000) == 0x00460000) ||
+ (instr->Mask(0xA0CE0000) == 0x80860000) ||
+ (instr->Mask(0xA0CE0000) == 0x804E0000) ||
+ (instr->Mask(0xA0CE0000) == 0x000E0000) ||
+ (instr->Mask(0xA0D60000) == 0x00160000) ||
+ (instr->Mask(0xA0D60000) == 0x80560000) ||
+ (instr->Mask(0xA0D60000) == 0x80960000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitFPIntegerConvert(instr);
+ }
+ } else if (instr->Bits(14, 10) == 16) {
+ const Instr masked_A0DF8000 = instr->Mask(0xA0DF8000);
+ if ((instr->Mask(0x80180000) != 0) ||
+ (masked_A0DF8000 == 0x00020000) ||
+ (masked_A0DF8000 == 0x00030000) ||
+ (masked_A0DF8000 == 0x00068000) ||
+ (masked_A0DF8000 == 0x00428000) ||
+ (masked_A0DF8000 == 0x00430000) ||
+ (masked_A0DF8000 == 0x00468000) ||
+ (instr->Mask(0xA0D80000) == 0x00800000) ||
+ (instr->Mask(0xA0DE0000) == 0x00C00000) ||
+ (instr->Mask(0xA0DF0000) == 0x00C30000) ||
+ (instr->Mask(0xA0DC0000) == 0x00C40000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitFPDataProcessing1Source(instr);
+ }
+ } else if (instr->Bits(13, 10) == 8) {
+ if ((instr->Bits(15, 14) != 0) ||
+ (instr->Bits(2, 0) != 0) ||
+ (instr->Mask(0x80800000) != 0x00000000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitFPCompare(instr);
+ }
+ } else if (instr->Bits(12, 10) == 4) {
+ if ((instr->Bits(9, 5) != 0) ||
+ (instr->Mask(0x80800000) != 0x00000000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitFPImmediate(instr);
+ }
+ } else {
+ if (instr->Mask(0x80800000) != 0x00000000) {
+ VisitUnallocated(instr);
+ } else {
+ switch (instr->Bits(11, 10)) {
+ case 1: {
+ VisitFPConditionalCompare(instr);
+ break;
+ }
+ case 2: {
+ if ((instr->Bits(15, 14) == 0x3) ||
+ (instr->Mask(0x00009000) == 0x00009000) ||
+ (instr->Mask(0x0000A000) == 0x0000A000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitFPDataProcessing2Source(instr);
+ }
+ break;
+ }
+ case 3: {
+ VisitFPConditionalSelect(instr);
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ }
+ }
+ }
+ } else {
+ // Bit 30 == 1 has been handled earlier.
+ VIXL_ASSERT(instr->Bit(30) == 0);
+ if (instr->Mask(0xA0800000) != 0) {
+ VisitUnallocated(instr);
+ } else {
+ VisitFPDataProcessing3Source(instr);
+ }
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ }
+}
+
+
+void Decoder::DecodeNEONLoadStore(const Instruction* instr) {
+ VIXL_ASSERT(instr->Bits(29, 25) == 0x6);
+ if (instr->Bit(31) == 0) {
+ if ((instr->Bit(24) == 0) && (instr->Bit(21) == 1)) {
+ VisitUnallocated(instr);
+ return;
+ }
+
+ if (instr->Bit(23) == 0) {
+ if (instr->Bits(20, 16) == 0) {
+ if (instr->Bit(24) == 0) {
+ VisitNEONLoadStoreMultiStruct(instr);
+ } else {
+ VisitNEONLoadStoreSingleStruct(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ if (instr->Bit(24) == 0) {
+ VisitNEONLoadStoreMultiStructPostIndex(instr);
+ } else {
+ VisitNEONLoadStoreSingleStructPostIndex(instr);
+ }
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+}
+
+
+void Decoder::DecodeNEONVectorDataProcessing(const Instruction* instr) {
+ VIXL_ASSERT(instr->Bits(28, 25) == 0x7);
+ if (instr->Bit(31) == 0) {
+ if (instr->Bit(24) == 0) {
+ if (instr->Bit(21) == 0) {
+ if (instr->Bit(15) == 0) {
+ if (instr->Bit(10) == 0) {
+ if (instr->Bit(29) == 0) {
+ if (instr->Bit(11) == 0) {
+ VisitNEONTable(instr);
+ } else {
+ VisitNEONPerm(instr);
+ }
+ } else {
+ VisitNEONExtract(instr);
+ }
+ } else {
+ if (instr->Bits(23, 22) == 0) {
+ VisitNEONCopy(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ if (instr->Bit(10) == 0) {
+ if (instr->Bit(11) == 0) {
+ VisitNEON3Different(instr);
+ } else {
+ if (instr->Bits(18, 17) == 0) {
+ if (instr->Bit(20) == 0) {
+ if (instr->Bit(19) == 0) {
+ VisitNEON2RegMisc(instr);
+ } else {
+ if (instr->Bits(30, 29) == 0x2) {
+ VisitCryptoAES(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ if (instr->Bit(19) == 0) {
+ VisitNEONAcrossLanes(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ VisitNEON3Same(instr);
+ }
+ }
+ } else {
+ if (instr->Bit(10) == 0) {
+ VisitNEONByIndexedElement(instr);
+ } else {
+ if (instr->Bit(23) == 0) {
+ if (instr->Bits(22, 19) == 0) {
+ VisitNEONModifiedImmediate(instr);
+ } else {
+ VisitNEONShiftImmediate(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+}
+
+
+void Decoder::DecodeNEONScalarDataProcessing(const Instruction* instr) {
+ VIXL_ASSERT(instr->Bits(28, 25) == 0xF);
+ if (instr->Bit(24) == 0) {
+ if (instr->Bit(21) == 0) {
+ if (instr->Bit(15) == 0) {
+ if (instr->Bit(10) == 0) {
+ if (instr->Bit(29) == 0) {
+ if (instr->Bit(11) == 0) {
+ VisitCrypto3RegSHA(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ if (instr->Bits(23, 22) == 0) {
+ VisitNEONScalarCopy(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ if (instr->Bit(10) == 0) {
+ if (instr->Bit(11) == 0) {
+ VisitNEONScalar3Diff(instr);
+ } else {
+ if (instr->Bits(18, 17) == 0) {
+ if (instr->Bit(20) == 0) {
+ if (instr->Bit(19) == 0) {
+ VisitNEONScalar2RegMisc(instr);
+ } else {
+ if (instr->Bit(29) == 0) {
+ VisitCrypto2RegSHA(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ if (instr->Bit(19) == 0) {
+ VisitNEONScalarPairwise(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ VisitNEONScalar3Same(instr);
+ }
+ }
+ } else {
+ if (instr->Bit(10) == 0) {
+ VisitNEONScalarByIndexedElement(instr);
+ } else {
+ if (instr->Bit(23) == 0) {
+ VisitNEONScalarShiftImmediate(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ }
+}
+
+
+#define DEFINE_VISITOR_CALLERS(A) \
+ void Decoder::Visit##A(const Instruction *instr) { \
+ VIXL_ASSERT(instr->Mask(A##FMask) == A##Fixed); \
+ for (auto visitor : visitors_) { \
+ visitor->Visit##A(instr); \
+ } \
+ }
+VISITOR_LIST(DEFINE_VISITOR_CALLERS)
+#undef DEFINE_VISITOR_CALLERS
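+
+// For reference, an illustrative expansion of the macro above for a single
+// visitor entry (A = AddSubImmediate):
+//
+//   void Decoder::VisitAddSubImmediate(const Instruction *instr) {
+//     VIXL_ASSERT(instr->Mask(AddSubImmediateFMask) == AddSubImmediateFixed);
+//     for (auto visitor : visitors_) {
+//       visitor->VisitAddSubImmediate(instr);
+//     }
+//   }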
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Decoder-vixl.h b/js/src/jit/arm64/vixl/Decoder-vixl.h
new file mode 100644
index 0000000000..1b3cf172ac
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Decoder-vixl.h
@@ -0,0 +1,276 @@
+// Copyright 2014, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_DECODER_A64_H_
+#define VIXL_A64_DECODER_A64_H_
+
+#include "mozilla/Vector.h"
+
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Instructions-vixl.h"
+#include "js/AllocPolicy.h"
+
+
+// List macro containing all visitors needed by the decoder class.
+
+#define VISITOR_LIST_THAT_RETURN(V) \
+ V(PCRelAddressing) \
+ V(AddSubImmediate) \
+ V(LogicalImmediate) \
+ V(MoveWideImmediate) \
+ V(AtomicMemory) \
+ V(Bitfield) \
+ V(Extract) \
+ V(UnconditionalBranch) \
+ V(UnconditionalBranchToRegister) \
+ V(CompareBranch) \
+ V(TestBranch) \
+ V(ConditionalBranch) \
+ V(System) \
+ V(Exception) \
+ V(LoadStorePairPostIndex) \
+ V(LoadStorePairOffset) \
+ V(LoadStorePairPreIndex) \
+ V(LoadStorePairNonTemporal) \
+ V(LoadLiteral) \
+ V(LoadStoreUnscaledOffset) \
+ V(LoadStorePostIndex) \
+ V(LoadStorePreIndex) \
+ V(LoadStoreRegisterOffset) \
+ V(LoadStoreUnsignedOffset) \
+ V(LoadStoreExclusive) \
+ V(LogicalShifted) \
+ V(AddSubShifted) \
+ V(AddSubExtended) \
+ V(AddSubWithCarry) \
+ V(ConditionalCompareRegister) \
+ V(ConditionalCompareImmediate) \
+ V(ConditionalSelect) \
+ V(DataProcessing1Source) \
+ V(DataProcessing2Source) \
+ V(DataProcessing3Source) \
+ V(FPCompare) \
+ V(FPConditionalCompare) \
+ V(FPConditionalSelect) \
+ V(FPImmediate) \
+ V(FPDataProcessing1Source) \
+ V(FPDataProcessing2Source) \
+ V(FPDataProcessing3Source) \
+ V(FPIntegerConvert) \
+ V(FPFixedPointConvert) \
+ V(Crypto2RegSHA) \
+ V(Crypto3RegSHA) \
+ V(CryptoAES) \
+ V(NEON2RegMisc) \
+ V(NEON3Different) \
+ V(NEON3Same) \
+ V(NEONAcrossLanes) \
+ V(NEONByIndexedElement) \
+ V(NEONCopy) \
+ V(NEONExtract) \
+ V(NEONLoadStoreMultiStruct) \
+ V(NEONLoadStoreMultiStructPostIndex) \
+ V(NEONLoadStoreSingleStruct) \
+ V(NEONLoadStoreSingleStructPostIndex) \
+ V(NEONModifiedImmediate) \
+ V(NEONScalar2RegMisc) \
+ V(NEONScalar3Diff) \
+ V(NEONScalar3Same) \
+ V(NEONScalarByIndexedElement) \
+ V(NEONScalarCopy) \
+ V(NEONScalarPairwise) \
+ V(NEONScalarShiftImmediate) \
+ V(NEONShiftImmediate) \
+ V(NEONTable) \
+ V(NEONPerm)
+
+#define VISITOR_LIST_THAT_DONT_RETURN(V) \
+ V(Unallocated) \
+ V(Unimplemented) \
+
+#define VISITOR_LIST(V) \
+ VISITOR_LIST_THAT_RETURN(V) \
+ VISITOR_LIST_THAT_DONT_RETURN(V) \
+
+namespace vixl {
+
+// The Visitor interface. Disassembler and simulator (and other tools)
+// must provide implementations for all of these functions.
+class DecoderVisitor {
+ public:
+ enum VisitorConstness {
+ kConstVisitor,
+ kNonConstVisitor
+ };
+ explicit DecoderVisitor(VisitorConstness constness = kConstVisitor)
+ : constness_(constness) {}
+
+ virtual ~DecoderVisitor() {}
+
+ #define DECLARE(A) virtual void Visit##A(const Instruction* instr) = 0;
+ VISITOR_LIST(DECLARE)
+ #undef DECLARE
+
+ bool IsConstVisitor() const { return constness_ == kConstVisitor; }
+ Instruction* MutableInstruction(const Instruction* instr) {
+ VIXL_ASSERT(!IsConstVisitor());
+ return const_cast<Instruction*>(instr);
+ }
+
+ private:
+ const VisitorConstness constness_;
+};
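+
+// An illustrative sketch (not part of the original sources) of a concrete
+// visitor: the VISITOR_LIST macro makes it mechanical to override every
+// pure-virtual Visit<Name> method, here simply counting decoded instructions.
+//
+//   class CountingVisitor : public DecoderVisitor {
+//    public:
+//     #define DECLARE(A) void Visit##A(const Instruction*) override { count_++; }
+//     VISITOR_LIST(DECLARE)
+//     #undef DECLARE
+//     int count() const { return count_; }
+//    private:
+//     int count_ = 0;
+//   };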
+
+
+class Decoder {
+ public:
+ Decoder() {}
+
+ // Top-level wrappers around the actual decoding function.
+ void Decode(const Instruction* instr) {
+#ifdef DEBUG
+ for (auto visitor : visitors_) {
+ VIXL_ASSERT(visitor->IsConstVisitor());
+ }
+#endif
+ DecodeInstruction(instr);
+ }
+ void Decode(Instruction* instr) {
+ DecodeInstruction(const_cast<const Instruction*>(instr));
+ }
+
+ // Register a new visitor class with the decoder.
+ // Decode() will call the corresponding visitor method from all registered
+ // visitor classes when decoding reaches the leaf node of the instruction
+ // decode tree.
+ // Visitors are called in order.
+ // A visitor can be registered multiple times.
+ //
+ // d.AppendVisitor(V1);
+ // d.AppendVisitor(V2);
+ // d.PrependVisitor(V2);
+ // d.AppendVisitor(V3);
+ //
+ // d.Decode(i);
+ //
+ // will call in order visitor methods in V2, V1, V2, V3.
+ void AppendVisitor(DecoderVisitor* visitor);
+ void PrependVisitor(DecoderVisitor* visitor);
+ // These helpers register `new_visitor` before or after the first instance of
+ // `registered_visitor` in the list.
+ // So if
+ // V1, V2, V1, V2
+ // are registered in this order in the decoder, calls to
+ // d.InsertVisitorAfter(V3, V1);
+ // d.InsertVisitorBefore(V4, V2);
+ // will yield the order
+ // V1, V3, V4, V2, V1, V2
+ //
+ // For more complex modifications of the order of registered visitors, combine
+ // these helpers with RemoveVisitor() to rebuild the list in the desired
+ // order.
+ void InsertVisitorBefore(DecoderVisitor* new_visitor,
+ DecoderVisitor* registered_visitor);
+ void InsertVisitorAfter(DecoderVisitor* new_visitor,
+ DecoderVisitor* registered_visitor);
+
+ // Remove all instances of a previously registered visitor class from the list
+ // of visitors stored by the decoder.
+ void RemoveVisitor(DecoderVisitor* visitor);
+
+ #define DECLARE(A) void Visit##A(const Instruction* instr);
+ VISITOR_LIST(DECLARE)
+ #undef DECLARE
+
+
+ private:
+ // Decodes an instruction and calls the visitor functions registered with the
+ // Decoder class.
+ void DecodeInstruction(const Instruction* instr);
+
+ // Decode the PC relative addressing instruction, and call the corresponding
+ // visitors.
+ // On entry, instruction bits 27:24 = 0x0.
+ void DecodePCRelAddressing(const Instruction* instr);
+
+ // Decode the add/subtract immediate instruction, and call the corresponding
+ // visitors.
+ // On entry, instruction bits 27:24 = 0x1.
+ void DecodeAddSubImmediate(const Instruction* instr);
+
+ // Decode the branch, system command, and exception generation parts of
+ // the instruction tree, and call the corresponding visitors.
+ // On entry, instruction bits 27:24 = {0x4, 0x5, 0x6, 0x7}.
+ void DecodeBranchSystemException(const Instruction* instr);
+
+ // Decode the load and store parts of the instruction tree, and call
+ // the corresponding visitors.
+ // On entry, instruction bits 27:24 = {0x8, 0x9, 0xC, 0xD}.
+ void DecodeLoadStore(const Instruction* instr);
+
+ // Decode the logical immediate and move wide immediate parts of the
+ // instruction tree, and call the corresponding visitors.
+ // On entry, instruction bits 27:24 = 0x2.
+ void DecodeLogical(const Instruction* instr);
+
+ // Decode the bitfield and extraction parts of the instruction tree,
+ // and call the corresponding visitors.
+ // On entry, instruction bits 27:24 = 0x3.
+ void DecodeBitfieldExtract(const Instruction* instr);
+
+ // Decode the data processing parts of the instruction tree, and call the
+ // corresponding visitors.
+ // On entry, instruction bits 27:24 = {0xA, 0xB}.
+ void DecodeDataProcessing(const Instruction* instr);
+
+ // Decode the floating point parts of the instruction tree, and call the
+ // corresponding visitors.
+ // On entry, instruction bits 27:24 = {0xE, 0xF}.
+ void DecodeFP(const Instruction* instr);
+
+ // Decode the Advanced SIMD (NEON) load/store part of the instruction tree,
+ // and call the corresponding visitors.
+ // On entry, instruction bits 29:25 = 0x6.
+ void DecodeNEONLoadStore(const Instruction* instr);
+
+ // Decode the Advanced SIMD (NEON) vector data processing part of the
+ // instruction tree, and call the corresponding visitors.
+ // On entry, instruction bits 28:25 = 0x7.
+ void DecodeNEONVectorDataProcessing(const Instruction* instr);
+
+ // Decode the Advanced SIMD (NEON) scalar data processing part of the
+ // instruction tree, and call the corresponding visitors.
+ // On entry, instruction bits 28:25 = 0xF.
+ void DecodeNEONScalarDataProcessing(const Instruction* instr);
+
+ private:
+ // Visitors are registered in a list.
+ mozilla::Vector<DecoderVisitor*, 8, js::SystemAllocPolicy> visitors_;
+};
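+
+// Illustrative usage (assuming `disasm` is some DecoderVisitor implementation,
+// e.g. the Disassembler declared in Disasm-vixl.h, and `instr` points at a
+// valid instruction):
+//
+//   Decoder decoder;
+//   decoder.AppendVisitor(&disasm);
+//   decoder.Decode(instr);  // Calls disasm.Visit<Name>(instr) at each leaf.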
+
+} // namespace vixl
+
+#endif // VIXL_A64_DECODER_A64_H_
diff --git a/js/src/jit/arm64/vixl/Disasm-vixl.cpp b/js/src/jit/arm64/vixl/Disasm-vixl.cpp
new file mode 100644
index 0000000000..1116ebb67b
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Disasm-vixl.cpp
@@ -0,0 +1,3741 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Disasm-vixl.h"
+
+#include "mozilla/Sprintf.h"
+#include <cstdlib>
+
+namespace vixl {
+
+Disassembler::Disassembler() {
+ buffer_size_ = 256;
+ buffer_ = reinterpret_cast<char*>(malloc(buffer_size_));
+ buffer_pos_ = 0;
+ own_buffer_ = true;
+ code_address_offset_ = 0;
+}
+
+
+Disassembler::Disassembler(char* text_buffer, int buffer_size) {
+ buffer_size_ = buffer_size;
+ buffer_ = text_buffer;
+ buffer_pos_ = 0;
+ own_buffer_ = false;
+ code_address_offset_ = 0;
+}
+
+
+Disassembler::~Disassembler() {
+ if (own_buffer_) {
+ free(buffer_);
+ }
+}
+
+
+char* Disassembler::GetOutput() {
+ return buffer_;
+}
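+
+// Illustrative usage (not part of the original sources): the default
+// constructor above allocates and owns a 256-byte output buffer, freed in the
+// destructor, while the two-argument constructor writes into caller-provided
+// storage:
+//
+//   char buf[256];
+//   vixl::Disassembler disasm(buf, sizeof(buf));
+//   // ... after decoding, disasm.GetOutput() returns `buf`.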
+
+
+void Disassembler::VisitAddSubImmediate(const Instruction* instr) {
+ bool rd_is_zr = RdIsZROrSP(instr);
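+ // An ADD with a zero immediate where Rd or Rn is the stack pointer (register
+ // code 31 in this encoding) is printed below using its "mov" alias.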
+ bool stack_op = (rd_is_zr || RnIsZROrSP(instr)) &&
+ (instr->ImmAddSub() == 0) ? true : false;
+ const char *mnemonic = "";
+ const char *form = "'Rds, 'Rns, 'IAddSub";
+ const char *form_cmp = "'Rns, 'IAddSub";
+ const char *form_mov = "'Rds, 'Rns";
+
+ switch (instr->Mask(AddSubImmediateMask)) {
+ case ADD_w_imm:
+ case ADD_x_imm: {
+ mnemonic = "add";
+ if (stack_op) {
+ mnemonic = "mov";
+ form = form_mov;
+ }
+ break;
+ }
+ case ADDS_w_imm:
+ case ADDS_x_imm: {
+ mnemonic = "adds";
+ if (rd_is_zr) {
+ mnemonic = "cmn";
+ form = form_cmp;
+ }
+ break;
+ }
+ case SUB_w_imm:
+ case SUB_x_imm: mnemonic = "sub"; break;
+ case SUBS_w_imm:
+ case SUBS_x_imm: {
+ mnemonic = "subs";
+ if (rd_is_zr) {
+ mnemonic = "cmp";
+ form = form_cmp;
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitAddSubShifted(const Instruction* instr) {
+ bool rd_is_zr = RdIsZROrSP(instr);
+ bool rn_is_zr = RnIsZROrSP(instr);
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'Rn, 'Rm'NDP";
+ const char *form_cmp = "'Rn, 'Rm'NDP";
+ const char *form_neg = "'Rd, 'Rm'NDP";
+
+ switch (instr->Mask(AddSubShiftedMask)) {
+ case ADD_w_shift:
+ case ADD_x_shift: mnemonic = "add"; break;
+ case ADDS_w_shift:
+ case ADDS_x_shift: {
+ mnemonic = "adds";
+ if (rd_is_zr) {
+ mnemonic = "cmn";
+ form = form_cmp;
+ }
+ break;
+ }
+ case SUB_w_shift:
+ case SUB_x_shift: {
+ mnemonic = "sub";
+ if (rn_is_zr) {
+ mnemonic = "neg";
+ form = form_neg;
+ }
+ break;
+ }
+ case SUBS_w_shift:
+ case SUBS_x_shift: {
+ mnemonic = "subs";
+ if (rd_is_zr) {
+ mnemonic = "cmp";
+ form = form_cmp;
+ } else if (rn_is_zr) {
+ mnemonic = "negs";
+ form = form_neg;
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitAddSubExtended(const Instruction* instr) {
+ bool rd_is_zr = RdIsZROrSP(instr);
+ const char *mnemonic = "";
+ Extend mode = static_cast<Extend>(instr->ExtendMode());
+ const char *form = ((mode == UXTX) || (mode == SXTX)) ?
+ "'Rds, 'Rns, 'Xm'Ext" : "'Rds, 'Rns, 'Wm'Ext";
+ const char *form_cmp = ((mode == UXTX) || (mode == SXTX)) ?
+ "'Rns, 'Xm'Ext" : "'Rns, 'Wm'Ext";
+
+ switch (instr->Mask(AddSubExtendedMask)) {
+ case ADD_w_ext:
+ case ADD_x_ext: mnemonic = "add"; break;
+ case ADDS_w_ext:
+ case ADDS_x_ext: {
+ mnemonic = "adds";
+ if (rd_is_zr) {
+ mnemonic = "cmn";
+ form = form_cmp;
+ }
+ break;
+ }
+ case SUB_w_ext:
+ case SUB_x_ext: mnemonic = "sub"; break;
+ case SUBS_w_ext:
+ case SUBS_x_ext: {
+ mnemonic = "subs";
+ if (rd_is_zr) {
+ mnemonic = "cmp";
+ form = form_cmp;
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitAddSubWithCarry(const Instruction* instr) {
+ bool rn_is_zr = RnIsZROrSP(instr);
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'Rn, 'Rm";
+ const char *form_neg = "'Rd, 'Rm";
+
+ switch (instr->Mask(AddSubWithCarryMask)) {
+ case ADC_w:
+ case ADC_x: mnemonic = "adc"; break;
+ case ADCS_w:
+ case ADCS_x: mnemonic = "adcs"; break;
+ case SBC_w:
+ case SBC_x: {
+ mnemonic = "sbc";
+ if (rn_is_zr) {
+ mnemonic = "ngc";
+ form = form_neg;
+ }
+ break;
+ }
+ case SBCS_w:
+ case SBCS_x: {
+ mnemonic = "sbcs";
+ if (rn_is_zr) {
+ mnemonic = "ngcs";
+ form = form_neg;
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLogicalImmediate(const Instruction* instr) {
+ bool rd_is_zr = RdIsZROrSP(instr);
+ bool rn_is_zr = RnIsZROrSP(instr);
+ const char *mnemonic = "";
+ const char *form = "'Rds, 'Rn, 'ITri";
+
+ if (instr->ImmLogical() == 0) {
+ // The immediate encoded in the instruction is not in the expected format.
+ Format(instr, "unallocated", "(LogicalImmediate)");
+ return;
+ }
+
+ switch (instr->Mask(LogicalImmediateMask)) {
+ case AND_w_imm:
+ case AND_x_imm: mnemonic = "and"; break;
+ case ORR_w_imm:
+ case ORR_x_imm: {
+ mnemonic = "orr";
+ unsigned reg_size = (instr->SixtyFourBits() == 1) ? kXRegSize
+ : kWRegSize;
+ if (rn_is_zr && !IsMovzMovnImm(reg_size, instr->ImmLogical())) {
+ mnemonic = "mov";
+ form = "'Rds, 'ITri";
+ }
+ break;
+ }
+ case EOR_w_imm:
+ case EOR_x_imm: mnemonic = "eor"; break;
+ case ANDS_w_imm:
+ case ANDS_x_imm: {
+ mnemonic = "ands";
+ if (rd_is_zr) {
+ mnemonic = "tst";
+ form = "'Rn, 'ITri";
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
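+// Worked examples (illustrative): 0x0000000012340000 fits a single movz
+// ("movz x0, #0x1234, lsl #16") and 0xffffffffffff1234 fits a single movn
+// ("movn x0, #0xedcb"), so both values return true below, whereas
+// 0x0000000012345678 spans two 16-bit chunks and returns false.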
+bool Disassembler::IsMovzMovnImm(unsigned reg_size, uint64_t value) {
+ VIXL_ASSERT((reg_size == kXRegSize) ||
+ ((reg_size == kWRegSize) && (value <= 0xffffffff)));
+
+ // Test for movz: 16 bits set at positions 0, 16, 32 or 48.
+ if (((value & UINT64_C(0xffffffffffff0000)) == 0) ||
+ ((value & UINT64_C(0xffffffff0000ffff)) == 0) ||
+ ((value & UINT64_C(0xffff0000ffffffff)) == 0) ||
+ ((value & UINT64_C(0x0000ffffffffffff)) == 0)) {
+ return true;
+ }
+
+ // Test for movn: NOT(16 bits set at positions 0, 16, 32 or 48).
+ if ((reg_size == kXRegSize) &&
+ (((~value & UINT64_C(0xffffffffffff0000)) == 0) ||
+ ((~value & UINT64_C(0xffffffff0000ffff)) == 0) ||
+ ((~value & UINT64_C(0xffff0000ffffffff)) == 0) ||
+ ((~value & UINT64_C(0x0000ffffffffffff)) == 0))) {
+ return true;
+ }
+ if ((reg_size == kWRegSize) &&
+ (((value & 0xffff0000) == 0xffff0000) ||
+ ((value & 0x0000ffff) == 0x0000ffff))) {
+ return true;
+ }
+ return false;
+}
+
+
+void Disassembler::VisitLogicalShifted(const Instruction* instr) {
+ bool rd_is_zr = RdIsZROrSP(instr);
+ bool rn_is_zr = RnIsZROrSP(instr);
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'Rn, 'Rm'NLo";
+
+ switch (instr->Mask(LogicalShiftedMask)) {
+ case AND_w:
+ case AND_x: mnemonic = "and"; break;
+ case BIC_w:
+ case BIC_x: mnemonic = "bic"; break;
+ case EOR_w:
+ case EOR_x: mnemonic = "eor"; break;
+ case EON_w:
+ case EON_x: mnemonic = "eon"; break;
+ case BICS_w:
+ case BICS_x: mnemonic = "bics"; break;
+ case ANDS_w:
+ case ANDS_x: {
+ mnemonic = "ands";
+ if (rd_is_zr) {
+ mnemonic = "tst";
+ form = "'Rn, 'Rm'NLo";
+ }
+ break;
+ }
+ case ORR_w:
+ case ORR_x: {
+ mnemonic = "orr";
+ if (rn_is_zr && (instr->ImmDPShift() == 0) && (instr->ShiftDP() == LSL)) {
+ mnemonic = "mov";
+ form = "'Rd, 'Rm";
+ }
+ break;
+ }
+ case ORN_w:
+ case ORN_x: {
+ mnemonic = "orn";
+ if (rn_is_zr) {
+ mnemonic = "mvn";
+ form = "'Rd, 'Rm'NLo";
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitConditionalCompareRegister(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Rn, 'Rm, 'INzcv, 'Cond";
+
+ switch (instr->Mask(ConditionalCompareRegisterMask)) {
+ case CCMN_w:
+ case CCMN_x: mnemonic = "ccmn"; break;
+ case CCMP_w:
+ case CCMP_x: mnemonic = "ccmp"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitConditionalCompareImmediate(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Rn, 'IP, 'INzcv, 'Cond";
+
+ switch (instr->Mask(ConditionalCompareImmediateMask)) {
+ case CCMN_w_imm:
+ case CCMN_x_imm: mnemonic = "ccmn"; break;
+ case CCMP_w_imm:
+ case CCMP_x_imm: mnemonic = "ccmp"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitConditionalSelect(const Instruction* instr) {
+ bool rnm_is_zr = (RnIsZROrSP(instr) && RmIsZROrSP(instr));
+ bool rn_is_rm = (instr->Rn() == instr->Rm());
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'Rn, 'Rm, 'Cond";
+ const char *form_test = "'Rd, 'CInv";
+ const char *form_update = "'Rd, 'Rn, 'CInv";
+
+ Condition cond = static_cast<Condition>(instr->Condition());
+ bool invertible_cond = (cond != al) && (cond != nv);
+
+ switch (instr->Mask(ConditionalSelectMask)) {
+ case CSEL_w:
+ case CSEL_x: mnemonic = "csel"; break;
+ case CSINC_w:
+ case CSINC_x: {
+ mnemonic = "csinc";
+ if (rnm_is_zr && invertible_cond) {
+ mnemonic = "cset";
+ form = form_test;
+ } else if (rn_is_rm && invertible_cond) {
+ mnemonic = "cinc";
+ form = form_update;
+ }
+ break;
+ }
+ case CSINV_w:
+ case CSINV_x: {
+ mnemonic = "csinv";
+ if (rnm_is_zr && invertible_cond) {
+ mnemonic = "csetm";
+ form = form_test;
+ } else if (rn_is_rm && invertible_cond) {
+ mnemonic = "cinv";
+ form = form_update;
+ }
+ break;
+ }
+ case CSNEG_w:
+ case CSNEG_x: {
+ mnemonic = "csneg";
+ if (rn_is_rm && invertible_cond) {
+ mnemonic = "cneg";
+ form = form_update;
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitBitfield(const Instruction* instr) {
+ unsigned s = instr->ImmS();
+ unsigned r = instr->ImmR();
+ unsigned rd_size_minus_1 =
+ ((instr->SixtyFourBits() == 1) ? kXRegSize : kWRegSize) - 1;
+ const char *mnemonic = "";
+ const char *form = "";
+ const char *form_shift_right = "'Rd, 'Rn, 'IBr";
+ const char *form_extend = "'Rd, 'Wn";
+ const char *form_bfiz = "'Rd, 'Rn, 'IBZ-r, 'IBs+1";
+ const char *form_bfx = "'Rd, 'Rn, 'IBr, 'IBs-r+1";
+ const char *form_lsl = "'Rd, 'Rn, 'IBZ-r";
+
+ switch (instr->Mask(BitfieldMask)) {
+ case SBFM_w:
+ case SBFM_x: {
+ mnemonic = "sbfx";
+ form = form_bfx;
+ if (r == 0) {
+ form = form_extend;
+ if (s == 7) {
+ mnemonic = "sxtb";
+ } else if (s == 15) {
+ mnemonic = "sxth";
+ } else if ((s == 31) && (instr->SixtyFourBits() == 1)) {
+ mnemonic = "sxtw";
+ } else {
+ form = form_bfx;
+ }
+ } else if (s == rd_size_minus_1) {
+ mnemonic = "asr";
+ form = form_shift_right;
+ } else if (s < r) {
+ mnemonic = "sbfiz";
+ form = form_bfiz;
+ }
+ break;
+ }
+ case UBFM_w:
+ case UBFM_x: {
+ mnemonic = "ubfx";
+ form = form_bfx;
+ if (r == 0) {
+ form = form_extend;
+ if (s == 7) {
+ mnemonic = "uxtb";
+ } else if (s == 15) {
+ mnemonic = "uxth";
+ } else {
+ form = form_bfx;
+ }
+ }
+ if (s == rd_size_minus_1) {
+ mnemonic = "lsr";
+ form = form_shift_right;
+ } else if (r == s + 1) {
+ mnemonic = "lsl";
+ form = form_lsl;
+ } else if (s < r) {
+ mnemonic = "ubfiz";
+ form = form_bfiz;
+ }
+ break;
+ }
+ case BFM_w:
+ case BFM_x: {
+ mnemonic = "bfxil";
+ form = form_bfx;
+ if (s < r) {
+ mnemonic = "bfi";
+ form = form_bfiz;
+ }
+ }
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitExtract(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'Rn, 'Rm, 'IExtract";
+
+ switch (instr->Mask(ExtractMask)) {
+ case EXTR_w:
+ case EXTR_x: {
+ if (instr->Rn() == instr->Rm()) {
+ mnemonic = "ror";
+ form = "'Rd, 'Rn, 'IExtract";
+ } else {
+ mnemonic = "extr";
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitPCRelAddressing(const Instruction* instr) {
+ switch (instr->Mask(PCRelAddressingMask)) {
+ case ADR: Format(instr, "adr", "'Xd, 'AddrPCRelByte"); break;
+ case ADRP: Format(instr, "adrp", "'Xd, 'AddrPCRelPage"); break;
+ default: Format(instr, "unimplemented", "(PCRelAddressing)");
+ }
+}
+
+
+void Disassembler::VisitConditionalBranch(const Instruction* instr) {
+ switch (instr->Mask(ConditionalBranchMask)) {
+ case B_cond: Format(instr, "b.'CBrn", "'TImmCond"); break;
+ default: VIXL_UNREACHABLE();
+ }
+}
+
+
+void Disassembler::VisitUnconditionalBranchToRegister(
+ const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Xn";
+
+ switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
+ case BR: mnemonic = "br"; break;
+ case BLR: mnemonic = "blr"; break;
+ case RET: {
+ mnemonic = "ret";
+ if (instr->Rn() == kLinkRegCode) {
+ form = NULL;
+ }
+ break;
+ }
+ default: form = "(UnconditionalBranchToRegister)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitUnconditionalBranch(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'TImmUncn";
+
+ switch (instr->Mask(UnconditionalBranchMask)) {
+ case B: mnemonic = "b"; break;
+ case BL: mnemonic = "bl"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitDataProcessing1Source(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'Rn";
+
+ switch (instr->Mask(DataProcessing1SourceMask)) {
+ #define FORMAT(A, B) \
+ case A##_w: \
+ case A##_x: mnemonic = B; break;
+ FORMAT(RBIT, "rbit");
+ FORMAT(REV16, "rev16");
+ FORMAT(REV, "rev");
+ FORMAT(CLZ, "clz");
+ FORMAT(CLS, "cls");
+ #undef FORMAT
+ case REV32_x: mnemonic = "rev32"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitDataProcessing2Source(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Rd, 'Rn, 'Rm";
+ const char *form_wwx = "'Wd, 'Wn, 'Xm";
+
+ switch (instr->Mask(DataProcessing2SourceMask)) {
+ #define FORMAT(A, B) \
+ case A##_w: \
+ case A##_x: mnemonic = B; break;
+ FORMAT(UDIV, "udiv");
+ FORMAT(SDIV, "sdiv");
+ FORMAT(LSLV, "lsl");
+ FORMAT(LSRV, "lsr");
+ FORMAT(ASRV, "asr");
+ FORMAT(RORV, "ror");
+ #undef FORMAT
+ case CRC32B: mnemonic = "crc32b"; break;
+ case CRC32H: mnemonic = "crc32h"; break;
+ case CRC32W: mnemonic = "crc32w"; break;
+ case CRC32X: mnemonic = "crc32x"; form = form_wwx; break;
+ case CRC32CB: mnemonic = "crc32cb"; break;
+ case CRC32CH: mnemonic = "crc32ch"; break;
+ case CRC32CW: mnemonic = "crc32cw"; break;
+ case CRC32CX: mnemonic = "crc32cx"; form = form_wwx; break;
+ default: form = "(DataProcessing2Source)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitDataProcessing3Source(const Instruction* instr) {
+ bool ra_is_zr = RaIsZROrSP(instr);
+ const char *mnemonic = "";
+ const char *form = "'Xd, 'Wn, 'Wm, 'Xa";
+ const char *form_rrr = "'Rd, 'Rn, 'Rm";
+ const char *form_rrrr = "'Rd, 'Rn, 'Rm, 'Ra";
+ const char *form_xww = "'Xd, 'Wn, 'Wm";
+ const char *form_xxx = "'Xd, 'Xn, 'Xm";
+
+ switch (instr->Mask(DataProcessing3SourceMask)) {
+ case MADD_w:
+ case MADD_x: {
+ mnemonic = "madd";
+ form = form_rrrr;
+ if (ra_is_zr) {
+ mnemonic = "mul";
+ form = form_rrr;
+ }
+ break;
+ }
+ case MSUB_w:
+ case MSUB_x: {
+ mnemonic = "msub";
+ form = form_rrrr;
+ if (ra_is_zr) {
+ mnemonic = "mneg";
+ form = form_rrr;
+ }
+ break;
+ }
+ case SMADDL_x: {
+ mnemonic = "smaddl";
+ if (ra_is_zr) {
+ mnemonic = "smull";
+ form = form_xww;
+ }
+ break;
+ }
+ case SMSUBL_x: {
+ mnemonic = "smsubl";
+ if (ra_is_zr) {
+ mnemonic = "smnegl";
+ form = form_xww;
+ }
+ break;
+ }
+ case UMADDL_x: {
+ mnemonic = "umaddl";
+ if (ra_is_zr) {
+ mnemonic = "umull";
+ form = form_xww;
+ }
+ break;
+ }
+ case UMSUBL_x: {
+ mnemonic = "umsubl";
+ if (ra_is_zr) {
+ mnemonic = "umnegl";
+ form = form_xww;
+ }
+ break;
+ }
+ case SMULH_x: {
+ mnemonic = "smulh";
+ form = form_xxx;
+ break;
+ }
+ case UMULH_x: {
+ mnemonic = "umulh";
+ form = form_xxx;
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitCompareBranch(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Rt, 'TImmCmpa";
+
+ switch (instr->Mask(CompareBranchMask)) {
+ case CBZ_w:
+ case CBZ_x: mnemonic = "cbz"; break;
+ case CBNZ_w:
+ case CBNZ_x: mnemonic = "cbnz"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitTestBranch(const Instruction* instr) {
+ const char *mnemonic = "";
+ // If the top bit of the immediate is clear, the tested register is
+ // disassembled as Wt, otherwise Xt. As the top bit of the immediate is
+ // encoded in bit 31 of the instruction, we can reuse the Rt form, which
+ // uses bit 31 (normally "sf") to choose the register size.
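+ // For example, a tested bit below 32 prints as "tbz w3, #5, <label>", while
+ // bit 45 prints as "tbz x3, #45, <label>".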
+ const char *form = "'Rt, 'IS, 'TImmTest";
+
+ switch (instr->Mask(TestBranchMask)) {
+ case TBZ: mnemonic = "tbz"; break;
+ case TBNZ: mnemonic = "tbnz"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitMoveWideImmediate(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'IMoveImm";
+
+ // Print the shift separately for movk, to make it clear which half word will
+ // be overwritten. Movn and movz print the computed immediate, which includes
+ // shift calculation.
+ switch (instr->Mask(MoveWideImmediateMask)) {
+ case MOVN_w:
+ case MOVN_x:
+ if ((instr->ImmMoveWide()) || (instr->ShiftMoveWide() == 0)) {
+ if ((instr->SixtyFourBits() == 0) && (instr->ImmMoveWide() == 0xffff)) {
+ mnemonic = "movn";
+ } else {
+ mnemonic = "mov";
+ form = "'Rd, 'IMoveNeg";
+ }
+ } else {
+ mnemonic = "movn";
+ }
+ break;
+ case MOVZ_w:
+ case MOVZ_x:
+ if ((instr->ImmMoveWide()) || (instr->ShiftMoveWide() == 0))
+ mnemonic = "mov";
+ else
+ mnemonic = "movz";
+ break;
+ case MOVK_w:
+ case MOVK_x: mnemonic = "movk"; form = "'Rd, 'IMoveLSL"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+#define LOAD_STORE_LIST(V) \
+ V(STRB_w, "strb", "'Wt") \
+ V(STRH_w, "strh", "'Wt") \
+ V(STR_w, "str", "'Wt") \
+ V(STR_x, "str", "'Xt") \
+ V(LDRB_w, "ldrb", "'Wt") \
+ V(LDRH_w, "ldrh", "'Wt") \
+ V(LDR_w, "ldr", "'Wt") \
+ V(LDR_x, "ldr", "'Xt") \
+ V(LDRSB_x, "ldrsb", "'Xt") \
+ V(LDRSH_x, "ldrsh", "'Xt") \
+ V(LDRSW_x, "ldrsw", "'Xt") \
+ V(LDRSB_w, "ldrsb", "'Wt") \
+ V(LDRSH_w, "ldrsh", "'Wt") \
+ V(STR_b, "str", "'Bt") \
+ V(STR_h, "str", "'Ht") \
+ V(STR_s, "str", "'St") \
+ V(STR_d, "str", "'Dt") \
+ V(LDR_b, "ldr", "'Bt") \
+ V(LDR_h, "ldr", "'Ht") \
+ V(LDR_s, "ldr", "'St") \
+ V(LDR_d, "ldr", "'Dt") \
+ V(STR_q, "str", "'Qt") \
+ V(LDR_q, "ldr", "'Qt")
+
+void Disassembler::VisitLoadStorePreIndex(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(LoadStorePreIndex)";
+
+ switch (instr->Mask(LoadStorePreIndexMask)) {
+ #define LS_PREINDEX(A, B, C) \
+ case A##_pre: mnemonic = B; form = C ", ['Xns'ILS]!"; break;
+ LOAD_STORE_LIST(LS_PREINDEX)
+ #undef LS_PREINDEX
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadStorePostIndex(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(LoadStorePostIndex)";
+
+ switch (instr->Mask(LoadStorePostIndexMask)) {
+ #define LS_POSTINDEX(A, B, C) \
+ case A##_post: mnemonic = B; form = C ", ['Xns]'ILS"; break;
+ LOAD_STORE_LIST(LS_POSTINDEX)
+ #undef LS_POSTINDEX
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(LoadStoreUnsignedOffset)";
+
+ switch (instr->Mask(LoadStoreUnsignedOffsetMask)) {
+ #define LS_UNSIGNEDOFFSET(A, B, C) \
+ case A##_unsigned: mnemonic = B; form = C ", ['Xns'ILU]"; break;
+ LOAD_STORE_LIST(LS_UNSIGNEDOFFSET)
+ #undef LS_UNSIGNEDOFFSET
+ case PRFM_unsigned: mnemonic = "prfm"; form = "'PrefOp, ['Xns'ILU]";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadStoreRegisterOffset(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(LoadStoreRegisterOffset)";
+
+ switch (instr->Mask(LoadStoreRegisterOffsetMask)) {
+ #define LS_REGISTEROFFSET(A, B, C) \
+ case A##_reg: mnemonic = B; form = C ", ['Xns, 'Offsetreg]"; break;
+ LOAD_STORE_LIST(LS_REGISTEROFFSET)
+ #undef LS_REGISTEROFFSET
+ case PRFM_reg: mnemonic = "prfm"; form = "'PrefOp, ['Xns, 'Offsetreg]";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Wt, ['Xns'ILS]";
+ const char *form_x = "'Xt, ['Xns'ILS]";
+ const char *form_b = "'Bt, ['Xns'ILS]";
+ const char *form_h = "'Ht, ['Xns'ILS]";
+ const char *form_s = "'St, ['Xns'ILS]";
+ const char *form_d = "'Dt, ['Xns'ILS]";
+ const char *form_q = "'Qt, ['Xns'ILS]";
+ const char *form_prefetch = "'PrefOp, ['Xns'ILS]";
+
+ switch (instr->Mask(LoadStoreUnscaledOffsetMask)) {
+ case STURB_w: mnemonic = "sturb"; break;
+ case STURH_w: mnemonic = "sturh"; break;
+ case STUR_w: mnemonic = "stur"; break;
+ case STUR_x: mnemonic = "stur"; form = form_x; break;
+ case STUR_b: mnemonic = "stur"; form = form_b; break;
+ case STUR_h: mnemonic = "stur"; form = form_h; break;
+ case STUR_s: mnemonic = "stur"; form = form_s; break;
+ case STUR_d: mnemonic = "stur"; form = form_d; break;
+ case STUR_q: mnemonic = "stur"; form = form_q; break;
+ case LDURB_w: mnemonic = "ldurb"; break;
+ case LDURH_w: mnemonic = "ldurh"; break;
+ case LDUR_w: mnemonic = "ldur"; break;
+ case LDUR_x: mnemonic = "ldur"; form = form_x; break;
+ case LDUR_b: mnemonic = "ldur"; form = form_b; break;
+ case LDUR_h: mnemonic = "ldur"; form = form_h; break;
+ case LDUR_s: mnemonic = "ldur"; form = form_s; break;
+ case LDUR_d: mnemonic = "ldur"; form = form_d; break;
+ case LDUR_q: mnemonic = "ldur"; form = form_q; break;
+ case LDURSB_x: form = form_x; VIXL_FALLTHROUGH();
+ case LDURSB_w: mnemonic = "ldursb"; break;
+ case LDURSH_x: form = form_x; VIXL_FALLTHROUGH();
+ case LDURSH_w: mnemonic = "ldursh"; break;
+ case LDURSW_x: mnemonic = "ldursw"; form = form_x; break;
+ case PRFUM: mnemonic = "prfum"; form = form_prefetch; break;
+ default: form = "(LoadStoreUnscaledOffset)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadLiteral(const Instruction* instr) {
+ const char *mnemonic = "ldr";
+ const char *form = "(LoadLiteral)";
+
+ switch (instr->Mask(LoadLiteralMask)) {
+ case LDR_w_lit: form = "'Wt, 'ILLiteral 'LValue"; break;
+ case LDR_x_lit: form = "'Xt, 'ILLiteral 'LValue"; break;
+ case LDR_s_lit: form = "'St, 'ILLiteral 'LValue"; break;
+ case LDR_d_lit: form = "'Dt, 'ILLiteral 'LValue"; break;
+ case LDR_q_lit: form = "'Qt, 'ILLiteral 'LValue"; break;
+ case LDRSW_x_lit: {
+ mnemonic = "ldrsw";
+ form = "'Xt, 'ILLiteral 'LValue";
+ break;
+ }
+ case PRFM_lit: {
+ mnemonic = "prfm";
+ form = "'PrefOp, 'ILLiteral 'LValue";
+ break;
+ }
+ default: mnemonic = "unimplemented";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+#define LOAD_STORE_PAIR_LIST(V) \
+ V(STP_w, "stp", "'Wt, 'Wt2", "2") \
+ V(LDP_w, "ldp", "'Wt, 'Wt2", "2") \
+ V(LDPSW_x, "ldpsw", "'Xt, 'Xt2", "2") \
+ V(STP_x, "stp", "'Xt, 'Xt2", "3") \
+ V(LDP_x, "ldp", "'Xt, 'Xt2", "3") \
+ V(STP_s, "stp", "'St, 'St2", "2") \
+ V(LDP_s, "ldp", "'St, 'St2", "2") \
+ V(STP_d, "stp", "'Dt, 'Dt2", "3") \
+ V(LDP_d, "ldp", "'Dt, 'Dt2", "3") \
+ V(LDP_q, "ldp", "'Qt, 'Qt2", "4") \
+ V(STP_q, "stp", "'Qt, 'Qt2", "4")
+
+void Disassembler::VisitLoadStorePairPostIndex(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(LoadStorePairPostIndex)";
+
+ switch (instr->Mask(LoadStorePairPostIndexMask)) {
+ #define LSP_POSTINDEX(A, B, C, D) \
+ case A##_post: mnemonic = B; form = C ", ['Xns]'ILP" D; break;
+ LOAD_STORE_PAIR_LIST(LSP_POSTINDEX)
+ #undef LSP_POSTINDEX
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadStorePairPreIndex(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(LoadStorePairPreIndex)";
+
+ switch (instr->Mask(LoadStorePairPreIndexMask)) {
+ #define LSP_PREINDEX(A, B, C, D) \
+ case A##_pre: mnemonic = B; form = C ", ['Xns'ILP" D "]!"; break;
+ LOAD_STORE_PAIR_LIST(LSP_PREINDEX)
+ #undef LSP_PREINDEX
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadStorePairOffset(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(LoadStorePairOffset)";
+
+ switch (instr->Mask(LoadStorePairOffsetMask)) {
+ #define LSP_OFFSET(A, B, C, D) \
+ case A##_off: mnemonic = B; form = C ", ['Xns'ILP" D "]"; break;
+ LOAD_STORE_PAIR_LIST(LSP_OFFSET)
+ #undef LSP_OFFSET
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadStorePairNonTemporal(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form;
+
+ switch (instr->Mask(LoadStorePairNonTemporalMask)) {
+ case STNP_w: mnemonic = "stnp"; form = "'Wt, 'Wt2, ['Xns'ILP2]"; break;
+ case LDNP_w: mnemonic = "ldnp"; form = "'Wt, 'Wt2, ['Xns'ILP2]"; break;
+ case STNP_x: mnemonic = "stnp"; form = "'Xt, 'Xt2, ['Xns'ILP3]"; break;
+ case LDNP_x: mnemonic = "ldnp"; form = "'Xt, 'Xt2, ['Xns'ILP3]"; break;
+ case STNP_s: mnemonic = "stnp"; form = "'St, 'St2, ['Xns'ILP2]"; break;
+ case LDNP_s: mnemonic = "ldnp"; form = "'St, 'St2, ['Xns'ILP2]"; break;
+ case STNP_d: mnemonic = "stnp"; form = "'Dt, 'Dt2, ['Xns'ILP3]"; break;
+ case LDNP_d: mnemonic = "ldnp"; form = "'Dt, 'Dt2, ['Xns'ILP3]"; break;
+ case STNP_q: mnemonic = "stnp"; form = "'Qt, 'Qt2, ['Xns'ILP4]"; break;
+ case LDNP_q: mnemonic = "ldnp"; form = "'Qt, 'Qt2, ['Xns'ILP4]"; break;
+ default: form = "(LoadStorePairNonTemporal)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+// clang-format off
+#define LOAD_STORE_EXCLUSIVE_LIST(V) \
+ V(STXRB_w, "stxrb", "'Ws, 'Wt") \
+ V(STXRH_w, "stxrh", "'Ws, 'Wt") \
+ V(STXR_w, "stxr", "'Ws, 'Wt") \
+ V(STXR_x, "stxr", "'Ws, 'Xt") \
+ V(LDXRB_w, "ldxrb", "'Wt") \
+ V(LDXRH_w, "ldxrh", "'Wt") \
+ V(LDXR_w, "ldxr", "'Wt") \
+ V(LDXR_x, "ldxr", "'Xt") \
+ V(STXP_w, "stxp", "'Ws, 'Wt, 'Wt2") \
+ V(STXP_x, "stxp", "'Ws, 'Xt, 'Xt2") \
+ V(LDXP_w, "ldxp", "'Wt, 'Wt2") \
+ V(LDXP_x, "ldxp", "'Xt, 'Xt2") \
+ V(STLXRB_w, "stlxrb", "'Ws, 'Wt") \
+ V(STLXRH_w, "stlxrh", "'Ws, 'Wt") \
+ V(STLXR_w, "stlxr", "'Ws, 'Wt") \
+ V(STLXR_x, "stlxr", "'Ws, 'Xt") \
+ V(LDAXRB_w, "ldaxrb", "'Wt") \
+ V(LDAXRH_w, "ldaxrh", "'Wt") \
+ V(LDAXR_w, "ldaxr", "'Wt") \
+ V(LDAXR_x, "ldaxr", "'Xt") \
+ V(STLXP_w, "stlxp", "'Ws, 'Wt, 'Wt2") \
+ V(STLXP_x, "stlxp", "'Ws, 'Xt, 'Xt2") \
+ V(LDAXP_w, "ldaxp", "'Wt, 'Wt2") \
+ V(LDAXP_x, "ldaxp", "'Xt, 'Xt2") \
+ V(STLRB_w, "stlrb", "'Wt") \
+ V(STLRH_w, "stlrh", "'Wt") \
+ V(STLR_w, "stlr", "'Wt") \
+ V(STLR_x, "stlr", "'Xt") \
+ V(LDARB_w, "ldarb", "'Wt") \
+ V(LDARH_w, "ldarh", "'Wt") \
+ V(LDAR_w, "ldar", "'Wt") \
+ V(LDAR_x, "ldar", "'Xt") \
+ V(CAS_w, "cas", "'Ws, 'Wt") \
+ V(CAS_x, "cas", "'Xs, 'Xt") \
+ V(CASA_w, "casa", "'Ws, 'Wt") \
+ V(CASA_x, "casa", "'Xs, 'Xt") \
+ V(CASL_w, "casl", "'Ws, 'Wt") \
+ V(CASL_x, "casl", "'Xs, 'Xt") \
+ V(CASAL_w, "casal", "'Ws, 'Wt") \
+ V(CASAL_x, "casal", "'Xs, 'Xt") \
+ V(CASB, "casb", "'Ws, 'Wt") \
+ V(CASAB, "casab", "'Ws, 'Wt") \
+ V(CASLB, "caslb", "'Ws, 'Wt") \
+ V(CASALB, "casalb", "'Ws, 'Wt") \
+ V(CASH, "cash", "'Ws, 'Wt") \
+ V(CASAH, "casah", "'Ws, 'Wt") \
+ V(CASLH, "caslh", "'Ws, 'Wt") \
+ V(CASALH, "casalh", "'Ws, 'Wt") \
+ V(CASP_w, "casp", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \
+ V(CASP_x, "casp", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \
+ V(CASPA_w, "caspa", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \
+ V(CASPA_x, "caspa", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \
+ V(CASPL_w, "caspl", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \
+ V(CASPL_x, "caspl", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \
+ V(CASPAL_w, "caspal", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \
+ V(CASPAL_x, "caspal", "'Xs, 'X(s+1), 'Xt, 'X(t+1)")
+// clang-format on
+
+void Disassembler::VisitLoadStoreExclusive(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form;
+
+ switch (instr->Mask(LoadStoreExclusiveMask)) {
+#define LSX(A, B, C) \
+ case A: \
+ mnemonic = B; \
+ form = C ", ['Xns]"; \
+ break;
+ LOAD_STORE_EXCLUSIVE_LIST(LSX)
+#undef LSX
+ default:
+ form = "(LoadStoreExclusive)";
+ }
+
+ switch (instr->Mask(LoadStoreExclusiveMask)) {
+ case CASP_w:
+ case CASP_x:
+ case CASPA_w:
+ case CASPA_x:
+ case CASPL_w:
+ case CASPL_x:
+ case CASPAL_w:
+ case CASPAL_x:
+ if ((instr->Rs() % 2 == 1) || (instr->Rt() % 2 == 1)) {
+ mnemonic = "unallocated";
+ form = "(LoadStoreExclusive)";
+ }
+ break;
+ }
+
+ Format(instr, mnemonic, form);
+}
+
+#define ATOMIC_MEMORY_SIMPLE_LIST(V) \
+ V(LDADD, "add") \
+ V(LDCLR, "clr") \
+ V(LDEOR, "eor") \
+ V(LDSET, "set") \
+ V(LDSMAX, "smax") \
+ V(LDSMIN, "smin") \
+ V(LDUMAX, "umax") \
+ V(LDUMIN, "umin")
+
+void Disassembler::VisitAtomicMemory(const Instruction* instr) {
+ const int kMaxAtomicOpMnemonicLength = 16;
+ const char* mnemonic;
+ const char* form = "'Ws, 'Wt, ['Xns]";
+
+ switch (instr->Mask(AtomicMemoryMask)) {
+#define AMS(A, MN) \
+ case A##B: \
+ mnemonic = MN "b"; \
+ break; \
+ case A##AB: \
+ mnemonic = MN "ab"; \
+ break; \
+ case A##LB: \
+ mnemonic = MN "lb"; \
+ break; \
+ case A##ALB: \
+ mnemonic = MN "alb"; \
+ break; \
+ case A##H: \
+ mnemonic = MN "h"; \
+ break; \
+ case A##AH: \
+ mnemonic = MN "ah"; \
+ break; \
+ case A##LH: \
+ mnemonic = MN "lh"; \
+ break; \
+ case A##ALH: \
+ mnemonic = MN "alh"; \
+ break; \
+ case A##_w: \
+ mnemonic = MN; \
+ break; \
+ case A##A_w: \
+ mnemonic = MN "a"; \
+ break; \
+ case A##L_w: \
+ mnemonic = MN "l"; \
+ break; \
+ case A##AL_w: \
+ mnemonic = MN "al"; \
+ break; \
+ case A##_x: \
+ mnemonic = MN; \
+ form = "'Xs, 'Xt, ['Xns]"; \
+ break; \
+ case A##A_x: \
+ mnemonic = MN "a"; \
+ form = "'Xs, 'Xt, ['Xns]"; \
+ break; \
+ case A##L_x: \
+ mnemonic = MN "l"; \
+ form = "'Xs, 'Xt, ['Xns]"; \
+ break; \
+ case A##AL_x: \
+ mnemonic = MN "al"; \
+ form = "'Xs, 'Xt, ['Xns]"; \
+ break;
+ ATOMIC_MEMORY_SIMPLE_LIST(AMS)
+
+ // SWP has the same semantics as ldadd etc but without the store aliases.
+ AMS(SWP, "swp")
+#undef AMS
+
+ case LDAPRB:
+ mnemonic = "ldaprb";
+ form = "'Wt, ['Xns]";
+ break;
+ case LDAPRH:
+ mnemonic = "ldaprh";
+ form = "'Wt, ['Xns]";
+ break;
+ case LDAPR_w:
+ mnemonic = "ldapr";
+ form = "'Wt, ['Xns]";
+ break;
+ case LDAPR_x:
+ mnemonic = "ldapr";
+ form = "'Xt, ['Xns]";
+ break;
+ default:
+ mnemonic = "unimplemented";
+ form = "(AtomicMemory)";
+ }
+
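+ // The simple atomic operations above print with an "ld" prefix (ldaddb,
+ // ldclrah, ...). When Rt is the zero register and there are no acquire
+ // semantics, the store alias is used instead: the prefix becomes "st" and
+ // the Rt operand is dropped from the form, e.g. "stadd 'Ws, ['Xns]".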
+ const char* prefix = "";
+ switch (instr->Mask(AtomicMemoryMask)) {
+#define AMS(A, MN) \
+ case A##AB: \
+ case A##ALB: \
+ case A##AH: \
+ case A##ALH: \
+ case A##A_w: \
+ case A##AL_w: \
+ case A##A_x: \
+ case A##AL_x: \
+ prefix = "ld"; \
+ break; \
+ case A##B: \
+ case A##LB: \
+ case A##H: \
+ case A##LH: \
+ case A##_w: \
+ case A##L_w: { \
+ prefix = "ld"; \
+ unsigned rt = instr->Rt(); \
+ if (Register(rt, 32).IsZero()) { \
+ prefix = "st"; \
+ form = "'Ws, ['Xns]"; \
+ } \
+ break; \
+ } \
+ case A##_x: \
+ case A##L_x: { \
+ prefix = "ld"; \
+ unsigned rt = instr->Rt(); \
+ if (Register(rt, 64).IsZero()) { \
+ prefix = "st"; \
+ form = "'Xs, ['Xns]"; \
+ } \
+ break; \
+ }
+ ATOMIC_MEMORY_SIMPLE_LIST(AMS)
+#undef AMS
+ }
+
+ char buffer[kMaxAtomicOpMnemonicLength];
+ if (strlen(prefix) > 0) {
+ snprintf(buffer, kMaxAtomicOpMnemonicLength, "%s%s", prefix, mnemonic);
+ mnemonic = buffer;
+ }
+
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitFPCompare(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Fn, 'Fm";
+ const char *form_zero = "'Fn, #0.0";
+
+ switch (instr->Mask(FPCompareMask)) {
+ case FCMP_s_zero:
+ case FCMP_d_zero: form = form_zero; VIXL_FALLTHROUGH();
+ case FCMP_s:
+ case FCMP_d: mnemonic = "fcmp"; break;
+ case FCMPE_s_zero:
+ case FCMPE_d_zero: form = form_zero; VIXL_FALLTHROUGH();
+ case FCMPE_s:
+ case FCMPE_d: mnemonic = "fcmpe"; break;
+ default: form = "(FPCompare)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPConditionalCompare(const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+ const char *form = "'Fn, 'Fm, 'INzcv, 'Cond";
+
+ switch (instr->Mask(FPConditionalCompareMask)) {
+ case FCCMP_s:
+ case FCCMP_d: mnemonic = "fccmp"; break;
+ case FCCMPE_s:
+ case FCCMPE_d: mnemonic = "fccmpe"; break;
+ default: form = "(FPConditionalCompare)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPConditionalSelect(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Fd, 'Fn, 'Fm, 'Cond";
+
+ switch (instr->Mask(FPConditionalSelectMask)) {
+ case FCSEL_s:
+ case FCSEL_d: mnemonic = "fcsel"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPDataProcessing1Source(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Fd, 'Fn";
+
+ switch (instr->Mask(FPDataProcessing1SourceMask)) {
+ #define FORMAT(A, B) \
+ case A##_s: \
+ case A##_d: mnemonic = B; break;
+ FORMAT(FMOV, "fmov");
+ FORMAT(FABS, "fabs");
+ FORMAT(FNEG, "fneg");
+ FORMAT(FSQRT, "fsqrt");
+ FORMAT(FRINTN, "frintn");
+ FORMAT(FRINTP, "frintp");
+ FORMAT(FRINTM, "frintm");
+ FORMAT(FRINTZ, "frintz");
+ FORMAT(FRINTA, "frinta");
+ FORMAT(FRINTX, "frintx");
+ FORMAT(FRINTI, "frinti");
+ #undef FORMAT
+ case FCVT_ds: mnemonic = "fcvt"; form = "'Dd, 'Sn"; break;
+ case FCVT_sd: mnemonic = "fcvt"; form = "'Sd, 'Dn"; break;
+ case FCVT_hs: mnemonic = "fcvt"; form = "'Hd, 'Sn"; break;
+ case FCVT_sh: mnemonic = "fcvt"; form = "'Sd, 'Hn"; break;
+ case FCVT_dh: mnemonic = "fcvt"; form = "'Dd, 'Hn"; break;
+ case FCVT_hd: mnemonic = "fcvt"; form = "'Hd, 'Dn"; break;
+ default: form = "(FPDataProcessing1Source)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPDataProcessing2Source(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Fd, 'Fn, 'Fm";
+
+ switch (instr->Mask(FPDataProcessing2SourceMask)) {
+ #define FORMAT(A, B) \
+ case A##_s: \
+ case A##_d: mnemonic = B; break;
+ FORMAT(FMUL, "fmul");
+ FORMAT(FDIV, "fdiv");
+ FORMAT(FADD, "fadd");
+ FORMAT(FSUB, "fsub");
+ FORMAT(FMAX, "fmax");
+ FORMAT(FMIN, "fmin");
+ FORMAT(FMAXNM, "fmaxnm");
+ FORMAT(FMINNM, "fminnm");
+ FORMAT(FNMUL, "fnmul");
+ #undef FORMAT
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPDataProcessing3Source(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Fd, 'Fn, 'Fm, 'Fa";
+
+ switch (instr->Mask(FPDataProcessing3SourceMask)) {
+ #define FORMAT(A, B) \
+ case A##_s: \
+ case A##_d: mnemonic = B; break;
+ FORMAT(FMADD, "fmadd");
+ FORMAT(FMSUB, "fmsub");
+ FORMAT(FNMADD, "fnmadd");
+ FORMAT(FNMSUB, "fnmsub");
+ #undef FORMAT
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPImmediate(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "(FPImmediate)";
+
+ switch (instr->Mask(FPImmediateMask)) {
+ case FMOV_s_imm: mnemonic = "fmov"; form = "'Sd, 'IFPSingle"; break;
+ case FMOV_d_imm: mnemonic = "fmov"; form = "'Dd, 'IFPDouble"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPIntegerConvert(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(FPIntegerConvert)";
+ const char *form_rf = "'Rd, 'Fn";
+ const char *form_fr = "'Fd, 'Rn";
+
+ switch (instr->Mask(FPIntegerConvertMask)) {
+ case FMOV_ws:
+ case FMOV_xd: mnemonic = "fmov"; form = form_rf; break;
+ case FMOV_sw:
+ case FMOV_dx: mnemonic = "fmov"; form = form_fr; break;
+ case FMOV_d1_x: mnemonic = "fmov"; form = "'Vd.D[1], 'Rn"; break;
+ case FMOV_x_d1: mnemonic = "fmov"; form = "'Rd, 'Vn.D[1]"; break;
+ case FCVTAS_ws:
+ case FCVTAS_xs:
+ case FCVTAS_wd:
+ case FCVTAS_xd: mnemonic = "fcvtas"; form = form_rf; break;
+ case FCVTAU_ws:
+ case FCVTAU_xs:
+ case FCVTAU_wd:
+ case FCVTAU_xd: mnemonic = "fcvtau"; form = form_rf; break;
+ case FCVTMS_ws:
+ case FCVTMS_xs:
+ case FCVTMS_wd:
+ case FCVTMS_xd: mnemonic = "fcvtms"; form = form_rf; break;
+ case FCVTMU_ws:
+ case FCVTMU_xs:
+ case FCVTMU_wd:
+ case FCVTMU_xd: mnemonic = "fcvtmu"; form = form_rf; break;
+ case FCVTNS_ws:
+ case FCVTNS_xs:
+ case FCVTNS_wd:
+ case FCVTNS_xd: mnemonic = "fcvtns"; form = form_rf; break;
+ case FCVTNU_ws:
+ case FCVTNU_xs:
+ case FCVTNU_wd:
+ case FCVTNU_xd: mnemonic = "fcvtnu"; form = form_rf; break;
+ case FCVTZU_xd:
+ case FCVTZU_ws:
+ case FCVTZU_wd:
+ case FCVTZU_xs: mnemonic = "fcvtzu"; form = form_rf; break;
+ case FCVTZS_xd:
+ case FCVTZS_wd:
+ case FCVTZS_xs:
+ case FCVTZS_ws: mnemonic = "fcvtzs"; form = form_rf; break;
+ case FCVTPU_xd:
+ case FCVTPU_ws:
+ case FCVTPU_wd:
+ case FCVTPU_xs: mnemonic = "fcvtpu"; form = form_rf; break;
+ case FCVTPS_xd:
+ case FCVTPS_wd:
+ case FCVTPS_xs:
+ case FCVTPS_ws: mnemonic = "fcvtps"; form = form_rf; break;
+ case SCVTF_sw:
+ case SCVTF_sx:
+ case SCVTF_dw:
+ case SCVTF_dx: mnemonic = "scvtf"; form = form_fr; break;
+ case UCVTF_sw:
+ case UCVTF_sx:
+ case UCVTF_dw:
+ case UCVTF_dx: mnemonic = "ucvtf"; form = form_fr; break;
+ case FJCVTZS: mnemonic = "fjcvtzs"; form = form_rf; break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPFixedPointConvert(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'Fn, 'IFPFBits";
+ const char *form_fr = "'Fd, 'Rn, 'IFPFBits";
+
+ switch (instr->Mask(FPFixedPointConvertMask)) {
+ case FCVTZS_ws_fixed:
+ case FCVTZS_xs_fixed:
+ case FCVTZS_wd_fixed:
+ case FCVTZS_xd_fixed: mnemonic = "fcvtzs"; break;
+ case FCVTZU_ws_fixed:
+ case FCVTZU_xs_fixed:
+ case FCVTZU_wd_fixed:
+ case FCVTZU_xd_fixed: mnemonic = "fcvtzu"; break;
+ case SCVTF_sw_fixed:
+ case SCVTF_sx_fixed:
+ case SCVTF_dw_fixed:
+ case SCVTF_dx_fixed: mnemonic = "scvtf"; form = form_fr; break;
+ case UCVTF_sw_fixed:
+ case UCVTF_sx_fixed:
+ case UCVTF_dw_fixed:
+ case UCVTF_dx_fixed: mnemonic = "ucvtf"; form = form_fr; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitSystem(const Instruction* instr) {
+ // Some system instructions hijack their Op and Cp fields to represent a
+ // range of immediates instead of indicating a different instruction. This
+ // makes the decoding tricky.
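+  // For example, CLREX keeps its 4-bit immediate in CRm, so the default
+  // "clrex" (CRm == 0xf) and "clrex #n" share one encoding slot rather than
+  // having distinct opcodes.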
+ const char *mnemonic = "unimplemented";
+ const char *form = "(System)";
+
+ if (instr->Mask(SystemExclusiveMonitorFMask) == SystemExclusiveMonitorFixed) {
+ switch (instr->Mask(SystemExclusiveMonitorMask)) {
+ case CLREX: {
+ mnemonic = "clrex";
+ form = (instr->CRm() == 0xf) ? NULL : "'IX";
+ break;
+ }
+ }
+ } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
+ switch (instr->Mask(SystemSysRegMask)) {
+ case MRS: {
+ mnemonic = "mrs";
+ switch (instr->ImmSystemRegister()) {
+ case NZCV: form = "'Xt, nzcv"; break;
+ case FPCR: form = "'Xt, fpcr"; break;
+ default: form = "'Xt, (unknown)"; break;
+ }
+ break;
+ }
+ case MSR: {
+ mnemonic = "msr";
+ switch (instr->ImmSystemRegister()) {
+ case NZCV: form = "nzcv, 'Xt"; break;
+ case FPCR: form = "fpcr, 'Xt"; break;
+ default: form = "(unknown), 'Xt"; break;
+ }
+ break;
+ }
+ }
+ } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
+ switch (instr->ImmHint()) {
+ case NOP: {
+ mnemonic = "nop";
+ form = NULL;
+ break;
+ }
+ case CSDB: {
+ mnemonic = "csdb";
+ form = NULL;
+ break;
+ }
+ }
+ } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) {
+ switch (instr->Mask(MemBarrierMask)) {
+ case DMB: {
+ mnemonic = "dmb";
+ form = "'M";
+ break;
+ }
+ case DSB: {
+ mnemonic = "dsb";
+ form = "'M";
+ break;
+ }
+ case ISB: {
+ mnemonic = "isb";
+ form = NULL;
+ break;
+ }
+ }
+ } else if (instr->Mask(SystemSysFMask) == SystemSysFixed) {
+ switch (instr->SysOp()) {
+ case IVAU:
+ mnemonic = "ic";
+ form = "ivau, 'Xt";
+ break;
+ case CVAC:
+ mnemonic = "dc";
+ form = "cvac, 'Xt";
+ break;
+ case CVAU:
+ mnemonic = "dc";
+ form = "cvau, 'Xt";
+ break;
+ case CIVAC:
+ mnemonic = "dc";
+ form = "civac, 'Xt";
+ break;
+ case ZVA:
+ mnemonic = "dc";
+ form = "zva, 'Xt";
+ break;
+ default:
+ mnemonic = "sys";
+ if (instr->Rt() == 31) {
+ form = "'G1, 'Kn, 'Km, 'G2";
+ } else {
+ form = "'G1, 'Kn, 'Km, 'G2, 'Xt";
+ }
+ break;
+ }
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitException(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'IDebug";
+
+ switch (instr->Mask(ExceptionMask)) {
+ case HLT: mnemonic = "hlt"; break;
+ case BRK: mnemonic = "brk"; break;
+ case SVC: mnemonic = "svc"; break;
+ case HVC: mnemonic = "hvc"; break;
+ case SMC: mnemonic = "smc"; break;
+ case DCPS1: mnemonic = "dcps1"; form = "{'IDebug}"; break;
+ case DCPS2: mnemonic = "dcps2"; form = "{'IDebug}"; break;
+ case DCPS3: mnemonic = "dcps3"; form = "{'IDebug}"; break;
+ default: form = "(Exception)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitCrypto2RegSHA(const Instruction* instr) {
+ VisitUnimplemented(instr);
+}
+
+
+void Disassembler::VisitCrypto3RegSHA(const Instruction* instr) {
+ VisitUnimplemented(instr);
+}
+
+
+void Disassembler::VisitCryptoAES(const Instruction* instr) {
+ VisitUnimplemented(instr);
+}
+
+
+void Disassembler::VisitNEON2RegMisc(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Vd.%s, 'Vn.%s";
+ const char *form_cmp_zero = "'Vd.%s, 'Vn.%s, #0";
+ const char *form_fcmp_zero = "'Vd.%s, 'Vn.%s, #0.0";
+ NEONFormatDecoder nfd(instr);
+
+ static const NEONFormatMap map_lp_ta = {
+ {23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}
+ };
+
+ static const NEONFormatMap map_cvt_ta = {
+ {22}, {NF_4S, NF_2D}
+ };
+
+ static const NEONFormatMap map_cvt_tb = {
+ {22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S}
+ };
+
+ if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) {
+ // These instructions all use a two bit size field, except NOT and RBIT,
+ // which use the field to encode the operation.
+ switch (instr->Mask(NEON2RegMiscMask)) {
+ case NEON_REV64: mnemonic = "rev64"; break;
+ case NEON_REV32: mnemonic = "rev32"; break;
+ case NEON_REV16: mnemonic = "rev16"; break;
+ case NEON_SADDLP:
+ mnemonic = "saddlp";
+ nfd.SetFormatMap(0, &map_lp_ta);
+ break;
+ case NEON_UADDLP:
+ mnemonic = "uaddlp";
+ nfd.SetFormatMap(0, &map_lp_ta);
+ break;
+ case NEON_SUQADD: mnemonic = "suqadd"; break;
+ case NEON_USQADD: mnemonic = "usqadd"; break;
+ case NEON_CLS: mnemonic = "cls"; break;
+ case NEON_CLZ: mnemonic = "clz"; break;
+ case NEON_CNT: mnemonic = "cnt"; break;
+ case NEON_SADALP:
+ mnemonic = "sadalp";
+ nfd.SetFormatMap(0, &map_lp_ta);
+ break;
+ case NEON_UADALP:
+ mnemonic = "uadalp";
+ nfd.SetFormatMap(0, &map_lp_ta);
+ break;
+ case NEON_SQABS: mnemonic = "sqabs"; break;
+ case NEON_SQNEG: mnemonic = "sqneg"; break;
+ case NEON_CMGT_zero: mnemonic = "cmgt"; form = form_cmp_zero; break;
+ case NEON_CMGE_zero: mnemonic = "cmge"; form = form_cmp_zero; break;
+ case NEON_CMEQ_zero: mnemonic = "cmeq"; form = form_cmp_zero; break;
+ case NEON_CMLE_zero: mnemonic = "cmle"; form = form_cmp_zero; break;
+ case NEON_CMLT_zero: mnemonic = "cmlt"; form = form_cmp_zero; break;
+ case NEON_ABS: mnemonic = "abs"; break;
+ case NEON_NEG: mnemonic = "neg"; break;
+ case NEON_RBIT_NOT:
+ switch (instr->FPType()) {
+ case 0: mnemonic = "mvn"; break;
+ case 1: mnemonic = "rbit"; break;
+ default: form = "(NEON2RegMisc)";
+ }
+ nfd.SetFormatMaps(nfd.LogicalFormatMap());
+ break;
+ }
+ } else {
+ // These instructions all use a one bit size field, except XTN, SQXTUN,
+ // SHLL, SQXTN and UQXTN, which use a two bit size field.
+ nfd.SetFormatMaps(nfd.FPFormatMap());
+ switch (instr->Mask(NEON2RegMiscFPMask)) {
+ case NEON_FABS: mnemonic = "fabs"; break;
+ case NEON_FNEG: mnemonic = "fneg"; break;
+ case NEON_FCVTN:
+ mnemonic = instr->Mask(NEON_Q) ? "fcvtn2" : "fcvtn";
+ nfd.SetFormatMap(0, &map_cvt_tb);
+ nfd.SetFormatMap(1, &map_cvt_ta);
+ break;
+ case NEON_FCVTXN:
+ mnemonic = instr->Mask(NEON_Q) ? "fcvtxn2" : "fcvtxn";
+ nfd.SetFormatMap(0, &map_cvt_tb);
+ nfd.SetFormatMap(1, &map_cvt_ta);
+ break;
+ case NEON_FCVTL:
+ mnemonic = instr->Mask(NEON_Q) ? "fcvtl2" : "fcvtl";
+ nfd.SetFormatMap(0, &map_cvt_ta);
+ nfd.SetFormatMap(1, &map_cvt_tb);
+ break;
+ case NEON_FRINTN: mnemonic = "frintn"; break;
+ case NEON_FRINTA: mnemonic = "frinta"; break;
+ case NEON_FRINTP: mnemonic = "frintp"; break;
+ case NEON_FRINTM: mnemonic = "frintm"; break;
+ case NEON_FRINTX: mnemonic = "frintx"; break;
+ case NEON_FRINTZ: mnemonic = "frintz"; break;
+ case NEON_FRINTI: mnemonic = "frinti"; break;
+ case NEON_FCVTNS: mnemonic = "fcvtns"; break;
+ case NEON_FCVTNU: mnemonic = "fcvtnu"; break;
+ case NEON_FCVTPS: mnemonic = "fcvtps"; break;
+ case NEON_FCVTPU: mnemonic = "fcvtpu"; break;
+ case NEON_FCVTMS: mnemonic = "fcvtms"; break;
+ case NEON_FCVTMU: mnemonic = "fcvtmu"; break;
+ case NEON_FCVTZS: mnemonic = "fcvtzs"; break;
+ case NEON_FCVTZU: mnemonic = "fcvtzu"; break;
+ case NEON_FCVTAS: mnemonic = "fcvtas"; break;
+ case NEON_FCVTAU: mnemonic = "fcvtau"; break;
+ case NEON_FSQRT: mnemonic = "fsqrt"; break;
+ case NEON_SCVTF: mnemonic = "scvtf"; break;
+ case NEON_UCVTF: mnemonic = "ucvtf"; break;
+ case NEON_URSQRTE: mnemonic = "ursqrte"; break;
+ case NEON_URECPE: mnemonic = "urecpe"; break;
+ case NEON_FRSQRTE: mnemonic = "frsqrte"; break;
+ case NEON_FRECPE: mnemonic = "frecpe"; break;
+ case NEON_FCMGT_zero: mnemonic = "fcmgt"; form = form_fcmp_zero; break;
+ case NEON_FCMGE_zero: mnemonic = "fcmge"; form = form_fcmp_zero; break;
+ case NEON_FCMEQ_zero: mnemonic = "fcmeq"; form = form_fcmp_zero; break;
+ case NEON_FCMLE_zero: mnemonic = "fcmle"; form = form_fcmp_zero; break;
+ case NEON_FCMLT_zero: mnemonic = "fcmlt"; form = form_fcmp_zero; break;
+ default:
+ if ((NEON_XTN_opcode <= instr->Mask(NEON2RegMiscOpcode)) &&
+ (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) {
+ nfd.SetFormatMap(0, nfd.IntegerFormatMap());
+ nfd.SetFormatMap(1, nfd.LongIntegerFormatMap());
+
+ switch (instr->Mask(NEON2RegMiscMask)) {
+ case NEON_XTN: mnemonic = "xtn"; break;
+ case NEON_SQXTN: mnemonic = "sqxtn"; break;
+ case NEON_UQXTN: mnemonic = "uqxtn"; break;
+ case NEON_SQXTUN: mnemonic = "sqxtun"; break;
+ case NEON_SHLL:
+ mnemonic = "shll";
+ nfd.SetFormatMap(0, nfd.LongIntegerFormatMap());
+ nfd.SetFormatMap(1, nfd.IntegerFormatMap());
+ switch (instr->NEONSize()) {
+ case 0: form = "'Vd.%s, 'Vn.%s, #8"; break;
+ case 1: form = "'Vd.%s, 'Vn.%s, #16"; break;
+ case 2: form = "'Vd.%s, 'Vn.%s, #32"; break;
+ default: form = "(NEON2RegMisc)";
+ }
+ }
+ Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form));
+ return;
+ } else {
+ form = "(NEON2RegMisc)";
+ }
+ }
+ }
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEON3Same(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
+ NEONFormatDecoder nfd(instr);
+
+ if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) {
+ switch (instr->Mask(NEON3SameLogicalMask)) {
+ case NEON_AND: mnemonic = "and"; break;
+ case NEON_ORR:
+ mnemonic = "orr";
+ if (instr->Rm() == instr->Rn()) {
+ mnemonic = "mov";
+ form = "'Vd.%s, 'Vn.%s";
+ }
+ break;
+ case NEON_ORN: mnemonic = "orn"; break;
+ case NEON_EOR: mnemonic = "eor"; break;
+ case NEON_BIC: mnemonic = "bic"; break;
+ case NEON_BIF: mnemonic = "bif"; break;
+ case NEON_BIT: mnemonic = "bit"; break;
+ case NEON_BSL: mnemonic = "bsl"; break;
+ default: form = "(NEON3Same)";
+ }
+ nfd.SetFormatMaps(nfd.LogicalFormatMap());
+ } else {
+ static const char *mnemonics[] = {
+ "shadd", "uhadd", "shadd", "uhadd",
+ "sqadd", "uqadd", "sqadd", "uqadd",
+ "srhadd", "urhadd", "srhadd", "urhadd",
+ NULL, NULL, NULL, NULL, // Handled by logical cases above.
+ "shsub", "uhsub", "shsub", "uhsub",
+ "sqsub", "uqsub", "sqsub", "uqsub",
+ "cmgt", "cmhi", "cmgt", "cmhi",
+ "cmge", "cmhs", "cmge", "cmhs",
+ "sshl", "ushl", "sshl", "ushl",
+ "sqshl", "uqshl", "sqshl", "uqshl",
+ "srshl", "urshl", "srshl", "urshl",
+ "sqrshl", "uqrshl", "sqrshl", "uqrshl",
+ "smax", "umax", "smax", "umax",
+ "smin", "umin", "smin", "umin",
+ "sabd", "uabd", "sabd", "uabd",
+ "saba", "uaba", "saba", "uaba",
+ "add", "sub", "add", "sub",
+ "cmtst", "cmeq", "cmtst", "cmeq",
+ "mla", "mls", "mla", "mls",
+ "mul", "pmul", "mul", "pmul",
+ "smaxp", "umaxp", "smaxp", "umaxp",
+ "sminp", "uminp", "sminp", "uminp",
+ "sqdmulh", "sqrdmulh", "sqdmulh", "sqrdmulh",
+ "addp", "unallocated", "addp", "unallocated",
+ "fmaxnm", "fmaxnmp", "fminnm", "fminnmp",
+ "fmla", "unallocated", "fmls", "unallocated",
+ "fadd", "faddp", "fsub", "fabd",
+ "fmulx", "fmul", "unallocated", "unallocated",
+ "fcmeq", "fcmge", "unallocated", "fcmgt",
+ "unallocated", "facge", "unallocated", "facgt",
+ "fmax", "fmaxp", "fmin", "fminp",
+ "frecps", "fdiv", "frsqrts", "unallocated"};
+
+ // Operation is determined by the opcode bits (15-11), the top bit of
+ // size (23) and the U bit (29).
+ unsigned index = (instr->Bits(15, 11) << 2) | (instr->Bit(23) << 1) |
+ instr->Bit(29);
+ VIXL_ASSERT(index < (sizeof(mnemonics) / sizeof(mnemonics[0])));
+ mnemonic = mnemonics[index];
+ // Assert that index is not one of the previously handled logical
+ // instructions.
+ VIXL_ASSERT(mnemonic != NULL);
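+    // Worked example (assuming the standard AArch64 encoding of FADD): a
+    // vector fadd has U = 0, size<1> = 0 and opcode<15:11> = 0b11010, so
+    // index = (26 << 2) | (0 << 1) | 0 = 104, which selects "fadd" in the
+    // ("fadd", "faddp", "fsub", "fabd") group above.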
+
+ if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
+ nfd.SetFormatMaps(nfd.FPFormatMap());
+ }
+ }
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEON3Different(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
+
+ NEONFormatDecoder nfd(instr);
+ nfd.SetFormatMap(0, nfd.LongIntegerFormatMap());
+
+ // Ignore the Q bit. Appending a "2" suffix is handled later.
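+  // (The suffix is added by nfd.Mnemonic() in the final Format call; e.g. the
+  // Q form of saddl disassembles as "saddl2".)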
+ switch (instr->Mask(NEON3DifferentMask) & ~NEON_Q) {
+ case NEON_PMULL: mnemonic = "pmull"; break;
+ case NEON_SABAL: mnemonic = "sabal"; break;
+ case NEON_SABDL: mnemonic = "sabdl"; break;
+ case NEON_SADDL: mnemonic = "saddl"; break;
+ case NEON_SMLAL: mnemonic = "smlal"; break;
+ case NEON_SMLSL: mnemonic = "smlsl"; break;
+ case NEON_SMULL: mnemonic = "smull"; break;
+ case NEON_SSUBL: mnemonic = "ssubl"; break;
+ case NEON_SQDMLAL: mnemonic = "sqdmlal"; break;
+ case NEON_SQDMLSL: mnemonic = "sqdmlsl"; break;
+ case NEON_SQDMULL: mnemonic = "sqdmull"; break;
+ case NEON_UABAL: mnemonic = "uabal"; break;
+ case NEON_UABDL: mnemonic = "uabdl"; break;
+ case NEON_UADDL: mnemonic = "uaddl"; break;
+ case NEON_UMLAL: mnemonic = "umlal"; break;
+ case NEON_UMLSL: mnemonic = "umlsl"; break;
+ case NEON_UMULL: mnemonic = "umull"; break;
+ case NEON_USUBL: mnemonic = "usubl"; break;
+ case NEON_SADDW:
+ mnemonic = "saddw";
+ nfd.SetFormatMap(1, nfd.LongIntegerFormatMap());
+ break;
+ case NEON_SSUBW:
+ mnemonic = "ssubw";
+ nfd.SetFormatMap(1, nfd.LongIntegerFormatMap());
+ break;
+ case NEON_UADDW:
+ mnemonic = "uaddw";
+ nfd.SetFormatMap(1, nfd.LongIntegerFormatMap());
+ break;
+ case NEON_USUBW:
+ mnemonic = "usubw";
+ nfd.SetFormatMap(1, nfd.LongIntegerFormatMap());
+ break;
+ case NEON_ADDHN:
+ mnemonic = "addhn";
+ nfd.SetFormatMaps(nfd.LongIntegerFormatMap());
+ nfd.SetFormatMap(0, nfd.IntegerFormatMap());
+ break;
+ case NEON_RADDHN:
+ mnemonic = "raddhn";
+ nfd.SetFormatMaps(nfd.LongIntegerFormatMap());
+ nfd.SetFormatMap(0, nfd.IntegerFormatMap());
+ break;
+ case NEON_RSUBHN:
+ mnemonic = "rsubhn";
+ nfd.SetFormatMaps(nfd.LongIntegerFormatMap());
+ nfd.SetFormatMap(0, nfd.IntegerFormatMap());
+ break;
+ case NEON_SUBHN:
+ mnemonic = "subhn";
+ nfd.SetFormatMaps(nfd.LongIntegerFormatMap());
+ nfd.SetFormatMap(0, nfd.IntegerFormatMap());
+ break;
+ default: form = "(NEON3Different)";
+ }
+ Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONAcrossLanes(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "%sd, 'Vn.%s";
+
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap(),
+ NEONFormatDecoder::IntegerFormatMap());
+
+ if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
+ nfd.SetFormatMap(0, nfd.FPScalarFormatMap());
+ nfd.SetFormatMap(1, nfd.FPFormatMap());
+ switch (instr->Mask(NEONAcrossLanesFPMask)) {
+ case NEON_FMAXV: mnemonic = "fmaxv"; break;
+ case NEON_FMINV: mnemonic = "fminv"; break;
+ case NEON_FMAXNMV: mnemonic = "fmaxnmv"; break;
+ case NEON_FMINNMV: mnemonic = "fminnmv"; break;
+ default: form = "(NEONAcrossLanes)"; break;
+ }
+ } else if (instr->Mask(NEONAcrossLanesFMask) == NEONAcrossLanesFixed) {
+ switch (instr->Mask(NEONAcrossLanesMask)) {
+ case NEON_ADDV: mnemonic = "addv"; break;
+ case NEON_SMAXV: mnemonic = "smaxv"; break;
+ case NEON_SMINV: mnemonic = "sminv"; break;
+ case NEON_UMAXV: mnemonic = "umaxv"; break;
+ case NEON_UMINV: mnemonic = "uminv"; break;
+ case NEON_SADDLV:
+ mnemonic = "saddlv";
+ nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
+ break;
+ case NEON_UADDLV:
+ mnemonic = "uaddlv";
+ nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
+ break;
+ default: form = "(NEONAcrossLanes)"; break;
+ }
+ }
+ Format(instr, mnemonic, nfd.Substitute(form,
+ NEONFormatDecoder::kPlaceholder, NEONFormatDecoder::kFormat));
+}
+
+
+void Disassembler::VisitNEONByIndexedElement(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ bool l_instr = false;
+ bool fp_instr = false;
+
+ const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndex]";
+
+ static const NEONFormatMap map_ta = {
+ {23, 22}, {NF_UNDEF, NF_4S, NF_2D}
+ };
+ NEONFormatDecoder nfd(instr, &map_ta,
+ NEONFormatDecoder::IntegerFormatMap(),
+ NEONFormatDecoder::ScalarFormatMap());
+
+ switch (instr->Mask(NEONByIndexedElementMask)) {
+ case NEON_SMULL_byelement: mnemonic = "smull"; l_instr = true; break;
+ case NEON_UMULL_byelement: mnemonic = "umull"; l_instr = true; break;
+ case NEON_SMLAL_byelement: mnemonic = "smlal"; l_instr = true; break;
+ case NEON_UMLAL_byelement: mnemonic = "umlal"; l_instr = true; break;
+ case NEON_SMLSL_byelement: mnemonic = "smlsl"; l_instr = true; break;
+ case NEON_UMLSL_byelement: mnemonic = "umlsl"; l_instr = true; break;
+ case NEON_SQDMULL_byelement: mnemonic = "sqdmull"; l_instr = true; break;
+ case NEON_SQDMLAL_byelement: mnemonic = "sqdmlal"; l_instr = true; break;
+ case NEON_SQDMLSL_byelement: mnemonic = "sqdmlsl"; l_instr = true; break;
+ case NEON_MUL_byelement: mnemonic = "mul"; break;
+ case NEON_MLA_byelement: mnemonic = "mla"; break;
+ case NEON_MLS_byelement: mnemonic = "mls"; break;
+ case NEON_SQDMULH_byelement: mnemonic = "sqdmulh"; break;
+ case NEON_SQRDMULH_byelement: mnemonic = "sqrdmulh"; break;
+ default:
+ switch (instr->Mask(NEONByIndexedElementFPMask)) {
+ case NEON_FMUL_byelement: mnemonic = "fmul"; fp_instr = true; break;
+ case NEON_FMLA_byelement: mnemonic = "fmla"; fp_instr = true; break;
+ case NEON_FMLS_byelement: mnemonic = "fmls"; fp_instr = true; break;
+ case NEON_FMULX_byelement: mnemonic = "fmulx"; fp_instr = true; break;
+ }
+ }
+
+ if (l_instr) {
+ Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form));
+ } else if (fp_instr) {
+ nfd.SetFormatMap(0, nfd.FPFormatMap());
+ Format(instr, mnemonic, nfd.Substitute(form));
+ } else {
+ nfd.SetFormatMap(0, nfd.IntegerFormatMap());
+ Format(instr, mnemonic, nfd.Substitute(form));
+ }
+}
+
+
+void Disassembler::VisitNEONCopy(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONCopy)";
+
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularFormatMap(),
+ NEONFormatDecoder::TriangularScalarFormatMap());
+
+ if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) {
+ mnemonic = "mov";
+ nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+ form = "'Vd.%s['IVInsIndex1], 'Vn.%s['IVInsIndex2]";
+ } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) {
+ mnemonic = "mov";
+ nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+ if (nfd.GetVectorFormat() == kFormatD) {
+ form = "'Vd.%s['IVInsIndex1], 'Xn";
+ } else {
+ form = "'Vd.%s['IVInsIndex1], 'Wn";
+ }
+ } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) {
+ if (instr->Mask(NEON_Q) || ((instr->ImmNEON5() & 7) == 4)) {
+ mnemonic = "mov";
+ } else {
+ mnemonic = "umov";
+ }
+ nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+ if (nfd.GetVectorFormat() == kFormatD) {
+ form = "'Xd, 'Vn.%s['IVInsIndex1]";
+ } else {
+ form = "'Wd, 'Vn.%s['IVInsIndex1]";
+ }
+ } else if (instr->Mask(NEONCopySmovMask) == NEON_SMOV) {
+ mnemonic = "smov";
+ nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+ form = "'Rdq, 'Vn.%s['IVInsIndex1]";
+ } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) {
+ mnemonic = "dup";
+ form = "'Vd.%s, 'Vn.%s['IVInsIndex1]";
+ } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) {
+ mnemonic = "dup";
+ if (nfd.GetVectorFormat() == kFormat2D) {
+ form = "'Vd.%s, 'Xn";
+ } else {
+ form = "'Vd.%s, 'Wn";
+ }
+ }
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONExtract(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONExtract)";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
+ if (instr->Mask(NEONExtractMask) == NEON_EXT) {
+ mnemonic = "ext";
+ form = "'Vd.%s, 'Vn.%s, 'Vm.%s, 'IVExtract";
+ }
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONLoadStoreMultiStruct)";
+ const char *form_1v = "{'Vt.%1$s}, ['Xns]";
+ const char *form_2v = "{'Vt.%1$s, 'Vt2.%1$s}, ['Xns]";
+ const char *form_3v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s}, ['Xns]";
+ const char *form_4v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns]";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+
+ switch (instr->Mask(NEONLoadStoreMultiStructMask)) {
+ case NEON_LD1_1v: mnemonic = "ld1"; form = form_1v; break;
+ case NEON_LD1_2v: mnemonic = "ld1"; form = form_2v; break;
+ case NEON_LD1_3v: mnemonic = "ld1"; form = form_3v; break;
+ case NEON_LD1_4v: mnemonic = "ld1"; form = form_4v; break;
+ case NEON_LD2: mnemonic = "ld2"; form = form_2v; break;
+ case NEON_LD3: mnemonic = "ld3"; form = form_3v; break;
+ case NEON_LD4: mnemonic = "ld4"; form = form_4v; break;
+ case NEON_ST1_1v: mnemonic = "st1"; form = form_1v; break;
+ case NEON_ST1_2v: mnemonic = "st1"; form = form_2v; break;
+ case NEON_ST1_3v: mnemonic = "st1"; form = form_3v; break;
+ case NEON_ST1_4v: mnemonic = "st1"; form = form_4v; break;
+ case NEON_ST2: mnemonic = "st2"; form = form_2v; break;
+ case NEON_ST3: mnemonic = "st3"; form = form_3v; break;
+ case NEON_ST4: mnemonic = "st4"; form = form_4v; break;
+ default: break;
+ }
+
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONLoadStoreMultiStructPostIndex(
+ const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONLoadStoreMultiStructPostIndex)";
+ const char *form_1v = "{'Vt.%1$s}, ['Xns], 'Xmr1";
+ const char *form_2v = "{'Vt.%1$s, 'Vt2.%1$s}, ['Xns], 'Xmr2";
+ const char *form_3v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s}, ['Xns], 'Xmr3";
+ const char *form_4v =
+ "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns], 'Xmr4";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+
+ switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
+ case NEON_LD1_1v_post: mnemonic = "ld1"; form = form_1v; break;
+ case NEON_LD1_2v_post: mnemonic = "ld1"; form = form_2v; break;
+ case NEON_LD1_3v_post: mnemonic = "ld1"; form = form_3v; break;
+ case NEON_LD1_4v_post: mnemonic = "ld1"; form = form_4v; break;
+ case NEON_LD2_post: mnemonic = "ld2"; form = form_2v; break;
+ case NEON_LD3_post: mnemonic = "ld3"; form = form_3v; break;
+ case NEON_LD4_post: mnemonic = "ld4"; form = form_4v; break;
+ case NEON_ST1_1v_post: mnemonic = "st1"; form = form_1v; break;
+ case NEON_ST1_2v_post: mnemonic = "st1"; form = form_2v; break;
+ case NEON_ST1_3v_post: mnemonic = "st1"; form = form_3v; break;
+ case NEON_ST1_4v_post: mnemonic = "st1"; form = form_4v; break;
+ case NEON_ST2_post: mnemonic = "st2"; form = form_2v; break;
+ case NEON_ST3_post: mnemonic = "st3"; form = form_3v; break;
+ case NEON_ST4_post: mnemonic = "st4"; form = form_4v; break;
+ default: break;
+ }
+
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONLoadStoreSingleStruct)";
+
+ const char *form_1b = "{'Vt.b}['IVLSLane0], ['Xns]";
+ const char *form_1h = "{'Vt.h}['IVLSLane1], ['Xns]";
+ const char *form_1s = "{'Vt.s}['IVLSLane2], ['Xns]";
+ const char *form_1d = "{'Vt.d}['IVLSLane3], ['Xns]";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+
+ switch (instr->Mask(NEONLoadStoreSingleStructMask)) {
+ case NEON_LD1_b: mnemonic = "ld1"; form = form_1b; break;
+ case NEON_LD1_h: mnemonic = "ld1"; form = form_1h; break;
+ case NEON_LD1_s:
+ mnemonic = "ld1";
+ VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d);
+ form = ((instr->NEONLSSize() & 1) == 0) ? form_1s : form_1d;
+ break;
+ case NEON_ST1_b: mnemonic = "st1"; form = form_1b; break;
+ case NEON_ST1_h: mnemonic = "st1"; form = form_1h; break;
+ case NEON_ST1_s:
+ mnemonic = "st1";
+ VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d);
+ form = ((instr->NEONLSSize() & 1) == 0) ? form_1s : form_1d;
+ break;
+ case NEON_LD1R:
+ mnemonic = "ld1r";
+ form = "{'Vt.%s}, ['Xns]";
+ break;
+ case NEON_LD2_b:
+ case NEON_ST2_b:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+ form = "{'Vt.b, 'Vt2.b}['IVLSLane0], ['Xns]";
+ break;
+ case NEON_LD2_h:
+ case NEON_ST2_h:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+ form = "{'Vt.h, 'Vt2.h}['IVLSLane1], ['Xns]";
+ break;
+ case NEON_LD2_s:
+ case NEON_ST2_s:
+ VIXL_STATIC_ASSERT((NEON_ST2_s | (1 << NEONLSSize_offset)) == NEON_ST2_d);
+ VIXL_STATIC_ASSERT((NEON_LD2_s | (1 << NEONLSSize_offset)) == NEON_LD2_d);
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+ if ((instr->NEONLSSize() & 1) == 0)
+ form = "{'Vt.s, 'Vt2.s}['IVLSLane2], ['Xns]";
+ else
+ form = "{'Vt.d, 'Vt2.d}['IVLSLane3], ['Xns]";
+ break;
+ case NEON_LD2R:
+ mnemonic = "ld2r";
+ form = "{'Vt.%s, 'Vt2.%s}, ['Xns]";
+ break;
+ case NEON_LD3_b:
+ case NEON_ST3_b:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+ form = "{'Vt.b, 'Vt2.b, 'Vt3.b}['IVLSLane0], ['Xns]";
+ break;
+ case NEON_LD3_h:
+ case NEON_ST3_h:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+ form = "{'Vt.h, 'Vt2.h, 'Vt3.h}['IVLSLane1], ['Xns]";
+ break;
+ case NEON_LD3_s:
+ case NEON_ST3_s:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+ if ((instr->NEONLSSize() & 1) == 0)
+ form = "{'Vt.s, 'Vt2.s, 'Vt3.s}['IVLSLane2], ['Xns]";
+ else
+ form = "{'Vt.d, 'Vt2.d, 'Vt3.d}['IVLSLane3], ['Xns]";
+ break;
+ case NEON_LD3R:
+ mnemonic = "ld3r";
+ form = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s}, ['Xns]";
+ break;
+ case NEON_LD4_b:
+ case NEON_ST4_b:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+ form = "{'Vt.b, 'Vt2.b, 'Vt3.b, 'Vt4.b}['IVLSLane0], ['Xns]";
+ break;
+ case NEON_LD4_h:
+ case NEON_ST4_h:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+ form = "{'Vt.h, 'Vt2.h, 'Vt3.h, 'Vt4.h}['IVLSLane1], ['Xns]";
+ break;
+ case NEON_LD4_s:
+ case NEON_ST4_s:
+ VIXL_STATIC_ASSERT((NEON_LD4_s | (1 << NEONLSSize_offset)) == NEON_LD4_d);
+ VIXL_STATIC_ASSERT((NEON_ST4_s | (1 << NEONLSSize_offset)) == NEON_ST4_d);
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+ if ((instr->NEONLSSize() & 1) == 0)
+ form = "{'Vt.s, 'Vt2.s, 'Vt3.s, 'Vt4.s}['IVLSLane2], ['Xns]";
+ else
+ form = "{'Vt.d, 'Vt2.d, 'Vt3.d, 'Vt4.d}['IVLSLane3], ['Xns]";
+ break;
+ case NEON_LD4R:
+ mnemonic = "ld4r";
+ form = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns]";
+ break;
+ default: break;
+ }
+
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONLoadStoreSingleStructPostIndex(
+ const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONLoadStoreSingleStructPostIndex)";
+
+ const char *form_1b = "{'Vt.b}['IVLSLane0], ['Xns], 'Xmb1";
+ const char *form_1h = "{'Vt.h}['IVLSLane1], ['Xns], 'Xmb2";
+ const char *form_1s = "{'Vt.s}['IVLSLane2], ['Xns], 'Xmb4";
+ const char *form_1d = "{'Vt.d}['IVLSLane3], ['Xns], 'Xmb8";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+
+ switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) {
+ case NEON_LD1_b_post: mnemonic = "ld1"; form = form_1b; break;
+ case NEON_LD1_h_post: mnemonic = "ld1"; form = form_1h; break;
+ case NEON_LD1_s_post:
+ mnemonic = "ld1";
+ VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d);
+ form = ((instr->NEONLSSize() & 1) == 0) ? form_1s : form_1d;
+ break;
+ case NEON_ST1_b_post: mnemonic = "st1"; form = form_1b; break;
+ case NEON_ST1_h_post: mnemonic = "st1"; form = form_1h; break;
+ case NEON_ST1_s_post:
+ mnemonic = "st1";
+ VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d);
+ form = ((instr->NEONLSSize() & 1) == 0) ? form_1s : form_1d;
+ break;
+ case NEON_LD1R_post:
+ mnemonic = "ld1r";
+ form = "{'Vt.%s}, ['Xns], 'Xmz1";
+ break;
+ case NEON_LD2_b_post:
+ case NEON_ST2_b_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+ form = "{'Vt.b, 'Vt2.b}['IVLSLane0], ['Xns], 'Xmb2";
+ break;
+ case NEON_ST2_h_post:
+ case NEON_LD2_h_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+ form = "{'Vt.h, 'Vt2.h}['IVLSLane1], ['Xns], 'Xmb4";
+ break;
+ case NEON_LD2_s_post:
+ case NEON_ST2_s_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+ if ((instr->NEONLSSize() & 1) == 0)
+ form = "{'Vt.s, 'Vt2.s}['IVLSLane2], ['Xns], 'Xmb8";
+ else
+ form = "{'Vt.d, 'Vt2.d}['IVLSLane3], ['Xns], 'Xmb16";
+ break;
+ case NEON_LD2R_post:
+ mnemonic = "ld2r";
+ form = "{'Vt.%s, 'Vt2.%s}, ['Xns], 'Xmz2";
+ break;
+ case NEON_LD3_b_post:
+ case NEON_ST3_b_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+ form = "{'Vt.b, 'Vt2.b, 'Vt3.b}['IVLSLane0], ['Xns], 'Xmb3";
+ break;
+ case NEON_LD3_h_post:
+ case NEON_ST3_h_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+ form = "{'Vt.h, 'Vt2.h, 'Vt3.h}['IVLSLane1], ['Xns], 'Xmb6";
+ break;
+ case NEON_LD3_s_post:
+ case NEON_ST3_s_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+ if ((instr->NEONLSSize() & 1) == 0)
+ form = "{'Vt.s, 'Vt2.s, 'Vt3.s}['IVLSLane2], ['Xns], 'Xmb12";
+ else
+        form = "{'Vt.d, 'Vt2.d, 'Vt3.d}['IVLSLane3], ['Xns], 'Xmb24";
+ break;
+ case NEON_LD3R_post:
+ mnemonic = "ld3r";
+ form = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s}, ['Xns], 'Xmz3";
+ break;
+ case NEON_LD4_b_post:
+ case NEON_ST4_b_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+ form = "{'Vt.b, 'Vt2.b, 'Vt3.b, 'Vt4.b}['IVLSLane0], ['Xns], 'Xmb4";
+ break;
+ case NEON_LD4_h_post:
+ case NEON_ST4_h_post:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+ form = "{'Vt.h, 'Vt2.h, 'Vt3.h, 'Vt4.h}['IVLSLane1], ['Xns], 'Xmb8";
+ break;
+ case NEON_LD4_s_post:
+ case NEON_ST4_s_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+ if ((instr->NEONLSSize() & 1) == 0)
+ form = "{'Vt.s, 'Vt2.s, 'Vt3.s, 'Vt4.s}['IVLSLane2], ['Xns], 'Xmb16";
+ else
+ form = "{'Vt.d, 'Vt2.d, 'Vt3.d, 'Vt4.d}['IVLSLane3], ['Xns], 'Xmb32";
+ break;
+ case NEON_LD4R_post:
+ mnemonic = "ld4r";
+ form = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns], 'Xmz4";
+ break;
+ default: break;
+ }
+
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONModifiedImmediate(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Vt.%s, 'IVMIImm8, lsl 'IVMIShiftAmt1";
+
+ int cmode = instr->NEONCmode();
+ int cmode_3 = (cmode >> 3) & 1;
+ int cmode_2 = (cmode >> 2) & 1;
+ int cmode_1 = (cmode >> 1) & 1;
+ int cmode_0 = cmode & 1;
+ int q = instr->NEONQ();
+ int op = instr->NEONModImmOp();
+
+ static const NEONFormatMap map_b = { {30}, {NF_8B, NF_16B} };
+ static const NEONFormatMap map_h = { {30}, {NF_4H, NF_8H} };
+ static const NEONFormatMap map_s = { {30}, {NF_2S, NF_4S} };
+ NEONFormatDecoder nfd(instr, &map_b);
+
+ if (cmode_3 == 0) {
+ if (cmode_0 == 0) {
+ mnemonic = (op == 1) ? "mvni" : "movi";
+ } else { // cmode<0> == '1'.
+ mnemonic = (op == 1) ? "bic" : "orr";
+ }
+ nfd.SetFormatMap(0, &map_s);
+ } else { // cmode<3> == '1'.
+ if (cmode_2 == 0) {
+ if (cmode_0 == 0) {
+ mnemonic = (op == 1) ? "mvni" : "movi";
+ } else { // cmode<0> == '1'.
+ mnemonic = (op == 1) ? "bic" : "orr";
+ }
+ nfd.SetFormatMap(0, &map_h);
+ } else { // cmode<2> == '1'.
+ if (cmode_1 == 0) {
+ mnemonic = (op == 1) ? "mvni" : "movi";
+ form = "'Vt.%s, 'IVMIImm8, msl 'IVMIShiftAmt2";
+ nfd.SetFormatMap(0, &map_s);
+ } else { // cmode<1> == '1'.
+ if (cmode_0 == 0) {
+ mnemonic = "movi";
+ if (op == 0) {
+ form = "'Vt.%s, 'IVMIImm8";
+ } else {
+ form = (q == 0) ? "'Dd, 'IVMIImm" : "'Vt.2d, 'IVMIImm";
+ }
+ } else { // cmode<0> == '1'
+ mnemonic = "fmov";
+ if (op == 0) {
+ form = "'Vt.%s, 'IVMIImmFPSingle";
+ nfd.SetFormatMap(0, &map_s);
+ } else {
+ if (q == 1) {
+ form = "'Vt.2d, 'IVMIImmFPDouble";
+ }
+ }
+ }
+ }
+ }
+ }
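+  // Worked example (per the AArch64 cmode encoding): cmode = 0b1110 with
+  // op = 0 follows the cmode<3:1> == '111', cmode<0> == '0' path above and is
+  // printed as a byte-replicating "movi" using the 8B/16B format map.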
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONScalar2RegMisc(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "%sd, %sn";
+ const char *form_0 = "%sd, %sn, #0";
+ const char *form_fp0 = "%sd, %sn, #0.0";
+
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
+
+ if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) {
+ // These instructions all use a two bit size field, except NOT and RBIT,
+ // which use the field to encode the operation.
+ switch (instr->Mask(NEONScalar2RegMiscMask)) {
+ case NEON_CMGT_zero_scalar: mnemonic = "cmgt"; form = form_0; break;
+ case NEON_CMGE_zero_scalar: mnemonic = "cmge"; form = form_0; break;
+ case NEON_CMLE_zero_scalar: mnemonic = "cmle"; form = form_0; break;
+ case NEON_CMLT_zero_scalar: mnemonic = "cmlt"; form = form_0; break;
+ case NEON_CMEQ_zero_scalar: mnemonic = "cmeq"; form = form_0; break;
+ case NEON_NEG_scalar: mnemonic = "neg"; break;
+ case NEON_SQNEG_scalar: mnemonic = "sqneg"; break;
+ case NEON_ABS_scalar: mnemonic = "abs"; break;
+ case NEON_SQABS_scalar: mnemonic = "sqabs"; break;
+ case NEON_SUQADD_scalar: mnemonic = "suqadd"; break;
+ case NEON_USQADD_scalar: mnemonic = "usqadd"; break;
+ default: form = "(NEONScalar2RegMisc)";
+ }
+ } else {
+ // These instructions all use a one bit size field, except SQXTUN, SQXTN
+ // and UQXTN, which use a two bit size field.
+ nfd.SetFormatMaps(nfd.FPScalarFormatMap());
+ switch (instr->Mask(NEONScalar2RegMiscFPMask)) {
+ case NEON_FRSQRTE_scalar: mnemonic = "frsqrte"; break;
+ case NEON_FRECPE_scalar: mnemonic = "frecpe"; break;
+ case NEON_SCVTF_scalar: mnemonic = "scvtf"; break;
+ case NEON_UCVTF_scalar: mnemonic = "ucvtf"; break;
+ case NEON_FCMGT_zero_scalar: mnemonic = "fcmgt"; form = form_fp0; break;
+ case NEON_FCMGE_zero_scalar: mnemonic = "fcmge"; form = form_fp0; break;
+ case NEON_FCMLE_zero_scalar: mnemonic = "fcmle"; form = form_fp0; break;
+ case NEON_FCMLT_zero_scalar: mnemonic = "fcmlt"; form = form_fp0; break;
+ case NEON_FCMEQ_zero_scalar: mnemonic = "fcmeq"; form = form_fp0; break;
+ case NEON_FRECPX_scalar: mnemonic = "frecpx"; break;
+ case NEON_FCVTNS_scalar: mnemonic = "fcvtns"; break;
+ case NEON_FCVTNU_scalar: mnemonic = "fcvtnu"; break;
+ case NEON_FCVTPS_scalar: mnemonic = "fcvtps"; break;
+ case NEON_FCVTPU_scalar: mnemonic = "fcvtpu"; break;
+ case NEON_FCVTMS_scalar: mnemonic = "fcvtms"; break;
+ case NEON_FCVTMU_scalar: mnemonic = "fcvtmu"; break;
+ case NEON_FCVTZS_scalar: mnemonic = "fcvtzs"; break;
+ case NEON_FCVTZU_scalar: mnemonic = "fcvtzu"; break;
+ case NEON_FCVTAS_scalar: mnemonic = "fcvtas"; break;
+ case NEON_FCVTAU_scalar: mnemonic = "fcvtau"; break;
+ case NEON_FCVTXN_scalar:
+ nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
+ mnemonic = "fcvtxn";
+ break;
+ default:
+ nfd.SetFormatMap(0, nfd.ScalarFormatMap());
+ nfd.SetFormatMap(1, nfd.LongScalarFormatMap());
+ switch (instr->Mask(NEONScalar2RegMiscMask)) {
+ case NEON_SQXTN_scalar: mnemonic = "sqxtn"; break;
+ case NEON_UQXTN_scalar: mnemonic = "uqxtn"; break;
+ case NEON_SQXTUN_scalar: mnemonic = "sqxtun"; break;
+ default: form = "(NEONScalar2RegMisc)";
+ }
+ }
+ }
+ Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
+}
+
+
+void Disassembler::VisitNEONScalar3Diff(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "%sd, %sn, %sm";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap(),
+ NEONFormatDecoder::ScalarFormatMap());
+
+ switch (instr->Mask(NEONScalar3DiffMask)) {
+ case NEON_SQDMLAL_scalar : mnemonic = "sqdmlal"; break;
+ case NEON_SQDMLSL_scalar : mnemonic = "sqdmlsl"; break;
+ case NEON_SQDMULL_scalar : mnemonic = "sqdmull"; break;
+ default: form = "(NEONScalar3Diff)";
+ }
+ Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
+}
+
+
+void Disassembler::VisitNEONScalar3Same(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "%sd, %sn, %sm";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
+
+ if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) {
+ nfd.SetFormatMaps(nfd.FPScalarFormatMap());
+ switch (instr->Mask(NEONScalar3SameFPMask)) {
+ case NEON_FACGE_scalar: mnemonic = "facge"; break;
+ case NEON_FACGT_scalar: mnemonic = "facgt"; break;
+ case NEON_FCMEQ_scalar: mnemonic = "fcmeq"; break;
+ case NEON_FCMGE_scalar: mnemonic = "fcmge"; break;
+ case NEON_FCMGT_scalar: mnemonic = "fcmgt"; break;
+ case NEON_FMULX_scalar: mnemonic = "fmulx"; break;
+ case NEON_FRECPS_scalar: mnemonic = "frecps"; break;
+ case NEON_FRSQRTS_scalar: mnemonic = "frsqrts"; break;
+ case NEON_FABD_scalar: mnemonic = "fabd"; break;
+ default: form = "(NEONScalar3Same)";
+ }
+ } else {
+ switch (instr->Mask(NEONScalar3SameMask)) {
+ case NEON_ADD_scalar: mnemonic = "add"; break;
+ case NEON_SUB_scalar: mnemonic = "sub"; break;
+ case NEON_CMEQ_scalar: mnemonic = "cmeq"; break;
+ case NEON_CMGE_scalar: mnemonic = "cmge"; break;
+ case NEON_CMGT_scalar: mnemonic = "cmgt"; break;
+ case NEON_CMHI_scalar: mnemonic = "cmhi"; break;
+ case NEON_CMHS_scalar: mnemonic = "cmhs"; break;
+ case NEON_CMTST_scalar: mnemonic = "cmtst"; break;
+ case NEON_UQADD_scalar: mnemonic = "uqadd"; break;
+ case NEON_SQADD_scalar: mnemonic = "sqadd"; break;
+ case NEON_UQSUB_scalar: mnemonic = "uqsub"; break;
+ case NEON_SQSUB_scalar: mnemonic = "sqsub"; break;
+ case NEON_USHL_scalar: mnemonic = "ushl"; break;
+ case NEON_SSHL_scalar: mnemonic = "sshl"; break;
+ case NEON_UQSHL_scalar: mnemonic = "uqshl"; break;
+ case NEON_SQSHL_scalar: mnemonic = "sqshl"; break;
+ case NEON_URSHL_scalar: mnemonic = "urshl"; break;
+ case NEON_SRSHL_scalar: mnemonic = "srshl"; break;
+ case NEON_UQRSHL_scalar: mnemonic = "uqrshl"; break;
+ case NEON_SQRSHL_scalar: mnemonic = "sqrshl"; break;
+ case NEON_SQDMULH_scalar: mnemonic = "sqdmulh"; break;
+ case NEON_SQRDMULH_scalar: mnemonic = "sqrdmulh"; break;
+ default: form = "(NEONScalar3Same)";
+ }
+ }
+ Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
+}
+
+
+void Disassembler::VisitNEONScalarByIndexedElement(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "%sd, %sn, 'Ve.%s['IVByElemIndex]";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
+ bool long_instr = false;
+
+ switch (instr->Mask(NEONScalarByIndexedElementMask)) {
+ case NEON_SQDMULL_byelement_scalar:
+ mnemonic = "sqdmull";
+ long_instr = true;
+ break;
+ case NEON_SQDMLAL_byelement_scalar:
+ mnemonic = "sqdmlal";
+ long_instr = true;
+ break;
+ case NEON_SQDMLSL_byelement_scalar:
+ mnemonic = "sqdmlsl";
+ long_instr = true;
+ break;
+ case NEON_SQDMULH_byelement_scalar:
+ mnemonic = "sqdmulh";
+ break;
+ case NEON_SQRDMULH_byelement_scalar:
+ mnemonic = "sqrdmulh";
+ break;
+ default:
+ nfd.SetFormatMap(0, nfd.FPScalarFormatMap());
+ switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
+ case NEON_FMUL_byelement_scalar: mnemonic = "fmul"; break;
+ case NEON_FMLA_byelement_scalar: mnemonic = "fmla"; break;
+ case NEON_FMLS_byelement_scalar: mnemonic = "fmls"; break;
+ case NEON_FMULX_byelement_scalar: mnemonic = "fmulx"; break;
+ default: form = "(NEONScalarByIndexedElement)";
+ }
+ }
+
+ if (long_instr) {
+ nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
+ }
+
+ Format(instr, mnemonic, nfd.Substitute(
+ form, nfd.kPlaceholder, nfd.kPlaceholder, nfd.kFormat));
+}
+
+
+void Disassembler::VisitNEONScalarCopy(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONScalarCopy)";
+
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularScalarFormatMap());
+
+ if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) {
+ mnemonic = "mov";
+ form = "%sd, 'Vn.%s['IVInsIndex1]";
+ }
+
+ Format(instr, mnemonic, nfd.Substitute(form, nfd.kPlaceholder, nfd.kFormat));
+}
+
+
+void Disassembler::VisitNEONScalarPairwise(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "%sd, 'Vn.%s";
+ NEONFormatMap map = { {22}, {NF_2S, NF_2D} };
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap(), &map);
+
+ switch (instr->Mask(NEONScalarPairwiseMask)) {
+ case NEON_ADDP_scalar: mnemonic = "addp"; break;
+ case NEON_FADDP_scalar: mnemonic = "faddp"; break;
+ case NEON_FMAXP_scalar: mnemonic = "fmaxp"; break;
+ case NEON_FMAXNMP_scalar: mnemonic = "fmaxnmp"; break;
+ case NEON_FMINP_scalar: mnemonic = "fminp"; break;
+ case NEON_FMINNMP_scalar: mnemonic = "fminnmp"; break;
+ default: form = "(NEONScalarPairwise)";
+ }
+ Format(instr, mnemonic, nfd.Substitute(form,
+ NEONFormatDecoder::kPlaceholder, NEONFormatDecoder::kFormat));
+}
+
+
+void Disassembler::VisitNEONScalarShiftImmediate(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "%sd, %sn, 'Is1";
+ const char *form_2 = "%sd, %sn, 'Is2";
+
+ static const NEONFormatMap map_shift = {
+ {22, 21, 20, 19},
+ {NF_UNDEF, NF_B, NF_H, NF_H, NF_S, NF_S, NF_S, NF_S,
+ NF_D, NF_D, NF_D, NF_D, NF_D, NF_D, NF_D, NF_D}
+ };
+ static const NEONFormatMap map_shift_narrow = {
+ {21, 20, 19},
+ {NF_UNDEF, NF_H, NF_S, NF_S, NF_D, NF_D, NF_D, NF_D}
+ };
+ NEONFormatDecoder nfd(instr, &map_shift);
+
+ if (instr->ImmNEONImmh()) { // immh has to be non-zero.
+ switch (instr->Mask(NEONScalarShiftImmediateMask)) {
+ case NEON_FCVTZU_imm_scalar: mnemonic = "fcvtzu"; break;
+ case NEON_FCVTZS_imm_scalar: mnemonic = "fcvtzs"; break;
+ case NEON_SCVTF_imm_scalar: mnemonic = "scvtf"; break;
+ case NEON_UCVTF_imm_scalar: mnemonic = "ucvtf"; break;
+ case NEON_SRI_scalar: mnemonic = "sri"; break;
+ case NEON_SSHR_scalar: mnemonic = "sshr"; break;
+ case NEON_USHR_scalar: mnemonic = "ushr"; break;
+ case NEON_SRSHR_scalar: mnemonic = "srshr"; break;
+ case NEON_URSHR_scalar: mnemonic = "urshr"; break;
+ case NEON_SSRA_scalar: mnemonic = "ssra"; break;
+ case NEON_USRA_scalar: mnemonic = "usra"; break;
+ case NEON_SRSRA_scalar: mnemonic = "srsra"; break;
+ case NEON_URSRA_scalar: mnemonic = "ursra"; break;
+ case NEON_SHL_scalar: mnemonic = "shl"; form = form_2; break;
+ case NEON_SLI_scalar: mnemonic = "sli"; form = form_2; break;
+ case NEON_SQSHLU_scalar: mnemonic = "sqshlu"; form = form_2; break;
+ case NEON_SQSHL_imm_scalar: mnemonic = "sqshl"; form = form_2; break;
+ case NEON_UQSHL_imm_scalar: mnemonic = "uqshl"; form = form_2; break;
+ case NEON_UQSHRN_scalar:
+ mnemonic = "uqshrn";
+ nfd.SetFormatMap(1, &map_shift_narrow);
+ break;
+ case NEON_UQRSHRN_scalar:
+ mnemonic = "uqrshrn";
+ nfd.SetFormatMap(1, &map_shift_narrow);
+ break;
+ case NEON_SQSHRN_scalar:
+ mnemonic = "sqshrn";
+ nfd.SetFormatMap(1, &map_shift_narrow);
+ break;
+ case NEON_SQRSHRN_scalar:
+ mnemonic = "sqrshrn";
+ nfd.SetFormatMap(1, &map_shift_narrow);
+ break;
+ case NEON_SQSHRUN_scalar:
+ mnemonic = "sqshrun";
+ nfd.SetFormatMap(1, &map_shift_narrow);
+ break;
+ case NEON_SQRSHRUN_scalar:
+ mnemonic = "sqrshrun";
+ nfd.SetFormatMap(1, &map_shift_narrow);
+ break;
+ default:
+ form = "(NEONScalarShiftImmediate)";
+ }
+ } else {
+ form = "(NEONScalarShiftImmediate)";
+ }
+ Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
+}
+
+
+void Disassembler::VisitNEONShiftImmediate(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Vd.%s, 'Vn.%s, 'Is1";
+ const char *form_shift_2 = "'Vd.%s, 'Vn.%s, 'Is2";
+ const char *form_xtl = "'Vd.%s, 'Vn.%s";
+
+ // 0001->8H, 001x->4S, 01xx->2D, all others undefined.
+ static const NEONFormatMap map_shift_ta = {
+ {22, 21, 20, 19},
+ {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}
+ };
+
+ // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
+ // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
+ static const NEONFormatMap map_shift_tb = {
+ {22, 21, 20, 19, 30},
+ {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B, NF_4H, NF_8H, NF_4H, NF_8H,
+ NF_2S, NF_4S, NF_2S, NF_4S, NF_2S, NF_4S, NF_2S, NF_4S,
+ NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D,
+ NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D}
+ };
+
+ NEONFormatDecoder nfd(instr, &map_shift_tb);
+
+ if (instr->ImmNEONImmh()) { // immh has to be non-zero.
+ switch (instr->Mask(NEONShiftImmediateMask)) {
+ case NEON_SQSHLU: mnemonic = "sqshlu"; form = form_shift_2; break;
+ case NEON_SQSHL_imm: mnemonic = "sqshl"; form = form_shift_2; break;
+ case NEON_UQSHL_imm: mnemonic = "uqshl"; form = form_shift_2; break;
+ case NEON_SHL: mnemonic = "shl"; form = form_shift_2; break;
+ case NEON_SLI: mnemonic = "sli"; form = form_shift_2; break;
+ case NEON_SCVTF_imm: mnemonic = "scvtf"; break;
+ case NEON_UCVTF_imm: mnemonic = "ucvtf"; break;
+ case NEON_FCVTZU_imm: mnemonic = "fcvtzu"; break;
+ case NEON_FCVTZS_imm: mnemonic = "fcvtzs"; break;
+ case NEON_SRI: mnemonic = "sri"; break;
+ case NEON_SSHR: mnemonic = "sshr"; break;
+ case NEON_USHR: mnemonic = "ushr"; break;
+ case NEON_SRSHR: mnemonic = "srshr"; break;
+ case NEON_URSHR: mnemonic = "urshr"; break;
+ case NEON_SSRA: mnemonic = "ssra"; break;
+ case NEON_USRA: mnemonic = "usra"; break;
+ case NEON_SRSRA: mnemonic = "srsra"; break;
+ case NEON_URSRA: mnemonic = "ursra"; break;
+ case NEON_SHRN:
+ mnemonic = instr->Mask(NEON_Q) ? "shrn2" : "shrn";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_RSHRN:
+ mnemonic = instr->Mask(NEON_Q) ? "rshrn2" : "rshrn";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_UQSHRN:
+ mnemonic = instr->Mask(NEON_Q) ? "uqshrn2" : "uqshrn";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_UQRSHRN:
+ mnemonic = instr->Mask(NEON_Q) ? "uqrshrn2" : "uqrshrn";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_SQSHRN:
+ mnemonic = instr->Mask(NEON_Q) ? "sqshrn2" : "sqshrn";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_SQRSHRN:
+ mnemonic = instr->Mask(NEON_Q) ? "sqrshrn2" : "sqrshrn";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_SQSHRUN:
+ mnemonic = instr->Mask(NEON_Q) ? "sqshrun2" : "sqshrun";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_SQRSHRUN:
+ mnemonic = instr->Mask(NEON_Q) ? "sqrshrun2" : "sqrshrun";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_SSHLL:
+ nfd.SetFormatMap(0, &map_shift_ta);
+ if (instr->ImmNEONImmb() == 0 &&
+ CountSetBits(instr->ImmNEONImmh(), 32) == 1) { // sxtl variant.
+ form = form_xtl;
+ mnemonic = instr->Mask(NEON_Q) ? "sxtl2" : "sxtl";
+ } else { // sshll variant.
+ form = form_shift_2;
+ mnemonic = instr->Mask(NEON_Q) ? "sshll2" : "sshll";
+ }
+ break;
+ case NEON_USHLL:
+ nfd.SetFormatMap(0, &map_shift_ta);
+ if (instr->ImmNEONImmb() == 0 &&
+ CountSetBits(instr->ImmNEONImmh(), 32) == 1) { // uxtl variant.
+ form = form_xtl;
+ mnemonic = instr->Mask(NEON_Q) ? "uxtl2" : "uxtl";
+ } else { // ushll variant.
+ form = form_shift_2;
+ mnemonic = instr->Mask(NEON_Q) ? "ushll2" : "ushll";
+ }
+ break;
+ default: form = "(NEONShiftImmediate)";
+ }
+ } else {
+ form = "(NEONShiftImmediate)";
+ }
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONTable(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONTable)";
+ const char form_1v[] = "'Vd.%%s, {'Vn.16b}, 'Vm.%%s";
+ const char form_2v[] = "'Vd.%%s, {'Vn.16b, v%d.16b}, 'Vm.%%s";
+ const char form_3v[] = "'Vd.%%s, {'Vn.16b, v%d.16b, v%d.16b}, 'Vm.%%s";
+ const char form_4v[] =
+ "'Vd.%%s, {'Vn.16b, v%d.16b, v%d.16b, v%d.16b}, 'Vm.%%s";
+ static const NEONFormatMap map_b = { {30}, {NF_8B, NF_16B} };
+ NEONFormatDecoder nfd(instr, &map_b);
+
+ switch (instr->Mask(NEONTableMask)) {
+ case NEON_TBL_1v: mnemonic = "tbl"; form = form_1v; break;
+ case NEON_TBL_2v: mnemonic = "tbl"; form = form_2v; break;
+ case NEON_TBL_3v: mnemonic = "tbl"; form = form_3v; break;
+ case NEON_TBL_4v: mnemonic = "tbl"; form = form_4v; break;
+ case NEON_TBX_1v: mnemonic = "tbx"; form = form_1v; break;
+ case NEON_TBX_2v: mnemonic = "tbx"; form = form_2v; break;
+ case NEON_TBX_3v: mnemonic = "tbx"; form = form_3v; break;
+ case NEON_TBX_4v: mnemonic = "tbx"; form = form_4v; break;
+ default: break;
+ }
+
+ char re_form[sizeof(form_4v) + 6];
+ int reg_num = instr->Rn();
+ SprintfLiteral(re_form, form,
+ (reg_num + 1) % kNumberOfVRegisters,
+ (reg_num + 2) % kNumberOfVRegisters,
+ (reg_num + 3) % kNumberOfVRegisters);
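+  // The modulo handles register-list wrap-around: e.g. a tbl whose first
+  // table register is v30 and which uses three table registers lists
+  // {v30.16b, v31.16b, v0.16b}.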
+
+ Format(instr, mnemonic, nfd.Substitute(re_form));
+}
+
+
+void Disassembler::VisitNEONPerm(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
+ NEONFormatDecoder nfd(instr);
+
+ switch (instr->Mask(NEONPermMask)) {
+ case NEON_TRN1: mnemonic = "trn1"; break;
+ case NEON_TRN2: mnemonic = "trn2"; break;
+ case NEON_UZP1: mnemonic = "uzp1"; break;
+ case NEON_UZP2: mnemonic = "uzp2"; break;
+ case NEON_ZIP1: mnemonic = "zip1"; break;
+ case NEON_ZIP2: mnemonic = "zip2"; break;
+ default: form = "(NEONPerm)";
+ }
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitUnimplemented(const Instruction* instr) {
+ Format(instr, "unimplemented", "(Unimplemented)");
+}
+
+
+void Disassembler::VisitUnallocated(const Instruction* instr) {
+ Format(instr, "unallocated", "(Unallocated)");
+}
+
+
+void Disassembler::ProcessOutput(const Instruction* /*instr*/) {
+ // The base disasm does nothing more than disassembling into a buffer.
+}
+
+
+void Disassembler::AppendRegisterNameToOutput(const Instruction* instr,
+ const CPURegister& reg) {
+ USE(instr);
+ VIXL_ASSERT(reg.IsValid());
+ char reg_char;
+
+ if (reg.IsRegister()) {
+ reg_char = reg.Is64Bits() ? 'x' : 'w';
+ } else {
+ VIXL_ASSERT(reg.IsVRegister());
+ switch (reg.SizeInBits()) {
+ case kBRegSize: reg_char = 'b'; break;
+ case kHRegSize: reg_char = 'h'; break;
+ case kSRegSize: reg_char = 's'; break;
+ case kDRegSize: reg_char = 'd'; break;
+ default:
+ VIXL_ASSERT(reg.Is128Bits());
+ reg_char = 'q';
+ }
+ }
+
+ if (reg.IsVRegister() || !(reg.Aliases(sp) || reg.Aliases(xzr))) {
+ // A core or scalar/vector register: [wx]0 - 30, [bhsdq]0 - 31.
+ AppendToOutput("%c%d", reg_char, reg.code());
+ } else if (reg.Aliases(sp)) {
+ // Disassemble w31/x31 as stack pointer wsp/sp.
+ AppendToOutput("%s", reg.Is64Bits() ? "sp" : "wsp");
+ } else {
+ // Disassemble w31/x31 as zero register wzr/xzr.
+ AppendToOutput("%czr", reg_char);
+ }
+}
+
+
+void Disassembler::AppendPCRelativeOffsetToOutput(const Instruction* instr,
+ int64_t offset) {
+ USE(instr);
+ char sign = (offset < 0) ? '-' : '+';
+ AppendToOutput("#%c0x%" PRIx64, sign, std::abs(offset));
+}
+
+
+void Disassembler::AppendAddressToOutput(const Instruction* instr,
+ const void* addr) {
+ USE(instr);
+ AppendToOutput("(addr 0x%" PRIxPTR ")", reinterpret_cast<uintptr_t>(addr));
+}
+
+
+void Disassembler::AppendCodeAddressToOutput(const Instruction* instr,
+ const void* addr) {
+ AppendAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::AppendDataAddressToOutput(const Instruction* instr,
+ const void* addr) {
+ AppendAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::AppendCodeRelativeAddressToOutput(const Instruction* instr,
+ const void* addr) {
+ USE(instr);
+ int64_t rel_addr = CodeRelativeAddress(addr);
+ if (rel_addr >= 0) {
+ AppendToOutput("(addr 0x%" PRIx64 ")", rel_addr);
+ } else {
+ AppendToOutput("(addr -0x%" PRIx64 ")", -rel_addr);
+ }
+}
+
+
+void Disassembler::AppendCodeRelativeCodeAddressToOutput(
+ const Instruction* instr, const void* addr) {
+ AppendCodeRelativeAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::AppendCodeRelativeDataAddressToOutput(
+ const Instruction* instr, const void* addr) {
+ AppendCodeRelativeAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::MapCodeAddress(int64_t base_address,
+ const Instruction* instr_address) {
+ set_code_address_offset(
+ base_address - reinterpret_cast<intptr_t>(instr_address));
+}
+int64_t Disassembler::CodeRelativeAddress(const void* addr) {
+ return reinterpret_cast<intptr_t>(addr) + code_address_offset();
+}
+
+
+void Disassembler::Format(const Instruction* instr, const char* mnemonic,
+ const char* format) {
+ VIXL_ASSERT(mnemonic != NULL);
+ ResetOutput();
+ uint32_t pos = buffer_pos_;
+ Substitute(instr, mnemonic);
+ if (format != NULL) {
+ uint32_t spaces = buffer_pos_ - pos < 8 ? 8 - (buffer_pos_ - pos) : 1;
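+    // For example, a three-character mnemonic such as "add" is padded with
+    // five spaces here, so its operands start in column 8; mnemonics of
+    // eight or more characters get a single separating space.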
+ while (spaces--) {
+ VIXL_ASSERT(buffer_pos_ < buffer_size_);
+ buffer_[buffer_pos_++] = ' ';
+ }
+ Substitute(instr, format);
+ }
+ VIXL_ASSERT(buffer_pos_ < buffer_size_);
+ buffer_[buffer_pos_] = 0;
+ ProcessOutput(instr);
+}
+
+
+void Disassembler::Substitute(const Instruction* instr, const char* string) {
+ char chr = *string++;
+ while (chr != '\0') {
+ if (chr == '\'') {
+ string += SubstituteField(instr, string);
+ } else {
+ VIXL_ASSERT(buffer_pos_ < buffer_size_);
+ buffer_[buffer_pos_++] = chr;
+ }
+ chr = *string++;
+ }
+}
+
+
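+// Each format string is copied to the output verbatim except for fields
+// introduced by a single quote, which SubstituteField() expands below. As an
+// illustrative example, a format such as "'Rd, 'Rn, 'Rm" for an ADD with the
+// sf bit set would expand to something like "x0, x1, x2".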
+int Disassembler::SubstituteField(const Instruction* instr,
+ const char* format) {
+ switch (format[0]) {
+    // NB. The remaining substitution prefix characters are: JUZ.
+ case 'R': // Register. X or W, selected by sf bit.
+ case 'F': // FP register. S or D, selected by type field.
+ case 'V': // Vector register, V, vector format.
+ case 'W':
+ case 'X':
+ case 'B':
+ case 'H':
+ case 'S':
+ case 'D':
+ case 'Q': return SubstituteRegisterField(instr, format);
+ case 'I': return SubstituteImmediateField(instr, format);
+ case 'L': return SubstituteLiteralField(instr, format);
+ case 'N': return SubstituteShiftField(instr, format);
+ case 'P': return SubstitutePrefetchField(instr, format);
+ case 'C': return SubstituteConditionField(instr, format);
+ case 'E': return SubstituteExtendField(instr, format);
+ case 'A': return SubstitutePCRelAddressField(instr, format);
+ case 'T': return SubstituteBranchTargetField(instr, format);
+ case 'O': return SubstituteLSRegOffsetField(instr, format);
+ case 'M': return SubstituteBarrierField(instr, format);
+ case 'K': return SubstituteCrField(instr, format);
+ case 'G': return SubstituteSysOpField(instr, format);
+ default: {
+ VIXL_UNREACHABLE();
+ return 1;
+ }
+ }
+}
+
+
+int Disassembler::SubstituteRegisterField(const Instruction* instr,
+ const char* format) {
+ char reg_prefix = format[0];
+ unsigned reg_num = 0;
+ unsigned field_len = 2;
+
+ switch (format[1]) {
+ case 'd':
+ reg_num = instr->Rd();
+ if (format[2] == 'q') {
+ reg_prefix = instr->NEONQ() ? 'X' : 'W';
+ field_len = 3;
+ }
+ break;
+ case 'n': reg_num = instr->Rn(); break;
+ case 'm':
+ reg_num = instr->Rm();
+ switch (format[2]) {
+ // Handle registers tagged with b (bytes), z (instruction), or
+ // r (registers), used for address updates in
+ // NEON load/store instructions.
+ case 'r':
+ case 'b':
+ case 'z': {
+ field_len = 3;
+ char* eimm;
+ int imm = static_cast<int>(strtol(&format[3], &eimm, 10));
+ field_len += eimm - &format[3];
+ if (reg_num == 31) {
+ switch (format[2]) {
+ case 'z':
+ imm *= (1 << instr->NEONLSSize());
+ break;
+ case 'r':
+ imm *= (instr->NEONQ() == 0) ? kDRegSizeInBytes
+ : kQRegSizeInBytes;
+ break;
+ case 'b':
+ break;
+ }
+ AppendToOutput("#%d", imm);
+ return field_len;
+ }
+ break;
+ }
+ }
+ break;
+ case 'e':
+ // This is register Rm, but using a 4-bit specifier. Used in NEON
+ // by-element instructions.
+ reg_num = (instr->Rm() & 0xf);
+ break;
+ case 'a': reg_num = instr->Ra(); break;
+ case 's': reg_num = instr->Rs(); break;
+ case 't':
+ reg_num = instr->Rt();
+ if (format[0] == 'V') {
+ if ((format[2] >= '2') && (format[2] <= '4')) {
+ // Handle consecutive vector register specifiers Vt2, Vt3 and Vt4.
+ reg_num = (reg_num + format[2] - '1') % 32;
+ field_len = 3;
+ }
+ } else {
+ if (format[2] == '2') {
+ // Handle register specifier Rt2.
+ reg_num = instr->Rt2();
+ field_len = 3;
+ }
+ }
+ break;
+ case '(': {
+ switch (format[2]) {
+ case 's':
+ reg_num = instr->Rs();
+ break;
+ case 't':
+ reg_num = instr->Rt();
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ VIXL_ASSERT(format[3] == '+');
+ int i = 4;
+ int addition = 0;
+ while (format[i] != ')') {
+ VIXL_ASSERT((format[i] >= '0') && (format[i] <= '9'));
+ addition *= 10;
+ addition += format[i] - '0';
+ ++i;
+ }
+ reg_num += addition;
+ field_len = i + 1;
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+
+ // Increase field length for registers tagged as stack.
+ if (format[1] != '(' && format[2] == 's') {
+ field_len = 3;
+ }
+
+ CPURegister::RegisterType reg_type = CPURegister::kRegister;
+ unsigned reg_size = kXRegSize;
+
+ if (reg_prefix == 'R') {
+ reg_prefix = instr->SixtyFourBits() ? 'X' : 'W';
+ } else if (reg_prefix == 'F') {
+ reg_prefix = ((instr->FPType() & 1) == 0) ? 'S' : 'D';
+ }
+
+ switch (reg_prefix) {
+ case 'W':
+ reg_type = CPURegister::kRegister; reg_size = kWRegSize; break;
+ case 'X':
+ reg_type = CPURegister::kRegister; reg_size = kXRegSize; break;
+ case 'B':
+ reg_type = CPURegister::kVRegister; reg_size = kBRegSize; break;
+ case 'H':
+ reg_type = CPURegister::kVRegister; reg_size = kHRegSize; break;
+ case 'S':
+ reg_type = CPURegister::kVRegister; reg_size = kSRegSize; break;
+ case 'D':
+ reg_type = CPURegister::kVRegister; reg_size = kDRegSize; break;
+ case 'Q':
+ reg_type = CPURegister::kVRegister; reg_size = kQRegSize; break;
+ case 'V':
+ AppendToOutput("v%d", reg_num);
+ return field_len;
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ if ((reg_type == CPURegister::kRegister) &&
+ (reg_num == kZeroRegCode) && (format[2] == 's')) {
+ reg_num = kSPRegInternalCode;
+ }
+
+ AppendRegisterNameToOutput(instr, CPURegister(reg_num, reg_size, reg_type));
+
+ return field_len;
+}
+
+
+int Disassembler::SubstituteImmediateField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(format[0] == 'I');
+
+ switch (format[1]) {
+ case 'M': { // IMoveImm, IMoveNeg or IMoveLSL.
+ if (format[5] == 'L') {
+ AppendToOutput("#0x%" PRIx32, instr->ImmMoveWide());
+ if (instr->ShiftMoveWide() > 0) {
+ AppendToOutput(", lsl #%" PRId32, 16 * instr->ShiftMoveWide());
+ }
+ } else {
+ VIXL_ASSERT((format[5] == 'I') || (format[5] == 'N'));
+ uint64_t imm = static_cast<uint64_t>(instr->ImmMoveWide()) <<
+ (16 * instr->ShiftMoveWide());
+ if (format[5] == 'N')
+ imm = ~imm;
+ if (!instr->SixtyFourBits())
+ imm &= UINT64_C(0xffffffff);
+ AppendToOutput("#0x%" PRIx64, imm);
+ }
+ return 8;
+ }
+ case 'L': {
+ switch (format[2]) {
+ case 'L': { // ILLiteral - Immediate Load Literal.
+ AppendToOutput("pc%+" PRId32,
+ instr->ImmLLiteral() << kLiteralEntrySizeLog2);
+ return 9;
+ }
+ case 'S': { // ILS - Immediate Load/Store.
+ if (instr->ImmLS() != 0) {
+ AppendToOutput(", #%" PRId32, instr->ImmLS());
+ }
+ return 3;
+ }
+ case 'P': { // ILPx - Immediate Load/Store Pair, x = access size.
+ if (instr->ImmLSPair() != 0) {
+ // format[3] is the scale value. Convert to a number.
+ int scale = 1 << (format[3] - '0');
+ AppendToOutput(", #%" PRId32, instr->ImmLSPair() * scale);
+ }
+ return 4;
+ }
+ case 'U': { // ILU - Immediate Load/Store Unsigned.
+ if (instr->ImmLSUnsigned() != 0) {
+ int shift = instr->SizeLS();
+ AppendToOutput(", #%" PRId32, instr->ImmLSUnsigned() << shift);
+ }
+ return 3;
+ }
+ default: {
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+ }
+ }
+ case 'C': { // ICondB - Immediate Conditional Branch.
+ int64_t offset = instr->ImmCondBranch() << 2;
+ AppendPCRelativeOffsetToOutput(instr, offset);
+ return 6;
+ }
+ case 'A': { // IAddSub.
+ VIXL_ASSERT(instr->ShiftAddSub() <= 1);
+ int64_t imm = instr->ImmAddSub() << (12 * instr->ShiftAddSub());
+ AppendToOutput("#0x%" PRIx64 " (%" PRId64 ")", imm, imm);
+ return 7;
+ }
+ case 'F': { // IFPSingle, IFPDouble or IFPFBits.
+ if (format[3] == 'F') { // IFPFbits.
+ AppendToOutput("#%" PRId32, 64 - instr->FPScale());
+ return 8;
+ } else {
+ AppendToOutput("#0x%" PRIx32 " (%.4f)", instr->ImmFP(),
+ format[3] == 'S' ? instr->ImmFP32() : instr->ImmFP64());
+ return 9;
+ }
+ }
+ case 'T': { // ITri - Immediate Triangular Encoded.
+ AppendToOutput("#0x%" PRIx64, instr->ImmLogical());
+ return 4;
+ }
+ case 'N': { // INzcv.
+ int nzcv = (instr->Nzcv() << Flags_offset);
+ AppendToOutput("#%c%c%c%c", ((nzcv & NFlag) == 0) ? 'n' : 'N',
+ ((nzcv & ZFlag) == 0) ? 'z' : 'Z',
+ ((nzcv & CFlag) == 0) ? 'c' : 'C',
+ ((nzcv & VFlag) == 0) ? 'v' : 'V');
+ return 5;
+ }
+ case 'P': { // IP - Conditional compare.
+ AppendToOutput("#%" PRId32, instr->ImmCondCmp());
+ return 2;
+ }
+ case 'B': { // Bitfields.
+ return SubstituteBitfieldImmediateField(instr, format);
+ }
+ case 'E': { // IExtract.
+ AppendToOutput("#%" PRId32, instr->ImmS());
+ return 8;
+ }
+ case 'S': { // IS - Test and branch bit.
+ AppendToOutput("#%" PRId32, (instr->ImmTestBranchBit5() << 5) |
+ instr->ImmTestBranchBit40());
+ return 2;
+ }
+ case 's': { // Is - Shift (immediate).
+ switch (format[2]) {
+ case '1': { // Is1 - SSHR.
+ int shift = 16 << HighestSetBitPosition(instr->ImmNEONImmh());
+ shift -= instr->ImmNEONImmhImmb();
+ AppendToOutput("#%d", shift);
+ return 3;
+ }
+ case '2': { // Is2 - SLI.
+ int shift = instr->ImmNEONImmhImmb();
+ shift -= 8 << HighestSetBitPosition(instr->ImmNEONImmh());
+ AppendToOutput("#%d", shift);
+ return 3;
+ }
+ default: {
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+ }
+ }
+ case 'D': { // IDebug - HLT and BRK instructions.
+ AppendToOutput("#0x%" PRIx32, instr->ImmException());
+ return 6;
+ }
+ case 'V': { // Immediate Vector.
+ switch (format[2]) {
+ case 'E': { // IVExtract.
+ AppendToOutput("#%" PRId32, instr->ImmNEONExt());
+ return 9;
+ }
+ case 'B': { // IVByElemIndex.
+ int vm_index = (instr->NEONH() << 1) | instr->NEONL();
+ if (instr->NEONSize() == 1) {
+ vm_index = (vm_index << 1) | instr->NEONM();
+ }
+ AppendToOutput("%d", vm_index);
+ return strlen("IVByElemIndex");
+ }
+ case 'I': { // INS element.
+ if (strncmp(format, "IVInsIndex", strlen("IVInsIndex")) == 0) {
+ int rd_index, rn_index;
+ int imm5 = instr->ImmNEON5();
+ int imm4 = instr->ImmNEON4();
+ int tz = CountTrailingZeros(imm5, 32);
+ rd_index = imm5 >> (tz + 1);
+ rn_index = imm4 >> tz;
+ if (strncmp(format, "IVInsIndex1", strlen("IVInsIndex1")) == 0) {
+ AppendToOutput("%d", rd_index);
+ return strlen("IVInsIndex1");
+ } else if (strncmp(format, "IVInsIndex2",
+ strlen("IVInsIndex2")) == 0) {
+ AppendToOutput("%d", rn_index);
+ return strlen("IVInsIndex2");
+ } else {
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+ }
+ VIXL_FALLTHROUGH();
+ }
+ case 'L': { // IVLSLane[0123] - suffix indicates access size shift.
+ AppendToOutput("%d", instr->NEONLSIndex(format[8] - '0'));
+ return 9;
+ }
+ case 'M': { // Modified Immediate cases.
+ if (strncmp(format,
+ "IVMIImmFPSingle",
+ strlen("IVMIImmFPSingle")) == 0) {
+ AppendToOutput("#0x%" PRIx32 " (%.4f)", instr->ImmNEONabcdefgh(),
+ instr->ImmNEONFP32());
+ return strlen("IVMIImmFPSingle");
+ } else if (strncmp(format,
+ "IVMIImmFPDouble",
+ strlen("IVMIImmFPDouble")) == 0) {
+ AppendToOutput("#0x%" PRIx32 " (%.4f)", instr->ImmNEONabcdefgh(),
+ instr->ImmNEONFP64());
+ return strlen("IVMIImmFPDouble");
+ } else if (strncmp(format, "IVMIImm8", strlen("IVMIImm8")) == 0) {
+ uint64_t imm8 = instr->ImmNEONabcdefgh();
+ AppendToOutput("#0x%" PRIx64, imm8);
+ return strlen("IVMIImm8");
+ } else if (strncmp(format, "IVMIImm", strlen("IVMIImm")) == 0) {
+ uint64_t imm8 = instr->ImmNEONabcdefgh();
+ uint64_t imm = 0;
+ for (int i = 0; i < 8; ++i) {
+ if (imm8 & (1ULL << i)) {
+ imm |= (UINT64_C(0xff) << (8 * i));
+ }
+ }
+ AppendToOutput("#0x%" PRIx64, imm);
+ return strlen("IVMIImm");
+ } else if (strncmp(format, "IVMIShiftAmt1",
+ strlen("IVMIShiftAmt1")) == 0) {
+ int cmode = instr->NEONCmode();
+ int shift_amount = 8 * ((cmode >> 1) & 3);
+ AppendToOutput("#%d", shift_amount);
+ return strlen("IVMIShiftAmt1");
+ } else if (strncmp(format, "IVMIShiftAmt2",
+ strlen("IVMIShiftAmt2")) == 0) {
+ int cmode = instr->NEONCmode();
+ int shift_amount = 8 << (cmode & 1);
+ AppendToOutput("#%d", shift_amount);
+ return strlen("IVMIShiftAmt2");
+ } else {
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+ }
+ default: {
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+ }
+ }
+ case 'X': { // IX - CLREX instruction.
+ AppendToOutput("#0x%" PRIx32, instr->CRm());
+ return 2;
+ }
+ default: {
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+ }
+}
+
+
+int Disassembler::SubstituteBitfieldImmediateField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT((format[0] == 'I') && (format[1] == 'B'));
+ unsigned r = instr->ImmR();
+ unsigned s = instr->ImmS();
+
+ switch (format[2]) {
+ case 'r': { // IBr.
+ AppendToOutput("#%d", r);
+ return 3;
+ }
+ case 's': { // IBs+1 or IBs-r+1.
+ if (format[3] == '+') {
+ AppendToOutput("#%d", s + 1);
+ return 5;
+ } else {
+ VIXL_ASSERT(format[3] == '-');
+ AppendToOutput("#%d", s - r + 1);
+ return 7;
+ }
+ }
+ case 'Z': { // IBZ-r.
+ VIXL_ASSERT((format[3] == '-') && (format[4] == 'r'));
+ unsigned reg_size = (instr->SixtyFourBits() == 1) ? kXRegSize : kWRegSize;
+ AppendToOutput("#%d", reg_size - r);
+ return 5;
+ }
+ default: {
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+ }
+}
+
+
+int Disassembler::SubstituteLiteralField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(strncmp(format, "LValue", 6) == 0);
+ USE(format);
+
+ const void * address = instr->LiteralAddress<const void *>();
+ switch (instr->Mask(LoadLiteralMask)) {
+ case LDR_w_lit:
+ case LDR_x_lit:
+ case LDRSW_x_lit:
+ case LDR_s_lit:
+ case LDR_d_lit:
+ case LDR_q_lit:
+ AppendCodeRelativeDataAddressToOutput(instr, address);
+ break;
+ case PRFM_lit: {
+ // Use the prefetch hint to decide how to print the address.
+ switch (instr->PrefetchHint()) {
+ case 0x0: // PLD: prefetch for load.
+ case 0x2: // PST: prepare for store.
+ AppendCodeRelativeDataAddressToOutput(instr, address);
+ break;
+ case 0x1: // PLI: preload instructions.
+ AppendCodeRelativeCodeAddressToOutput(instr, address);
+ break;
+ case 0x3: // Unallocated hint.
+ AppendCodeRelativeAddressToOutput(instr, address);
+ break;
+ }
+ break;
+ }
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ return 6;
+}
+
+
+int Disassembler::SubstituteShiftField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(format[0] == 'N');
+ VIXL_ASSERT(instr->ShiftDP() <= 0x3);
+
+ switch (format[1]) {
+ case 'D': { // HDP.
+ VIXL_ASSERT(instr->ShiftDP() != ROR);
+ VIXL_FALLTHROUGH();
+ }
+ case 'L': { // HLo.
+ if (instr->ImmDPShift() != 0) {
+ const char* shift_type[] = {"lsl", "lsr", "asr", "ror"};
+ AppendToOutput(", %s #%" PRId32, shift_type[instr->ShiftDP()],
+ instr->ImmDPShift());
+ }
+ return 3;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+}
+
+
+int Disassembler::SubstituteConditionField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(format[0] == 'C');
+ const char* condition_code[] = { "eq", "ne", "hs", "lo",
+ "mi", "pl", "vs", "vc",
+ "hi", "ls", "ge", "lt",
+ "gt", "le", "al", "nv" };
+ int cond;
+ switch (format[1]) {
+ case 'B': cond = instr->ConditionBranch(); break;
+ case 'I': {
+ cond = InvertCondition(static_cast<Condition>(instr->Condition()));
+ break;
+ }
+ default: cond = instr->Condition();
+ }
+ AppendToOutput("%s", condition_code[cond]);
+ return 4;
+}
+
+
+int Disassembler::SubstitutePCRelAddressField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT((strcmp(format, "AddrPCRelByte") == 0) || // Used by `adr`.
+ (strcmp(format, "AddrPCRelPage") == 0)); // Used by `adrp`.
+
+ int64_t offset = instr->ImmPCRel();
+
+ // Compute the target address based on the effective address (after applying
+ // code_address_offset). This is required for correct behaviour of adrp.
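+  // As an illustrative example, an adrp at address 0x4008 with an immediate
+  // of 1 resolves to AlignDown(0x4008, kPageSize) + 1 * 4096 = 0x5000.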
+ const Instruction* base = instr + code_address_offset();
+ if (format[9] == 'P') {
+ offset *= kPageSize;
+ base = AlignDown(base, kPageSize);
+ }
+ // Strip code_address_offset before printing, so we can use the
+ // semantically-correct AppendCodeRelativeAddressToOutput.
+ const void* target =
+ reinterpret_cast<const void*>(base + offset - code_address_offset());
+
+ AppendPCRelativeOffsetToOutput(instr, offset);
+ AppendToOutput(" ");
+ AppendCodeRelativeAddressToOutput(instr, target);
+ return 13;
+}
+
+
+int Disassembler::SubstituteBranchTargetField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(strncmp(format, "TImm", 4) == 0);
+
+ int64_t offset = 0;
+ switch (format[5]) {
+ // BImmUncn - unconditional branch immediate.
+ case 'n': offset = instr->ImmUncondBranch(); break;
+ // BImmCond - conditional branch immediate.
+ case 'o': offset = instr->ImmCondBranch(); break;
+ // BImmCmpa - compare and branch immediate.
+ case 'm': offset = instr->ImmCmpBranch(); break;
+ // BImmTest - test and branch immediate.
+ case 'e': offset = instr->ImmTestBranch(); break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ offset <<= kInstructionSizeLog2;
+ const void* target_address = reinterpret_cast<const void*>(instr + offset);
+ VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
+
+ AppendPCRelativeOffsetToOutput(instr, offset);
+ AppendToOutput(" ");
+ AppendCodeRelativeCodeAddressToOutput(instr, target_address);
+
+ return 8;
+}
+
+
+int Disassembler::SubstituteExtendField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(strncmp(format, "Ext", 3) == 0);
+ VIXL_ASSERT(instr->ExtendMode() <= 7);
+ USE(format);
+
+ const char* extend_mode[] = { "uxtb", "uxth", "uxtw", "uxtx",
+ "sxtb", "sxth", "sxtw", "sxtx" };
+
+ // If rd or rn is SP, uxtw on 32-bit registers and uxtx on 64-bit
+ // registers becomes lsl.
+ if (((instr->Rd() == kZeroRegCode) || (instr->Rn() == kZeroRegCode)) &&
+ (((instr->ExtendMode() == UXTW) && (instr->SixtyFourBits() == 0)) ||
+ (instr->ExtendMode() == UXTX))) {
+ if (instr->ImmExtendShift() > 0) {
+ AppendToOutput(", lsl #%" PRId32, instr->ImmExtendShift());
+ }
+ } else {
+ AppendToOutput(", %s", extend_mode[instr->ExtendMode()]);
+ if (instr->ImmExtendShift() > 0) {
+ AppendToOutput(" #%" PRId32, instr->ImmExtendShift());
+ }
+ }
+ return 3;
+}
+
+
+int Disassembler::SubstituteLSRegOffsetField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(strncmp(format, "Offsetreg", 9) == 0);
+ const char* extend_mode[] = { "undefined", "undefined", "uxtw", "lsl",
+ "undefined", "undefined", "sxtw", "sxtx" };
+ USE(format);
+
+ unsigned shift = instr->ImmShiftLS();
+ Extend ext = static_cast<Extend>(instr->ExtendMode());
+ char reg_type = ((ext == UXTW) || (ext == SXTW)) ? 'w' : 'x';
+
+ unsigned rm = instr->Rm();
+ if (rm == kZeroRegCode) {
+ AppendToOutput("%czr", reg_type);
+ } else {
+ AppendToOutput("%c%d", reg_type, rm);
+ }
+
+ // Extend mode UXTX is an alias for shift mode LSL here.
+ if (!((ext == UXTX) && (shift == 0))) {
+ AppendToOutput(", %s", extend_mode[ext]);
+ if (shift != 0) {
+ AppendToOutput(" #%d", instr->SizeLS());
+ }
+ }
+ return 9;
+}
+
+
+int Disassembler::SubstitutePrefetchField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(format[0] == 'P');
+ USE(format);
+
+ static const char* hints[] = {"ld", "li", "st"};
+ static const char* stream_options[] = {"keep", "strm"};
+
+ unsigned hint = instr->PrefetchHint();
+ unsigned target = instr->PrefetchTarget() + 1;
+ unsigned stream = instr->PrefetchStream();
+
+ if ((hint >= (sizeof(hints) / sizeof(hints[0]))) || (target > 3)) {
+ // Unallocated prefetch operations.
+ int prefetch_mode = instr->ImmPrefetchOperation();
+ AppendToOutput("#0b%c%c%c%c%c",
+ (prefetch_mode & (1 << 4)) ? '1' : '0',
+ (prefetch_mode & (1 << 3)) ? '1' : '0',
+ (prefetch_mode & (1 << 2)) ? '1' : '0',
+ (prefetch_mode & (1 << 1)) ? '1' : '0',
+ (prefetch_mode & (1 << 0)) ? '1' : '0');
+ } else {
+ VIXL_ASSERT(stream < (sizeof(stream_options) / sizeof(stream_options[0])));
+ AppendToOutput("p%sl%d%s", hints[hint], target, stream_options[stream]);
+ }
+ return 6;
+}
+
+int Disassembler::SubstituteBarrierField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(format[0] == 'M');
+ USE(format);
+
+ static const char* options[4][4] = {
+ { "sy (0b0000)", "oshld", "oshst", "osh" },
+ { "sy (0b0100)", "nshld", "nshst", "nsh" },
+ { "sy (0b1000)", "ishld", "ishst", "ish" },
+ { "sy (0b1100)", "ld", "st", "sy" }
+ };
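+  // The barrier option is split into a domain (high two bits) and a type
+  // (low two bits); e.g. an option field of 0b1001 selects options[2][1],
+  // which prints as "ishld".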
+ int domain = instr->ImmBarrierDomain();
+ int type = instr->ImmBarrierType();
+
+ AppendToOutput("%s", options[domain][type]);
+ return 1;
+}
+
+int Disassembler::SubstituteSysOpField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(format[0] == 'G');
+ int op = -1;
+ switch (format[1]) {
+ case '1': op = instr->SysOp1(); break;
+ case '2': op = instr->SysOp2(); break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ AppendToOutput("#%d", op);
+ return 2;
+}
+
+int Disassembler::SubstituteCrField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(format[0] == 'K');
+ int cr = -1;
+ switch (format[1]) {
+ case 'n': cr = instr->CRn(); break;
+ case 'm': cr = instr->CRm(); break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ AppendToOutput("C%d", cr);
+ return 2;
+}
+
+void Disassembler::ResetOutput() {
+ buffer_pos_ = 0;
+ buffer_[buffer_pos_] = 0;
+}
+
+
+void Disassembler::AppendToOutput(const char* format, ...) {
+ va_list args;
+ va_start(args, format);
+ buffer_pos_ += vsnprintf(&buffer_[buffer_pos_], buffer_size_ - buffer_pos_,
+ format, args);
+ va_end(args);
+}
+
+
+void PrintDisassembler::ProcessOutput(const Instruction* instr) {
+ fprintf(stream_, "0x%016" PRIx64 " %08" PRIx32 "\t\t%s\n",
+ reinterpret_cast<uint64_t>(instr),
+ instr->InstructionBits(),
+ GetOutput());
+}
+
+void DisassembleInstruction(char* buffer, size_t bufsize, const Instruction* instr)
+{
+ vixl::Disassembler disasm(buffer, bufsize-1);
+ vixl::Decoder decoder;
+ decoder.AppendVisitor(&disasm);
+ decoder.Decode(instr);
+ buffer[bufsize-1] = 0; // Just to be safe
+}
+
+char* GdbDisassembleInstruction(const Instruction* instr)
+{
+ static char buffer[1024];
+ DisassembleInstruction(buffer, sizeof(buffer), instr);
+ return buffer;
+}
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Disasm-vixl.h b/js/src/jit/arm64/vixl/Disasm-vixl.h
new file mode 100644
index 0000000000..e04730da83
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Disasm-vixl.h
@@ -0,0 +1,181 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_DISASM_A64_H
+#define VIXL_A64_DISASM_A64_H
+
+#include "jit/arm64/vixl/Assembler-vixl.h"
+#include "jit/arm64/vixl/Decoder-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Instructions-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+namespace vixl {
+
+class Disassembler: public DecoderVisitor {
+ public:
+ Disassembler();
+ Disassembler(char* text_buffer, int buffer_size);
+ virtual ~Disassembler();
+ char* GetOutput();
+
+ // Declare all Visitor functions.
+ #define DECLARE(A) virtual void Visit##A(const Instruction* instr) override;
+ VISITOR_LIST(DECLARE)
+ #undef DECLARE
+
+ protected:
+ virtual void ProcessOutput(const Instruction* instr);
+
+ // Default output functions. The functions below implement a default way of
+ // printing elements in the disassembly. A sub-class can override these to
+ // customize the disassembly output.
+
+ // Prints the name of a register.
+ // TODO: This currently doesn't allow renaming of V registers.
+ virtual void AppendRegisterNameToOutput(const Instruction* instr,
+ const CPURegister& reg);
+
+ // Prints a PC-relative offset. This is used for example when disassembling
+ // branches to immediate offsets.
+ virtual void AppendPCRelativeOffsetToOutput(const Instruction* instr,
+ int64_t offset);
+
+ // Prints an address, in the general case. It can be code or data. This is
+ // used for example to print the target address of an ADR instruction.
+ virtual void AppendCodeRelativeAddressToOutput(const Instruction* instr,
+ const void* addr);
+
+ // Prints the address of some code.
+ // This is used for example to print the target address of a branch to an
+ // immediate offset.
+ // A sub-class can for example override this method to lookup the address and
+ // print an appropriate name.
+ virtual void AppendCodeRelativeCodeAddressToOutput(const Instruction* instr,
+ const void* addr);
+
+ // Prints the address of some data.
+ // This is used for example to print the source address of a load literal
+ // instruction.
+ virtual void AppendCodeRelativeDataAddressToOutput(const Instruction* instr,
+ const void* addr);
+
+ // Same as the above, but for addresses that are not relative to the code
+ // buffer. They are currently not used by VIXL.
+ virtual void AppendAddressToOutput(const Instruction* instr,
+ const void* addr);
+ virtual void AppendCodeAddressToOutput(const Instruction* instr,
+ const void* addr);
+ virtual void AppendDataAddressToOutput(const Instruction* instr,
+ const void* addr);
+
+ public:
+ // Get/Set the offset that should be added to code addresses when printing
+ // code-relative addresses in the AppendCodeRelative<Type>AddressToOutput()
+ // helpers.
+ // Below is an example of how a branch immediate instruction in memory at
+ // address 0xb010200 would disassemble with different offsets.
+ // Base address | Disassembly
+ // 0x0 | 0xb010200: b #+0xcc (addr 0xb0102cc)
+ // 0x10000 | 0xb000200: b #+0xcc (addr 0xb0002cc)
+ // 0xb010200 | 0x0: b #+0xcc (addr 0xcc)
+ void MapCodeAddress(int64_t base_address, const Instruction* instr_address);
+ int64_t CodeRelativeAddress(const void* instr);
+
+ private:
+ void Format(
+ const Instruction* instr, const char* mnemonic, const char* format);
+ void Substitute(const Instruction* instr, const char* string);
+ int SubstituteField(const Instruction* instr, const char* format);
+ int SubstituteRegisterField(const Instruction* instr, const char* format);
+ int SubstituteImmediateField(const Instruction* instr, const char* format);
+ int SubstituteLiteralField(const Instruction* instr, const char* format);
+ int SubstituteBitfieldImmediateField(
+ const Instruction* instr, const char* format);
+ int SubstituteShiftField(const Instruction* instr, const char* format);
+ int SubstituteExtendField(const Instruction* instr, const char* format);
+ int SubstituteConditionField(const Instruction* instr, const char* format);
+ int SubstitutePCRelAddressField(const Instruction* instr, const char* format);
+ int SubstituteBranchTargetField(const Instruction* instr, const char* format);
+ int SubstituteLSRegOffsetField(const Instruction* instr, const char* format);
+ int SubstitutePrefetchField(const Instruction* instr, const char* format);
+ int SubstituteBarrierField(const Instruction* instr, const char* format);
+ int SubstituteSysOpField(const Instruction* instr, const char* format);
+ int SubstituteCrField(const Instruction* instr, const char* format);
+ bool RdIsZROrSP(const Instruction* instr) const {
+ return (instr->Rd() == kZeroRegCode);
+ }
+
+ bool RnIsZROrSP(const Instruction* instr) const {
+ return (instr->Rn() == kZeroRegCode);
+ }
+
+ bool RmIsZROrSP(const Instruction* instr) const {
+ return (instr->Rm() == kZeroRegCode);
+ }
+
+ bool RaIsZROrSP(const Instruction* instr) const {
+ return (instr->Ra() == kZeroRegCode);
+ }
+
+ bool IsMovzMovnImm(unsigned reg_size, uint64_t value);
+
+ int64_t code_address_offset() const { return code_address_offset_; }
+
+ protected:
+ void ResetOutput();
+ void AppendToOutput(const char* string, ...) PRINTF_CHECK(2, 3);
+
+ void set_code_address_offset(int64_t code_address_offset) {
+ code_address_offset_ = code_address_offset;
+ }
+
+ char* buffer_;
+ uint32_t buffer_pos_;
+ uint32_t buffer_size_;
+ bool own_buffer_;
+
+ int64_t code_address_offset_;
+};
+
+
+class PrintDisassembler: public Disassembler {
+ public:
+ explicit PrintDisassembler(FILE* stream) : stream_(stream) { }
+
+ protected:
+ virtual void ProcessOutput(const Instruction* instr) override;
+
+ private:
+ FILE *stream_;
+};
+
+void DisassembleInstruction(char* buffer, size_t bufsize, const Instruction* instr);
+char* GdbDisassembleInstruction(const Instruction* instr);
+
+} // namespace vixl
+
+#endif // VIXL_A64_DISASM_A64_H
diff --git a/js/src/jit/arm64/vixl/Globals-vixl.h b/js/src/jit/arm64/vixl/Globals-vixl.h
new file mode 100644
index 0000000000..2c7d5703f1
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Globals-vixl.h
@@ -0,0 +1,272 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_GLOBALS_H
+#define VIXL_GLOBALS_H
+
+// Get standard C99 macros for integer types.
+#ifndef __STDC_CONSTANT_MACROS
+#define __STDC_CONSTANT_MACROS
+#endif
+
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS
+#endif
+
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS
+#endif
+
+#include "mozilla/Assertions.h"
+
+#include <cstdarg>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+
+extern "C" {
+#include <inttypes.h>
+#include <stdint.h>
+}
+
+#include "jstypes.h"
+
+#include "jit/arm64/vixl/Platform-vixl.h"
+#include "js/Utility.h"
+
+#ifdef VIXL_NEGATIVE_TESTING
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#endif
+
+namespace vixl {
+
+typedef uint8_t byte;
+
+const int KBytes = 1024;
+const int MBytes = 1024 * KBytes;
+
+const int kBitsPerByte = 8;
+
+template <int SizeInBits>
+struct Unsigned;
+
+template <>
+struct Unsigned<32> {
+ typedef uint32_t type;
+};
+
+template <>
+struct Unsigned<64> {
+ typedef uint64_t type;
+};
+
+} // namespace vixl
+
+// Detect the host's pointer size.
+#if (UINTPTR_MAX == UINT32_MAX)
+#define VIXL_HOST_POINTER_32
+#elif (UINTPTR_MAX == UINT64_MAX)
+#define VIXL_HOST_POINTER_64
+#else
+#error "Unsupported host pointer size."
+#endif
+
+#ifdef VIXL_NEGATIVE_TESTING
+#define VIXL_ABORT() \
+ do { \
+ std::ostringstream oss; \
+ oss << "Aborting in " << __FILE__ << ", line " << __LINE__ << std::endl; \
+ throw std::runtime_error(oss.str()); \
+ } while (false)
+#define VIXL_ABORT_WITH_MSG(msg) \
+ do { \
+ std::ostringstream oss; \
+ oss << (msg) << "in " << __FILE__ << ", line " << __LINE__ << std::endl; \
+ throw std::runtime_error(oss.str()); \
+ } while (false)
+#define VIXL_CHECK(condition) \
+ do { \
+ if (!(condition)) { \
+ std::ostringstream oss; \
+ oss << "Assertion failed (" #condition ")\nin "; \
+ oss << __FILE__ << ", line " << __LINE__ << std::endl; \
+ throw std::runtime_error(oss.str()); \
+ } \
+ } while (false)
+#else
+#define VIXL_ABORT() \
+ do { \
+ MOZ_CRASH(); \
+ } while (false)
+#define VIXL_ABORT_WITH_MSG(msg) \
+ do { \
+ MOZ_CRASH(msg); \
+ } while (false)
+#define VIXL_CHECK(condition) \
+ do { \
+ if (!(condition)) { \
+ MOZ_CRASH(); \
+ } \
+ } while (false)
+#endif
+#ifdef DEBUG
+#define VIXL_ASSERT(condition) MOZ_ASSERT(condition)
+#define VIXL_UNIMPLEMENTED() \
+ do { \
+ VIXL_ABORT_WITH_MSG("UNIMPLEMENTED "); \
+ } while (false)
+#define VIXL_UNREACHABLE() \
+ do { \
+ VIXL_ABORT_WITH_MSG("UNREACHABLE "); \
+ } while (false)
+#else
+#define VIXL_ASSERT(condition) ((void)0)
+#define VIXL_UNIMPLEMENTED() ((void)0)
+#define VIXL_UNREACHABLE() MOZ_CRASH("vixl unreachable")
+#endif
+// This is not as powerful as template based assertions, but it is simple.
+// It assumes that the descriptions are unique. If this starts being a problem,
+// we can switch to a different implementation.
+#define VIXL_CONCAT(a, b) a##b
+#if __cplusplus >= 201103L
+#define VIXL_STATIC_ASSERT_LINE(line_unused, condition, message) \
+ static_assert(condition, message)
+#else
+#define VIXL_STATIC_ASSERT_LINE(line, condition, message_unused) \
+ typedef char VIXL_CONCAT(STATIC_ASSERT_LINE_, line)[(condition) ? 1 : -1] \
+ __attribute__((unused))
+#endif
+#define VIXL_STATIC_ASSERT(condition) \
+ VIXL_STATIC_ASSERT_LINE(__LINE__, condition, "")
+#define VIXL_STATIC_ASSERT_MESSAGE(condition, message) \
+ VIXL_STATIC_ASSERT_LINE(__LINE__, condition, message)
+
+#define VIXL_WARNING(message) \
+ do { \
+ printf("WARNING in %s, line %i: %s", __FILE__, __LINE__, message); \
+ } while (false)
+
+template <typename T1>
+inline void USE(const T1&) {}
+
+template <typename T1, typename T2>
+inline void USE(const T1&, const T2&) {}
+
+template <typename T1, typename T2, typename T3>
+inline void USE(const T1&, const T2&, const T3&) {}
+
+template <typename T1, typename T2, typename T3, typename T4>
+inline void USE(const T1&, const T2&, const T3&, const T4&) {}
+
+#define VIXL_ALIGNMENT_EXCEPTION() \
+ do { \
+ VIXL_ABORT_WITH_MSG("ALIGNMENT EXCEPTION\t"); \
+ } while (0)
+
+// The clang::fallthrough attribute is used along with the Wimplicit-fallthrough
+// argument to annotate intentional fall-through between switch labels.
+// For more information please refer to:
+// http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
+#ifndef __has_warning
+#define __has_warning(x) 0
+#endif
+
+// Fallthrough annotation for Clang and C++11(201103L).
+#if __has_warning("-Wimplicit-fallthrough") && __cplusplus >= 201103L
+#define VIXL_FALLTHROUGH() [[clang::fallthrough]]
+// Fallthrough annotation for GCC >= 7.
+#elif __GNUC__ >= 7
+#define VIXL_FALLTHROUGH() __attribute__((fallthrough))
+#else
+#define VIXL_FALLTHROUGH() \
+ do { \
+ } while (0)
+#endif
+
+#if __cplusplus >= 201103L
+#define VIXL_NO_RETURN [[noreturn]]
+#else
+#define VIXL_NO_RETURN __attribute__((noreturn))
+#endif
+#ifdef VIXL_DEBUG
+#define VIXL_NO_RETURN_IN_DEBUG_MODE VIXL_NO_RETURN
+#else
+#define VIXL_NO_RETURN_IN_DEBUG_MODE
+#endif
+
+#if __cplusplus >= 201103L
+#define VIXL_OVERRIDE override
+#else
+#define VIXL_OVERRIDE
+#endif
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+#ifndef VIXL_AARCH64_GENERATE_SIMULATOR_CODE
+#define VIXL_AARCH64_GENERATE_SIMULATOR_CODE 1
+#endif
+#else
+#ifndef VIXL_AARCH64_GENERATE_SIMULATOR_CODE
+#define VIXL_AARCH64_GENERATE_SIMULATOR_CODE 0
+#endif
+#if VIXL_AARCH64_GENERATE_SIMULATOR_CODE
+#warning "Generating Simulator instructions without Simulator support."
+#endif
+#endif
+
+// We do not have a simulator for AArch32, although we can pretend we do so that
+// tests that require running natively can be skipped.
+#ifndef __arm__
+#define VIXL_INCLUDE_SIMULATOR_AARCH32
+#ifndef VIXL_AARCH32_GENERATE_SIMULATOR_CODE
+#define VIXL_AARCH32_GENERATE_SIMULATOR_CODE 1
+#endif
+#else
+#ifndef VIXL_AARCH32_GENERATE_SIMULATOR_CODE
+#define VIXL_AARCH32_GENERATE_SIMULATOR_CODE 0
+#endif
+#endif
+
+// Target Architecture/ISA
+
+// Hack: always include AArch64.
+#define VIXL_INCLUDE_TARGET_A64
+
+#ifdef VIXL_INCLUDE_TARGET_A64
+#define VIXL_INCLUDE_TARGET_AARCH64
+#endif
+
+#if defined(VIXL_INCLUDE_TARGET_A32) && defined(VIXL_INCLUDE_TARGET_T32)
+#define VIXL_INCLUDE_TARGET_AARCH32
+#elif defined(VIXL_INCLUDE_TARGET_A32)
+#define VIXL_INCLUDE_TARGET_A32_ONLY
+#else
+#define VIXL_INCLUDE_TARGET_T32_ONLY
+#endif
+
+
+#endif // VIXL_GLOBALS_H
diff --git a/js/src/jit/arm64/vixl/Instructions-vixl.cpp b/js/src/jit/arm64/vixl/Instructions-vixl.cpp
new file mode 100644
index 0000000000..dcc0fab05e
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Instructions-vixl.cpp
@@ -0,0 +1,627 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Instructions-vixl.h"
+
+#include "jit/arm64/vixl/Assembler-vixl.h"
+
+namespace vixl {
+
+static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
+ uint64_t value,
+ unsigned width) {
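+  // For example, RepeatBitsAcrossReg(32, 0x1, 2) replicates the two-bit
+  // pattern 0b01 across the register and produces 0x55555555.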
+ VIXL_ASSERT((width == 2) || (width == 4) || (width == 8) || (width == 16) ||
+ (width == 32));
+ VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+ uint64_t result = value & ((UINT64_C(1) << width) - 1);
+ for (unsigned i = width; i < reg_size; i *= 2) {
+ result |= (result << i);
+ }
+ return result;
+}
+
+
+bool Instruction::IsLoad() const {
+ if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) {
+ return false;
+ }
+
+ if (Mask(LoadStorePairAnyFMask) == LoadStorePairAnyFixed) {
+ return Mask(LoadStorePairLBit) != 0;
+ } else {
+ LoadStoreOp op = static_cast<LoadStoreOp>(Mask(LoadStoreMask));
+ switch (op) {
+ case LDRB_w:
+ case LDRH_w:
+ case LDR_w:
+ case LDR_x:
+ case LDRSB_w:
+ case LDRSB_x:
+ case LDRSH_w:
+ case LDRSH_x:
+ case LDRSW_x:
+ case LDR_b:
+ case LDR_h:
+ case LDR_s:
+ case LDR_d:
+ case LDR_q: return true;
+ default: return false;
+ }
+ }
+}
+
+
+bool Instruction::IsStore() const {
+ if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) {
+ return false;
+ }
+
+ if (Mask(LoadStorePairAnyFMask) == LoadStorePairAnyFixed) {
+ return Mask(LoadStorePairLBit) == 0;
+ } else {
+ LoadStoreOp op = static_cast<LoadStoreOp>(Mask(LoadStoreMask));
+ switch (op) {
+ case STRB_w:
+ case STRH_w:
+ case STR_w:
+ case STR_x:
+ case STR_b:
+ case STR_h:
+ case STR_s:
+ case STR_d:
+ case STR_q: return true;
+ default: return false;
+ }
+ }
+}
+
+
+// Logical immediates can't encode zero, so a return value of zero is used to
+// indicate a failure case. Specifically, where the constraints on imm_s are
+// not met.
+uint64_t Instruction::ImmLogical() const {
+ unsigned reg_size = SixtyFourBits() ? kXRegSize : kWRegSize;
+ int32_t n = BitN();
+ int32_t imm_s = ImmSetBits();
+ int32_t imm_r = ImmRotate();
+
+ // An integer is constructed from the n, imm_s and imm_r bits according to
+ // the following table:
+ //
+ // N imms immr size S R
+ // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
+ // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
+ // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
+ // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
+ // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
+ // 0 11110s xxxxxr 2 UInt(s) UInt(r)
+ // (s bits must not be all set)
+ //
+ // A pattern is constructed of size bits, where the least significant S+1
+ // bits are set. The pattern is rotated right by R, and repeated across a
+ // 32 or 64-bit value, depending on destination register width.
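+  //
+  // Worked example: N=0, imms=0b111100, immr=0 selects a 2-bit element whose
+  // low bit is set (0b01); replicated across a 64-bit register this yields
+  // 0x5555555555555555. N=1, imms=0, immr=0 encodes 0x0000000000000001.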
+ //
+
+ if (n == 1) {
+ if (imm_s == 0x3f) {
+ return 0;
+ }
+ uint64_t bits = (UINT64_C(1) << (imm_s + 1)) - 1;
+ return RotateRight(bits, imm_r, 64);
+ } else {
+ if ((imm_s >> 1) == 0x1f) {
+ return 0;
+ }
+ for (int width = 0x20; width >= 0x2; width >>= 1) {
+ if ((imm_s & width) == 0) {
+ int mask = width - 1;
+ if ((imm_s & mask) == mask) {
+ return 0;
+ }
+ uint64_t bits = (UINT64_C(1) << ((imm_s & mask) + 1)) - 1;
+ return RepeatBitsAcrossReg(reg_size,
+ RotateRight(bits, imm_r & mask, width),
+ width);
+ }
+ }
+ }
+ VIXL_UNREACHABLE();
+ return 0;
+}
+
+
+uint32_t Instruction::ImmNEONabcdefgh() const {
+ return ImmNEONabc() << 5 | ImmNEONdefgh();
+}
+
+
+float Instruction::Imm8ToFP32(uint32_t imm8) {
+ // Imm8: abcdefgh (8 bits)
+ // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits)
+ // where B is b ^ 1
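+  // For example, imm8 = 0x70 expands to 0x3f800000, i.e. 1.0f.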
+ uint32_t bits = imm8;
+ uint32_t bit7 = (bits >> 7) & 0x1;
+ uint32_t bit6 = (bits >> 6) & 0x1;
+ uint32_t bit5_to_0 = bits & 0x3f;
+ uint32_t result = (bit7 << 31) | ((32 - bit6) << 25) | (bit5_to_0 << 19);
+
+ return RawbitsToFloat(result);
+}
+
+
+float Instruction::ImmFP32() const {
+ return Imm8ToFP32(ImmFP());
+}
+
+
+double Instruction::Imm8ToFP64(uint32_t imm8) {
+ // Imm8: abcdefgh (8 bits)
+ // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+ // 0000.0000.0000.0000.0000.0000.0000.0000 (64 bits)
+ // where B is b ^ 1
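+  // For example, imm8 = 0x70 expands to 0x3ff0000000000000, i.e. 1.0.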
+ uint32_t bits = imm8;
+ uint64_t bit7 = (bits >> 7) & 0x1;
+ uint64_t bit6 = (bits >> 6) & 0x1;
+ uint64_t bit5_to_0 = bits & 0x3f;
+ uint64_t result = (bit7 << 63) | ((256 - bit6) << 54) | (bit5_to_0 << 48);
+
+ return RawbitsToDouble(result);
+}
+
+
+double Instruction::ImmFP64() const {
+ return Imm8ToFP64(ImmFP());
+}
+
+
+float Instruction::ImmNEONFP32() const {
+ return Imm8ToFP32(ImmNEONabcdefgh());
+}
+
+
+double Instruction::ImmNEONFP64() const {
+ return Imm8ToFP64(ImmNEONabcdefgh());
+}
+
+unsigned CalcLSPairDataSize(LoadStorePairOp op) {
+ VIXL_STATIC_ASSERT(kXRegSizeInBytes == kDRegSizeInBytes);
+ VIXL_STATIC_ASSERT(kWRegSizeInBytes == kSRegSizeInBytes);
+ switch (op) {
+ case STP_q:
+ case LDP_q: return kQRegSizeInBytesLog2;
+ case STP_x:
+ case LDP_x:
+ case STP_d:
+ case LDP_d: return kXRegSizeInBytesLog2;
+ default: return kWRegSizeInBytesLog2;
+ }
+}
+
+
+int Instruction::ImmBranchRangeBitwidth(ImmBranchType branch_type) {
+ switch (branch_type) {
+ case UncondBranchType:
+ return ImmUncondBranch_width;
+ case CondBranchType:
+ return ImmCondBranch_width;
+ case CompareBranchType:
+ return ImmCmpBranch_width;
+ case TestBranchType:
+ return ImmTestBranch_width;
+ default:
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+}
+
+
+int32_t Instruction::ImmBranchForwardRange(ImmBranchType branch_type) {
+ int32_t encoded_max = 1 << (ImmBranchRangeBitwidth(branch_type) - 1);
+ return encoded_max * kInstructionSize;
+}
+
+
+bool Instruction::IsValidImmPCOffset(ImmBranchType branch_type,
+ int64_t offset) {
+ return IsIntN(ImmBranchRangeBitwidth(branch_type), offset);
+}
+
+ImmBranchRangeType Instruction::ImmBranchTypeToRange(ImmBranchType branch_type)
+{
+ switch (branch_type) {
+ case UncondBranchType:
+ return UncondBranchRangeType;
+ case CondBranchType:
+ case CompareBranchType:
+ return CondBranchRangeType;
+ case TestBranchType:
+ return TestBranchRangeType;
+ default:
+ return UnknownBranchRangeType;
+ }
+}
+
+int32_t Instruction::ImmBranchMaxForwardOffset(ImmBranchRangeType range_type)
+{
+ // Branches encode a pc-relative two's complement number of 32-bit
+ // instructions. Compute the number of bytes corresponding to the largest
+ // positive number of instructions that can be encoded.
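+  // Assuming the usual A64 immediate widths (14 bits for test-and-branch, 19
+  // for conditional and compare-and-branch, 26 for unconditional branches),
+  // this evaluates to 32764, 1048572 and 134217724 bytes respectively.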
+ switch(range_type) {
+ case TestBranchRangeType:
+ return ((1 << ImmTestBranch_width) - 1) / 2 * kInstructionSize;
+ case CondBranchRangeType:
+ return ((1 << ImmCondBranch_width) - 1) / 2 * kInstructionSize;
+ case UncondBranchRangeType:
+ return ((1 << ImmUncondBranch_width) - 1) / 2 * kInstructionSize;
+ default:
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+}
+
+int32_t Instruction::ImmBranchMinBackwardOffset(ImmBranchRangeType range_type)
+{
+ switch(range_type) {
+    case TestBranchRangeType:
+      return -int32_t(1 << ImmTestBranch_width) / 2 * kInstructionSize;
+    case CondBranchRangeType:
+      return -int32_t(1 << ImmCondBranch_width) / 2 * kInstructionSize;
+    case UncondBranchRangeType:
+      return -int32_t(1 << ImmUncondBranch_width) / 2 * kInstructionSize;
+ default:
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+}
+
+const Instruction* Instruction::ImmPCOffsetTarget() const {
+ const Instruction * base = this;
+ ptrdiff_t offset;
+ if (IsPCRelAddressing()) {
+ // ADR and ADRP.
+ offset = ImmPCRel();
+ if (Mask(PCRelAddressingMask) == ADRP) {
+ base = AlignDown(base, kPageSize);
+ offset *= kPageSize;
+ } else {
+ VIXL_ASSERT(Mask(PCRelAddressingMask) == ADR);
+ }
+ } else {
+ // All PC-relative branches.
+ VIXL_ASSERT(BranchType() != UnknownBranchType);
+ // Relative branch offsets are instruction-size-aligned.
+ offset = ImmBranch() << kInstructionSizeLog2;
+ }
+ return base + offset;
+}
+
+
+int Instruction::ImmBranch() const {
+ switch (BranchType()) {
+ case CondBranchType: return ImmCondBranch();
+ case UncondBranchType: return ImmUncondBranch();
+ case CompareBranchType: return ImmCmpBranch();
+ case TestBranchType: return ImmTestBranch();
+ default: VIXL_UNREACHABLE();
+ }
+ return 0;
+}
+
+
+void Instruction::SetImmPCOffsetTarget(const Instruction* target) {
+ if (IsPCRelAddressing()) {
+ SetPCRelImmTarget(target);
+ } else {
+ SetBranchImmTarget(target);
+ }
+}
+
+
+void Instruction::SetPCRelImmTarget(const Instruction* target) {
+ ptrdiff_t imm21;
+ if ((Mask(PCRelAddressingMask) == ADR)) {
+ imm21 = target - this;
+ } else {
+ VIXL_ASSERT(Mask(PCRelAddressingMask) == ADRP);
+ uintptr_t this_page = reinterpret_cast<uintptr_t>(this) / kPageSize;
+ uintptr_t target_page = reinterpret_cast<uintptr_t>(target) / kPageSize;
+ imm21 = target_page - this_page;
+ }
+ Instr imm = Assembler::ImmPCRelAddress(static_cast<int32_t>(imm21));
+
+ SetInstructionBits(Mask(~ImmPCRel_mask) | imm);
+}
+
+
+void Instruction::SetBranchImmTarget(const Instruction* target) {
+ VIXL_ASSERT(((target - this) & 3) == 0);
+ Instr branch_imm = 0;
+ uint32_t imm_mask = 0;
+ int offset = static_cast<int>((target - this) >> kInstructionSizeLog2);
+ switch (BranchType()) {
+ case CondBranchType: {
+ branch_imm = Assembler::ImmCondBranch(offset);
+ imm_mask = ImmCondBranch_mask;
+ break;
+ }
+ case UncondBranchType: {
+ branch_imm = Assembler::ImmUncondBranch(offset);
+ imm_mask = ImmUncondBranch_mask;
+ break;
+ }
+ case CompareBranchType: {
+ branch_imm = Assembler::ImmCmpBranch(offset);
+ imm_mask = ImmCmpBranch_mask;
+ break;
+ }
+ case TestBranchType: {
+ branch_imm = Assembler::ImmTestBranch(offset);
+ imm_mask = ImmTestBranch_mask;
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ SetInstructionBits(Mask(~imm_mask) | branch_imm);
+}
+
+
+void Instruction::SetImmLLiteral(const Instruction* source) {
+ VIXL_ASSERT(IsWordAligned(source));
+ ptrdiff_t offset = (source - this) >> kLiteralEntrySizeLog2;
+ Instr imm = Assembler::ImmLLiteral(static_cast<int>(offset));
+ Instr mask = ImmLLiteral_mask;
+
+ SetInstructionBits(Mask(~mask) | imm);
+}
+
+
+VectorFormat VectorFormatHalfWidth(const VectorFormat vform) {
+ VIXL_ASSERT(vform == kFormat8H || vform == kFormat4S || vform == kFormat2D ||
+ vform == kFormatH || vform == kFormatS || vform == kFormatD);
+ switch (vform) {
+ case kFormat8H: return kFormat8B;
+ case kFormat4S: return kFormat4H;
+ case kFormat2D: return kFormat2S;
+ case kFormatH: return kFormatB;
+ case kFormatS: return kFormatH;
+ case kFormatD: return kFormatS;
+ default: VIXL_UNREACHABLE(); return kFormatUndefined;
+ }
+}
+
+
+VectorFormat VectorFormatDoubleWidth(const VectorFormat vform) {
+ VIXL_ASSERT(vform == kFormat8B || vform == kFormat4H || vform == kFormat2S ||
+ vform == kFormatB || vform == kFormatH || vform == kFormatS);
+ switch (vform) {
+ case kFormat8B: return kFormat8H;
+ case kFormat4H: return kFormat4S;
+ case kFormat2S: return kFormat2D;
+ case kFormatB: return kFormatH;
+ case kFormatH: return kFormatS;
+ case kFormatS: return kFormatD;
+ default: VIXL_UNREACHABLE(); return kFormatUndefined;
+ }
+}
+
+
+VectorFormat VectorFormatFillQ(const VectorFormat vform) {
+ switch (vform) {
+ case kFormatB:
+ case kFormat8B:
+ case kFormat16B: return kFormat16B;
+ case kFormatH:
+ case kFormat4H:
+ case kFormat8H: return kFormat8H;
+ case kFormatS:
+ case kFormat2S:
+ case kFormat4S: return kFormat4S;
+ case kFormatD:
+ case kFormat1D:
+ case kFormat2D: return kFormat2D;
+ default: VIXL_UNREACHABLE(); return kFormatUndefined;
+ }
+}
+
+VectorFormat VectorFormatHalfWidthDoubleLanes(const VectorFormat vform) {
+ switch (vform) {
+ case kFormat4H: return kFormat8B;
+ case kFormat8H: return kFormat16B;
+ case kFormat2S: return kFormat4H;
+ case kFormat4S: return kFormat8H;
+ case kFormat1D: return kFormat2S;
+ case kFormat2D: return kFormat4S;
+ default: VIXL_UNREACHABLE(); return kFormatUndefined;
+ }
+}
+
+VectorFormat VectorFormatDoubleLanes(const VectorFormat vform) {
+ VIXL_ASSERT(vform == kFormat8B || vform == kFormat4H || vform == kFormat2S);
+ switch (vform) {
+ case kFormat8B: return kFormat16B;
+ case kFormat4H: return kFormat8H;
+ case kFormat2S: return kFormat4S;
+ default: VIXL_UNREACHABLE(); return kFormatUndefined;
+ }
+}
+
+
+VectorFormat VectorFormatHalfLanes(const VectorFormat vform) {
+ VIXL_ASSERT(vform == kFormat16B || vform == kFormat8H || vform == kFormat4S);
+ switch (vform) {
+ case kFormat16B: return kFormat8B;
+ case kFormat8H: return kFormat4H;
+ case kFormat4S: return kFormat2S;
+ default: VIXL_UNREACHABLE(); return kFormatUndefined;
+ }
+}
+
+
+VectorFormat ScalarFormatFromLaneSize(int laneSize) {
+ switch (laneSize) {
+ case 8: return kFormatB;
+ case 16: return kFormatH;
+ case 32: return kFormatS;
+ case 64: return kFormatD;
+ default: VIXL_UNREACHABLE(); return kFormatUndefined;
+ }
+}
+
+
+unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) {
+ VIXL_ASSERT(vform != kFormatUndefined);
+ switch (vform) {
+ case kFormatB: return kBRegSize;
+ case kFormatH: return kHRegSize;
+ case kFormatS: return kSRegSize;
+ case kFormatD: return kDRegSize;
+ case kFormat8B:
+ case kFormat4H:
+ case kFormat2S:
+ case kFormat1D: return kDRegSize;
+ default: return kQRegSize;
+ }
+}
+
+
+unsigned RegisterSizeInBytesFromFormat(VectorFormat vform) {
+ return RegisterSizeInBitsFromFormat(vform) / 8;
+}
+
+
+unsigned LaneSizeInBitsFromFormat(VectorFormat vform) {
+ VIXL_ASSERT(vform != kFormatUndefined);
+ switch (vform) {
+ case kFormatB:
+ case kFormat8B:
+ case kFormat16B: return 8;
+ case kFormatH:
+ case kFormat4H:
+ case kFormat8H: return 16;
+ case kFormatS:
+ case kFormat2S:
+ case kFormat4S: return 32;
+ case kFormatD:
+ case kFormat1D:
+ case kFormat2D: return 64;
+ default: VIXL_UNREACHABLE(); return 0;
+ }
+}
+
+
+int LaneSizeInBytesFromFormat(VectorFormat vform) {
+ return LaneSizeInBitsFromFormat(vform) / 8;
+}
+
+
+int LaneSizeInBytesLog2FromFormat(VectorFormat vform) {
+ VIXL_ASSERT(vform != kFormatUndefined);
+ switch (vform) {
+ case kFormatB:
+ case kFormat8B:
+ case kFormat16B: return 0;
+ case kFormatH:
+ case kFormat4H:
+ case kFormat8H: return 1;
+ case kFormatS:
+ case kFormat2S:
+ case kFormat4S: return 2;
+ case kFormatD:
+ case kFormat1D:
+ case kFormat2D: return 3;
+ default: VIXL_UNREACHABLE(); return 0;
+ }
+}
+
+
+int LaneCountFromFormat(VectorFormat vform) {
+ VIXL_ASSERT(vform != kFormatUndefined);
+ switch (vform) {
+ case kFormat16B: return 16;
+ case kFormat8B:
+ case kFormat8H: return 8;
+ case kFormat4H:
+ case kFormat4S: return 4;
+ case kFormat2S:
+ case kFormat2D: return 2;
+ case kFormat1D:
+ case kFormatB:
+ case kFormatH:
+ case kFormatS:
+ case kFormatD: return 1;
+ default: VIXL_UNREACHABLE(); return 0;
+ }
+}
+
+
+int MaxLaneCountFromFormat(VectorFormat vform) {
+ VIXL_ASSERT(vform != kFormatUndefined);
+ switch (vform) {
+ case kFormatB:
+ case kFormat8B:
+ case kFormat16B: return 16;
+ case kFormatH:
+ case kFormat4H:
+ case kFormat8H: return 8;
+ case kFormatS:
+ case kFormat2S:
+ case kFormat4S: return 4;
+ case kFormatD:
+ case kFormat1D:
+ case kFormat2D: return 2;
+ default: VIXL_UNREACHABLE(); return 0;
+ }
+}
+
+
+// Does 'vform' indicate a vector format or a scalar format?
+bool IsVectorFormat(VectorFormat vform) {
+ VIXL_ASSERT(vform != kFormatUndefined);
+ switch (vform) {
+ case kFormatB:
+ case kFormatH:
+ case kFormatS:
+ case kFormatD: return false;
+ default: return true;
+ }
+}
+
+
+int64_t MaxIntFromFormat(VectorFormat vform) {
+ return INT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
+}
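+// For example, for an 8-bit lane format such as kFormat8B, MaxIntFromFormat
+// is INT64_MAX >> 56 = 127; MinIntFromFormat and MaxUintFromFormat below
+// follow the same pattern.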
+
+
+int64_t MinIntFromFormat(VectorFormat vform) {
+ return INT64_MIN >> (64 - LaneSizeInBitsFromFormat(vform));
+}
+
+
+uint64_t MaxUintFromFormat(VectorFormat vform) {
+ return UINT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
+}
+} // namespace vixl
+
diff --git a/js/src/jit/arm64/vixl/Instructions-vixl.h b/js/src/jit/arm64/vixl/Instructions-vixl.h
new file mode 100644
index 0000000000..4bcddf642a
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Instructions-vixl.h
@@ -0,0 +1,817 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_INSTRUCTIONS_A64_H_
+#define VIXL_A64_INSTRUCTIONS_A64_H_
+
+#include "jit/arm64/vixl/Constants-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+namespace vixl {
+// ISA constants. --------------------------------------------------------------
+
+typedef uint32_t Instr;
+const unsigned kInstructionSize = 4;
+const unsigned kInstructionSizeLog2 = 2;
+const unsigned kLiteralEntrySize = 4;
+const unsigned kLiteralEntrySizeLog2 = 2;
+const unsigned kMaxLoadLiteralRange = 1 * MBytes;
+
+// This is the nominal page size (as used by the adrp instruction); the actual
+// size of the memory pages allocated by the kernel is likely to differ.
+const unsigned kPageSize = 4 * KBytes;
+const unsigned kPageSizeLog2 = 12;
+
+const unsigned kBRegSize = 8;
+const unsigned kBRegSizeLog2 = 3;
+const unsigned kBRegSizeInBytes = kBRegSize / 8;
+const unsigned kBRegSizeInBytesLog2 = kBRegSizeLog2 - 3;
+const unsigned kHRegSize = 16;
+const unsigned kHRegSizeLog2 = 4;
+const unsigned kHRegSizeInBytes = kHRegSize / 8;
+const unsigned kHRegSizeInBytesLog2 = kHRegSizeLog2 - 3;
+const unsigned kWRegSize = 32;
+const unsigned kWRegSizeLog2 = 5;
+const unsigned kWRegSizeInBytes = kWRegSize / 8;
+const unsigned kWRegSizeInBytesLog2 = kWRegSizeLog2 - 3;
+const unsigned kXRegSize = 64;
+const unsigned kXRegSizeLog2 = 6;
+const unsigned kXRegSizeInBytes = kXRegSize / 8;
+const unsigned kXRegSizeInBytesLog2 = kXRegSizeLog2 - 3;
+const unsigned kSRegSize = 32;
+const unsigned kSRegSizeLog2 = 5;
+const unsigned kSRegSizeInBytes = kSRegSize / 8;
+const unsigned kSRegSizeInBytesLog2 = kSRegSizeLog2 - 3;
+const unsigned kDRegSize = 64;
+const unsigned kDRegSizeLog2 = 6;
+const unsigned kDRegSizeInBytes = kDRegSize / 8;
+const unsigned kDRegSizeInBytesLog2 = kDRegSizeLog2 - 3;
+const unsigned kQRegSize = 128;
+const unsigned kQRegSizeLog2 = 7;
+const unsigned kQRegSizeInBytes = kQRegSize / 8;
+const unsigned kQRegSizeInBytesLog2 = kQRegSizeLog2 - 3;
+const uint64_t kWRegMask = UINT64_C(0xffffffff);
+const uint64_t kXRegMask = UINT64_C(0xffffffffffffffff);
+const uint64_t kSRegMask = UINT64_C(0xffffffff);
+const uint64_t kDRegMask = UINT64_C(0xffffffffffffffff);
+const uint64_t kSSignMask = UINT64_C(0x80000000);
+const uint64_t kDSignMask = UINT64_C(0x8000000000000000);
+const uint64_t kWSignMask = UINT64_C(0x80000000);
+const uint64_t kXSignMask = UINT64_C(0x8000000000000000);
+const uint64_t kByteMask = UINT64_C(0xff);
+const uint64_t kHalfWordMask = UINT64_C(0xffff);
+const uint64_t kWordMask = UINT64_C(0xffffffff);
+const uint64_t kXMaxUInt = UINT64_C(0xffffffffffffffff);
+const uint64_t kXMaxExactUInt = UINT64_C(0xfffffffffffff800);
+const uint64_t kWMaxUInt = UINT64_C(0xffffffff);
+const int64_t kXMaxInt = INT64_C(0x7fffffffffffffff);
+const int64_t kXMaxExactInt = INT64_C(0x7ffffffffffffc00);
+const int64_t kXMinInt = INT64_C(0x8000000000000000);
+const int32_t kWMaxInt = INT32_C(0x7fffffff);
+const int32_t kWMinInt = INT32_C(0x80000000);
+const unsigned kLinkRegCode = 30;
+const unsigned kZeroRegCode = 31;
+const unsigned kSPRegInternalCode = 63;
+const unsigned kRegCodeMask = 0x1f;
+
+const unsigned kAddressTagOffset = 56;
+const unsigned kAddressTagWidth = 8;
+const uint64_t kAddressTagMask =
+ ((UINT64_C(1) << kAddressTagWidth) - 1) << kAddressTagOffset;
+VIXL_STATIC_ASSERT(kAddressTagMask == UINT64_C(0xff00000000000000));
+
+static inline unsigned CalcLSDataSize(LoadStoreOp op) {
+ VIXL_ASSERT((LSSize_offset + LSSize_width) == (kInstructionSize * 8));
+ unsigned size = static_cast<Instr>(op) >> LSSize_offset;
+ if ((op & LSVector_mask) != 0) {
+ // Vector register memory operations encode the access size in the "size"
+ // and "opc" fields.
+ if ((size == 0) && ((op & LSOpc_mask) >> LSOpc_offset) >= 2) {
+ size = kQRegSizeInBytesLog2;
+ }
+ }
+ return size;
+}
+
+unsigned CalcLSPairDataSize(LoadStorePairOp op);
+
+enum ImmBranchType {
+ UnknownBranchType = 0,
+ CondBranchType = 1,
+ UncondBranchType = 2,
+ CompareBranchType = 3,
+ TestBranchType = 4
+};
+
+// The classes of immediate branch ranges, in order of increasing range.
+// Note that CondBranchType and CompareBranchType have the same range.
+enum ImmBranchRangeType {
+ TestBranchRangeType, // tbz/tbnz: imm14 = +/- 32KB.
+ CondBranchRangeType, // b.cond/cbz/cbnz: imm19 = +/- 1MB.
+ UncondBranchRangeType, // b/bl: imm26 = +/- 128MB.
+ UnknownBranchRangeType,
+
+ // Number of 'short-range' branch range types.
+ // We don't consider unconditional branches 'short-range'.
+ NumShortBranchRangeTypes = UncondBranchRangeType
+};
+
+enum AddrMode {
+ Offset,
+ PreIndex,
+ PostIndex
+};
+
+enum Reg31Mode {
+ Reg31IsStackPointer,
+ Reg31IsZeroRegister
+};
+
+// Instructions. ---------------------------------------------------------------
+
+class Instruction {
+ public:
+ Instr InstructionBits() const {
+ return *(reinterpret_cast<const Instr*>(this));
+ }
+
+ void SetInstructionBits(Instr new_instr) {
+ *(reinterpret_cast<Instr*>(this)) = new_instr;
+ }
+
+ int Bit(int pos) const {
+ return (InstructionBits() >> pos) & 1;
+ }
+
+ uint32_t Bits(int msb, int lsb) const {
+ return ExtractUnsignedBitfield32(msb, lsb, InstructionBits());
+ }
+
+ int32_t SignedBits(int msb, int lsb) const {
+ int32_t bits = *(reinterpret_cast<const int32_t*>(this));
+ return ExtractSignedBitfield32(msb, lsb, bits);
+ }
+
+ Instr Mask(uint32_t mask) const {
+ return InstructionBits() & mask;
+ }
+
+ #define DEFINE_GETTER(Name, HighBit, LowBit, Func) \
+ int32_t Name() const { return Func(HighBit, LowBit); }
+ INSTRUCTION_FIELDS_LIST(DEFINE_GETTER)
+ #undef DEFINE_GETTER
+
+ #define DEFINE_SETTER(Name, HighBit, LowBit, Func) \
+ inline void Set##Name(unsigned n) { SetBits32(HighBit, LowBit, n); }
+ INSTRUCTION_FIELDS_LIST(DEFINE_SETTER)
+ #undef DEFINE_SETTER
+
+ // ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST),
+ // formed from ImmPCRelLo and ImmPCRelHi.
+ int ImmPCRel() const {
+ int offset =
+ static_cast<int>((ImmPCRelHi() << ImmPCRelLo_width) | ImmPCRelLo());
+ int width = ImmPCRelLo_width + ImmPCRelHi_width;
+ return ExtractSignedBitfield32(width - 1, 0, offset);
+ }
+
+ uint64_t ImmLogical() const;
+ unsigned ImmNEONabcdefgh() const;
+ float ImmFP32() const;
+ double ImmFP64() const;
+ float ImmNEONFP32() const;
+ double ImmNEONFP64() const;
+
+ unsigned SizeLS() const {
+ return CalcLSDataSize(static_cast<LoadStoreOp>(Mask(LoadStoreMask)));
+ }
+
+ unsigned SizeLSPair() const {
+ return CalcLSPairDataSize(
+ static_cast<LoadStorePairOp>(Mask(LoadStorePairMask)));
+ }
+
+ int NEONLSIndex(int access_size_shift) const {
+ int64_t q = NEONQ();
+ int64_t s = NEONS();
+ int64_t size = NEONLSSize();
+ int64_t index = (q << 3) | (s << 2) | size;
+ return static_cast<int>(index >> access_size_shift);
+ }
+
+ // Helpers.
+ bool IsCondBranchImm() const {
+ return Mask(ConditionalBranchFMask) == ConditionalBranchFixed;
+ }
+
+ bool IsUncondBranchImm() const {
+ return Mask(UnconditionalBranchFMask) == UnconditionalBranchFixed;
+ }
+
+ bool IsCompareBranch() const {
+ return Mask(CompareBranchFMask) == CompareBranchFixed;
+ }
+
+ bool IsTestBranch() const {
+ return Mask(TestBranchFMask) == TestBranchFixed;
+ }
+
+ bool IsImmBranch() const {
+ return BranchType() != UnknownBranchType;
+ }
+
+ bool IsPCRelAddressing() const {
+ return Mask(PCRelAddressingFMask) == PCRelAddressingFixed;
+ }
+
+ bool IsLogicalImmediate() const {
+ return Mask(LogicalImmediateFMask) == LogicalImmediateFixed;
+ }
+
+ bool IsAddSubImmediate() const {
+ return Mask(AddSubImmediateFMask) == AddSubImmediateFixed;
+ }
+
+ bool IsAddSubExtended() const {
+ return Mask(AddSubExtendedFMask) == AddSubExtendedFixed;
+ }
+
+ bool IsLoadOrStore() const {
+ return Mask(LoadStoreAnyFMask) == LoadStoreAnyFixed;
+ }
+
+ bool IsLoad() const;
+ bool IsStore() const;
+
+ bool IsLoadLiteral() const {
+ // This includes PRFM_lit.
+ return Mask(LoadLiteralFMask) == LoadLiteralFixed;
+ }
+
+ bool IsMovn() const {
+ return (Mask(MoveWideImmediateMask) == MOVN_x) ||
+ (Mask(MoveWideImmediateMask) == MOVN_w);
+ }
+
+ // Mozilla modifications.
+ bool IsUncondB() const;
+ bool IsCondB() const;
+ bool IsBL() const;
+ bool IsBR() const;
+ bool IsBLR() const;
+ bool IsTBZ() const;
+ bool IsTBNZ() const;
+ bool IsCBZ() const;
+ bool IsCBNZ() const;
+ bool IsLDR() const;
+ bool IsNOP() const;
+ bool IsCSDB() const;
+ bool IsADR() const;
+ bool IsADRP() const;
+ bool IsMovz() const;
+ bool IsMovk() const;
+ bool IsBranchLinkImm() const;
+ bool IsTargetReachable(const Instruction* target) const;
+ ptrdiff_t ImmPCRawOffset() const;
+ void SetImmPCRawOffset(ptrdiff_t offset);
+ void SetBits32(int msb, int lsb, unsigned value);
+
+ // Is this a stack pointer synchronization instruction as inserted by
+ // MacroAssembler::syncStackPtr()?
+ bool IsStackPtrSync() const;
+
+ static int ImmBranchRangeBitwidth(ImmBranchType branch_type);
+ static int32_t ImmBranchForwardRange(ImmBranchType branch_type);
+
+ // Check if offset can be encoded as a RAW offset in a branch_type
+ // instruction. The offset must be encodable directly as the immediate field
+ // in the instruction; it is not scaled by kInstructionSize first.
+ static bool IsValidImmPCOffset(ImmBranchType branch_type, int64_t offset);
+
+ // Get the range type corresponding to a branch type.
+ static ImmBranchRangeType ImmBranchTypeToRange(ImmBranchType);
+
+ // Get the maximum realizable forward PC offset (in bytes) for an immediate
+ // branch of the given range type.
+ // This is the largest positive multiple of kInstructionSize, offset, such
+ // that:
+ //
+ // IsValidImmPCOffset(xxx, offset / kInstructionSize)
+ //
+ // returns true for the same branch type.
+ static int32_t ImmBranchMaxForwardOffset(ImmBranchRangeType range_type);
+
+ // Get the minimum realizable backward PC offset (in bytes) for an immediate
+ // branch of the given range type.
+ // This is the smallest (i.e., largest in magnitude) negative multiple of
+ // kInstructionSize, offset, such that:
+ //
+ // IsValidImmPCOffset(xxx, offset / kInstructionSize)
+ //
+ // returns true for the same branch type.
+ static int32_t ImmBranchMinBackwardOffset(ImmBranchRangeType range_type);
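+
+ // Worked example (illustrative, derived from the ranges listed above): a
+ // conditional branch encodes a signed imm19 word offset, so
+ // ImmBranchMaxForwardOffset(CondBranchRangeType) is ((1 << 18) - 1) * 4
+ // bytes and ImmBranchMinBackwardOffset(CondBranchRangeType) is
+ // -(1 << 18) * 4 bytes, i.e. roughly +/- 1MB.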
+
+ // Indicate whether Rd can be the stack pointer or the zero register. This
+ // does not check that the instruction actually has an Rd field.
+ Reg31Mode RdMode() const {
+ // The following instructions use sp or wsp as Rd:
+ // Add/sub (immediate) when not setting the flags.
+ // Add/sub (extended) when not setting the flags.
+ // Logical (immediate) when not setting the flags.
+ // Otherwise, r31 is the zero register.
+ if (IsAddSubImmediate() || IsAddSubExtended()) {
+ if (Mask(AddSubSetFlagsBit)) {
+ return Reg31IsZeroRegister;
+ } else {
+ return Reg31IsStackPointer;
+ }
+ }
+ if (IsLogicalImmediate()) {
+ // Of the logical (immediate) instructions, only ANDS (and its aliases)
+ // can set the flags. The others can all write into sp.
+ // Note that some logical operations are not available to
+ // immediate-operand instructions, so we have to combine two masks here.
+ if (Mask(LogicalImmediateMask & LogicalOpMask) == ANDS) {
+ return Reg31IsZeroRegister;
+ } else {
+ return Reg31IsStackPointer;
+ }
+ }
+ return Reg31IsZeroRegister;
+ }
+
+ // Indicate whether Rn can be the stack pointer or the zero register. This
+ // does not check that the instruction actually has an Rn field.
+ Reg31Mode RnMode() const {
+ // The following instructions use sp or wsp as Rn:
+ // All loads and stores.
+ // Add/sub (immediate).
+ // Add/sub (extended).
+ // Otherwise, r31 is the zero register.
+ if (IsLoadOrStore() || IsAddSubImmediate() || IsAddSubExtended()) {
+ return Reg31IsStackPointer;
+ }
+ return Reg31IsZeroRegister;
+ }
+
+ ImmBranchType BranchType() const {
+ if (IsCondBranchImm()) {
+ return CondBranchType;
+ } else if (IsUncondBranchImm()) {
+ return UncondBranchType;
+ } else if (IsCompareBranch()) {
+ return CompareBranchType;
+ } else if (IsTestBranch()) {
+ return TestBranchType;
+ } else {
+ return UnknownBranchType;
+ }
+ }
+
+ // Find the target of this instruction. 'this' may be a branch or a
+ // PC-relative addressing instruction.
+ const Instruction* ImmPCOffsetTarget() const;
+
+ // Patch a PC-relative offset to refer to 'target'. 'this' may be a branch or
+ // a PC-relative addressing instruction.
+ void SetImmPCOffsetTarget(const Instruction* target);
+ // Patch a literal load instruction to load from 'source'.
+ void SetImmLLiteral(const Instruction* source);
+
+ // The range of a load literal instruction, expressed as 'instr +- range'.
+ // The range is actually the 'positive' range; the branch instruction can
+ // target [instr - range - kInstructionSize, instr + range].
+ static const int kLoadLiteralImmBitwidth = 19;
+ static const int kLoadLiteralRange =
+ (1 << kLoadLiteralImmBitwidth) / 2 - kInstructionSize;
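+ // For reference, with kLoadLiteralImmBitwidth = 19 the constant above
+ // evaluates to (1 << 19) / 2 - 4 = 262140 bytes.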
+
+ // Calculate the address of a literal referred to by a load-literal
+ // instruction, and return it as the specified type.
+ //
+ // The literal itself is safely mutable only if the backing buffer is safely
+ // mutable.
+ template <typename T>
+ T LiteralAddress() const {
+ uint64_t base_raw = reinterpret_cast<uint64_t>(this);
+ int64_t offset = ImmLLiteral() << kLiteralEntrySizeLog2;
+ uint64_t address_raw = base_raw + offset;
+
+ // Cast the address using a C-style cast. A reinterpret_cast would be
+ // appropriate, but it can't cast one integral type to another.
+ T address = (T)(address_raw);
+
+ // Assert that the address can be represented by the specified type.
+ VIXL_ASSERT((uint64_t)(address) == address_raw);
+
+ return address;
+ }
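+
+ // Illustrative use (an assumption about typical callers, not upstream
+ // documentation): given a pointer to an LDR (literal) instruction, a caller
+ // can obtain a typed pointer to the referenced pool entry with
+ // const uint32_t* lit = instr->LiteralAddress<const uint32_t*>();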
+
+ uint32_t Literal32() const {
+ uint32_t literal;
+ memcpy(&literal, LiteralAddress<const void*>(), sizeof(literal));
+ return literal;
+ }
+
+ uint64_t Literal64() const {
+ uint64_t literal;
+ memcpy(&literal, LiteralAddress<const void*>(), sizeof(literal));
+ return literal;
+ }
+
+ void SetLiteral64(uint64_t literal) const {
+ memcpy(LiteralAddress<void*>(), &literal, sizeof(literal));
+ }
+
+ float LiteralFP32() const {
+ return RawbitsToFloat(Literal32());
+ }
+
+ double LiteralFP64() const {
+ return RawbitsToDouble(Literal64());
+ }
+
+ const Instruction* NextInstruction() const {
+ return this + kInstructionSize;
+ }
+
+ // Skip any constant pools with artificial guards at this point.
+ // Return either |this| or the first instruction after the pool.
+ const Instruction* skipPool() const;
+
+ const Instruction* InstructionAtOffset(int64_t offset) const {
+ VIXL_ASSERT(IsWordAligned(this + offset));
+ return this + offset;
+ }
+
+ template<typename T> static Instruction* Cast(T src) {
+ return reinterpret_cast<Instruction*>(src);
+ }
+
+ template<typename T> static const Instruction* CastConst(T src) {
+ return reinterpret_cast<const Instruction*>(src);
+ }
+
+ private:
+ int ImmBranch() const;
+
+ static float Imm8ToFP32(uint32_t imm8);
+ static double Imm8ToFP64(uint32_t imm8);
+
+ void SetPCRelImmTarget(const Instruction* target);
+ void SetBranchImmTarget(const Instruction* target);
+};
+
+
+// Functions for handling NEON vector format information.
+enum VectorFormat {
+ kFormatUndefined = 0xffffffff,
+ kFormat8B = NEON_8B,
+ kFormat16B = NEON_16B,
+ kFormat4H = NEON_4H,
+ kFormat8H = NEON_8H,
+ kFormat2S = NEON_2S,
+ kFormat4S = NEON_4S,
+ kFormat1D = NEON_1D,
+ kFormat2D = NEON_2D,
+
+ // Scalar formats. We add the scalar bit to distinguish between scalar and
+ // vector enumerations; the bit is always set in the encoding of scalar ops
+ // and always clear for vector ops. Although kFormatD and kFormat1D appear
+ // to be the same, their meaning is subtly different. The first is a scalar
+ // operation, the second a vector operation that only affects one lane.
+ kFormatB = NEON_B | NEONScalar,
+ kFormatH = NEON_H | NEONScalar,
+ kFormatS = NEON_S | NEONScalar,
+ kFormatD = NEON_D | NEONScalar
+};
+
+VectorFormat VectorFormatHalfWidth(const VectorFormat vform);
+VectorFormat VectorFormatDoubleWidth(const VectorFormat vform);
+VectorFormat VectorFormatDoubleLanes(const VectorFormat vform);
+VectorFormat VectorFormatHalfLanes(const VectorFormat vform);
+VectorFormat ScalarFormatFromLaneSize(int lanesize);
+VectorFormat VectorFormatHalfWidthDoubleLanes(const VectorFormat vform);
+VectorFormat VectorFormatFillQ(const VectorFormat vform);
+unsigned RegisterSizeInBitsFromFormat(VectorFormat vform);
+unsigned RegisterSizeInBytesFromFormat(VectorFormat vform);
+// TODO: Make the return types of these functions consistent.
+unsigned LaneSizeInBitsFromFormat(VectorFormat vform);
+int LaneSizeInBytesFromFormat(VectorFormat vform);
+int LaneSizeInBytesLog2FromFormat(VectorFormat vform);
+int LaneCountFromFormat(VectorFormat vform);
+int MaxLaneCountFromFormat(VectorFormat vform);
+bool IsVectorFormat(VectorFormat vform);
+int64_t MaxIntFromFormat(VectorFormat vform);
+int64_t MinIntFromFormat(VectorFormat vform);
+uint64_t MaxUintFromFormat(VectorFormat vform);
+
+
+enum NEONFormat {
+ NF_UNDEF = 0,
+ NF_8B = 1,
+ NF_16B = 2,
+ NF_4H = 3,
+ NF_8H = 4,
+ NF_2S = 5,
+ NF_4S = 6,
+ NF_1D = 7,
+ NF_2D = 8,
+ NF_B = 9,
+ NF_H = 10,
+ NF_S = 11,
+ NF_D = 12
+};
+
+static const unsigned kNEONFormatMaxBits = 6;
+
+struct NEONFormatMap {
+ // The bit positions in the instruction to consider.
+ uint8_t bits[kNEONFormatMaxBits];
+
+ // Mapping from concatenated bits to format.
+ NEONFormat map[1 << kNEONFormatMaxBits];
+};
+
+class NEONFormatDecoder {
+ public:
+ enum SubstitutionMode {
+ kPlaceholder,
+ kFormat
+ };
+
+ // Construct a format decoder with increasingly specific format maps for each
+ // substitution. If no format map is specified, the default is the integer
+ // format map.
+ explicit NEONFormatDecoder(const Instruction* instr) {
+ instrbits_ = instr->InstructionBits();
+ SetFormatMaps(IntegerFormatMap());
+ }
+ NEONFormatDecoder(const Instruction* instr,
+ const NEONFormatMap* format) {
+ instrbits_ = instr->InstructionBits();
+ SetFormatMaps(format);
+ }
+ NEONFormatDecoder(const Instruction* instr,
+ const NEONFormatMap* format0,
+ const NEONFormatMap* format1) {
+ instrbits_ = instr->InstructionBits();
+ SetFormatMaps(format0, format1);
+ }
+ NEONFormatDecoder(const Instruction* instr,
+ const NEONFormatMap* format0,
+ const NEONFormatMap* format1,
+ const NEONFormatMap* format2) {
+ instrbits_ = instr->InstructionBits();
+ SetFormatMaps(format0, format1, format2);
+ }
+
+ // Set the format mapping for all or individual substitutions.
+ void SetFormatMaps(const NEONFormatMap* format0,
+ const NEONFormatMap* format1 = NULL,
+ const NEONFormatMap* format2 = NULL) {
+ VIXL_ASSERT(format0 != NULL);
+ formats_[0] = format0;
+ formats_[1] = (format1 == NULL) ? formats_[0] : format1;
+ formats_[2] = (format2 == NULL) ? formats_[1] : format2;
+ }
+ void SetFormatMap(unsigned index, const NEONFormatMap* format) {
+ VIXL_ASSERT(index < (sizeof(formats_) / sizeof(formats_[0])));
+ VIXL_ASSERT(format != NULL);
+ formats_[index] = format;
+ }
+
+ // Substitute %s in the input string with the placeholder string for each
+ // register, ie. "'B", "'H", etc.
+ const char* SubstitutePlaceholders(const char* string) {
+ return Substitute(string, kPlaceholder, kPlaceholder, kPlaceholder);
+ }
+
+ // Substitute %s in the input string with a new string based on the
+ // substitution mode.
+ const char* Substitute(const char* string,
+ SubstitutionMode mode0 = kFormat,
+ SubstitutionMode mode1 = kFormat,
+ SubstitutionMode mode2 = kFormat) {
+ snprintf(form_buffer_, sizeof(form_buffer_), string,
+ GetSubstitute(0, mode0),
+ GetSubstitute(1, mode1),
+ GetSubstitute(2, mode2));
+ return form_buffer_;
+ }
+
+ // Append a "2" to a mnemonic string based of the state of the Q bit.
+ const char* Mnemonic(const char* mnemonic) {
+ if ((instrbits_ & NEON_Q) != 0) {
+ snprintf(mne_buffer_, sizeof(mne_buffer_), "%s2", mnemonic);
+ return mne_buffer_;
+ }
+ return mnemonic;
+ }
+
+ VectorFormat GetVectorFormat(int format_index = 0) {
+ return GetVectorFormat(formats_[format_index]);
+ }
+
+ VectorFormat GetVectorFormat(const NEONFormatMap* format_map) {
+ static const VectorFormat vform[] = {
+ kFormatUndefined,
+ kFormat8B, kFormat16B, kFormat4H, kFormat8H,
+ kFormat2S, kFormat4S, kFormat1D, kFormat2D,
+ kFormatB, kFormatH, kFormatS, kFormatD
+ };
+ VIXL_ASSERT(GetNEONFormat(format_map) < (sizeof(vform) / sizeof(vform[0])));
+ return vform[GetNEONFormat(format_map)];
+ }
+
+ // Built in mappings for common cases.
+
+ // The integer format map uses three bits (Q, size<1:0>) to encode the
+ // "standard" set of NEON integer vector formats.
+ static const NEONFormatMap* IntegerFormatMap() {
+ static const NEONFormatMap map = {
+ {23, 22, 30},
+ {NF_8B, NF_16B, NF_4H, NF_8H, NF_2S, NF_4S, NF_UNDEF, NF_2D}
+ };
+ return &map;
+ }
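+
+ // Worked example (illustrative): in the integer map above the selected bits
+ // are, from most to least significant, size<1> (bit 23), size<0> (bit 22)
+ // and Q (bit 30). An instruction with size = 01 and Q = 1 therefore indexes
+ // entry 0b011 = 3, i.e. NF_8H.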
+
+ // The long integer format map uses two bits (size<1:0>) to encode the
+ // long set of NEON integer vector formats. These are used in narrow, wide
+ // and long operations.
+ static const NEONFormatMap* LongIntegerFormatMap() {
+ static const NEONFormatMap map = {
+ {23, 22}, {NF_8H, NF_4S, NF_2D}
+ };
+ return &map;
+ }
+
+ // The FP format map uses two bits (Q, size<0>) to encode the NEON FP vector
+ // formats: NF_2S, NF_4S, NF_2D.
+ static const NEONFormatMap* FPFormatMap() {
+ // The FP format map assumes two bits (Q, size<0>) are used to encode the
+ // NEON FP vector formats: NF_2S, NF_4S, NF_2D.
+ static const NEONFormatMap map = {
+ {22, 30}, {NF_2S, NF_4S, NF_UNDEF, NF_2D}
+ };
+ return &map;
+ }
+
+ // The load/store format map uses three bits (Q, 11, 10) to encode the
+ // set of NEON vector formats.
+ static const NEONFormatMap* LoadStoreFormatMap() {
+ static const NEONFormatMap map = {
+ {11, 10, 30},
+ {NF_8B, NF_16B, NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}
+ };
+ return &map;
+ }
+
+ // The logical format map uses one bit (Q) to encode the NEON vector format:
+ // NF_8B, NF_16B.
+ static const NEONFormatMap* LogicalFormatMap() {
+ static const NEONFormatMap map = {
+ {30}, {NF_8B, NF_16B}
+ };
+ return &map;
+ }
+
+ // The triangular format map uses between two and five bits to encode the NEON
+ // vector format:
+ // xxx10->8B, xxx11->16B, xx100->4H, xx101->8H
+ // x1000->2S, x1001->4S, 10001->2D, all others undefined.
+ static const NEONFormatMap* TriangularFormatMap() {
+ static const NEONFormatMap map = {
+ {19, 18, 17, 16, 30},
+ {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B, NF_4H, NF_8H, NF_8B, NF_16B, NF_2S,
+ NF_4S, NF_8B, NF_16B, NF_4H, NF_8H, NF_8B, NF_16B, NF_UNDEF, NF_2D,
+ NF_8B, NF_16B, NF_4H, NF_8H, NF_8B, NF_16B, NF_2S, NF_4S, NF_8B, NF_16B,
+ NF_4H, NF_8H, NF_8B, NF_16B}
+ };
+ return &map;
+ }
+
+ // The scalar format map uses two bits (size<1:0>) to encode the NEON scalar
+ // formats: NF_B, NF_H, NF_S, NF_D.
+ static const NEONFormatMap* ScalarFormatMap() {
+ static const NEONFormatMap map = {
+ {23, 22}, {NF_B, NF_H, NF_S, NF_D}
+ };
+ return &map;
+ }
+
+ // The long scalar format map uses two bits (size<1:0>) to encode the longer
+ // NEON scalar formats: NF_H, NF_S, NF_D.
+ static const NEONFormatMap* LongScalarFormatMap() {
+ static const NEONFormatMap map = {
+ {23, 22}, {NF_H, NF_S, NF_D}
+ };
+ return &map;
+ }
+
+ // The FP scalar format map assumes one bit (size<0>) is used to encode the
+ // NEON FP scalar formats: NF_S, NF_D.
+ static const NEONFormatMap* FPScalarFormatMap() {
+ static const NEONFormatMap map = {
+ {22}, {NF_S, NF_D}
+ };
+ return &map;
+ }
+
+ // The triangular scalar format map uses between one and four bits to encode
+ // the NEON scalar formats:
+ // xxx1->B, xx10->H, x100->S, 1000->D, all others undefined.
+ static const NEONFormatMap* TriangularScalarFormatMap() {
+ static const NEONFormatMap map = {
+ {19, 18, 17, 16},
+ {NF_UNDEF, NF_B, NF_H, NF_B, NF_S, NF_B, NF_H, NF_B,
+ NF_D, NF_B, NF_H, NF_B, NF_S, NF_B, NF_H, NF_B}
+ };
+ return &map;
+ }
+
+ private:
+ // Get a pointer to a string that represents the format or placeholder for
+ // the specified substitution index, based on the format map and instruction.
+ const char* GetSubstitute(int index, SubstitutionMode mode) {
+ if (mode == kFormat) {
+ return NEONFormatAsString(GetNEONFormat(formats_[index]));
+ }
+ VIXL_ASSERT(mode == kPlaceholder);
+ return NEONFormatAsPlaceholder(GetNEONFormat(formats_[index]));
+ }
+
+ // Get the NEONFormat enumerated value for bits obtained from the
+ // instruction based on the specified format mapping.
+ NEONFormat GetNEONFormat(const NEONFormatMap* format_map) {
+ return format_map->map[PickBits(format_map->bits)];
+ }
+
+ // Convert a NEONFormat into a string.
+ static const char* NEONFormatAsString(NEONFormat format) {
+ static const char* formats[] = {
+ "undefined",
+ "8b", "16b", "4h", "8h", "2s", "4s", "1d", "2d",
+ "b", "h", "s", "d"
+ };
+ VIXL_ASSERT(format < (sizeof(formats) / sizeof(formats[0])));
+ return formats[format];
+ }
+
+ // Convert a NEONFormat into a register placeholder string.
+ static const char* NEONFormatAsPlaceholder(NEONFormat format) {
+ VIXL_ASSERT((format == NF_B) || (format == NF_H) ||
+ (format == NF_S) || (format == NF_D) ||
+ (format == NF_UNDEF));
+ static const char* formats[] = {
+ "undefined",
+ "undefined", "undefined", "undefined", "undefined",
+ "undefined", "undefined", "undefined", "undefined",
+ "'B", "'H", "'S", "'D"
+ };
+ return formats[format];
+ }
+
+ // Select bits from instrbits_ defined by the bits array, concatenate them,
+ // and return the value.
+ uint8_t PickBits(const uint8_t bits[]) {
+ uint8_t result = 0;
+ for (unsigned b = 0; b < kNEONFormatMaxBits; b++) {
+ if (bits[b] == 0) break;
+ result <<= 1;
+ result |= ((instrbits_ & (1 << bits[b])) == 0) ? 0 : 1;
+ }
+ return result;
+ }
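+
+ // Note (observation on the code above): bits[0] becomes the most
+ // significant bit of the result and a zero entry terminates the list, so
+ // bit position 0 can never be selected. For example, with bits = {23, 22,
+ // 30} and instruction bits 23, 22 and 30 equal to 1, 0 and 1, PickBits
+ // returns 0b101 = 5.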
+
+ Instr instrbits_;
+ const NEONFormatMap* formats_[3];
+ char form_buffer_[64];
+ char mne_buffer_[16];
+};
+} // namespace vixl
+
+#endif // VIXL_A64_INSTRUCTIONS_A64_H_
diff --git a/js/src/jit/arm64/vixl/Instrument-vixl.cpp b/js/src/jit/arm64/vixl/Instrument-vixl.cpp
new file mode 100644
index 0000000000..c07495c29d
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Instrument-vixl.cpp
@@ -0,0 +1,850 @@
+// Copyright 2014, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Instrument-vixl.h"
+
+namespace vixl {
+
+Counter::Counter(const char* name, CounterType type)
+ : count_(0), enabled_(false), type_(type) {
+ VIXL_ASSERT(name != NULL);
+ strncpy(name_, name, kCounterNameMaxLength);
+}
+
+
+void Counter::Enable() {
+ enabled_ = true;
+}
+
+
+void Counter::Disable() {
+ enabled_ = false;
+}
+
+
+bool Counter::IsEnabled() {
+ return enabled_;
+}
+
+
+void Counter::Increment() {
+ if (enabled_) {
+ count_++;
+ }
+}
+
+
+uint64_t Counter::count() {
+ uint64_t result = count_;
+ if (type_ == Gauge) {
+ // If the counter is a Gauge, reset the count after reading.
+ count_ = 0;
+ }
+ return result;
+}
+
+
+const char* Counter::name() {
+ return name_;
+}
+
+
+CounterType Counter::type() {
+ return type_;
+}
+
+
+struct CounterDescriptor {
+ const char* name;
+ CounterType type;
+};
+
+
+static const CounterDescriptor kCounterList[] = {
+ {"Instruction", Cumulative},
+
+ {"Move Immediate", Gauge},
+ {"Add/Sub DP", Gauge},
+ {"Logical DP", Gauge},
+ {"Other Int DP", Gauge},
+ {"FP DP", Gauge},
+
+ {"Conditional Select", Gauge},
+ {"Conditional Compare", Gauge},
+
+ {"Unconditional Branch", Gauge},
+ {"Compare and Branch", Gauge},
+ {"Test and Branch", Gauge},
+ {"Conditional Branch", Gauge},
+
+ {"Load Integer", Gauge},
+ {"Load FP", Gauge},
+ {"Load Pair", Gauge},
+ {"Load Literal", Gauge},
+
+ {"Store Integer", Gauge},
+ {"Store FP", Gauge},
+ {"Store Pair", Gauge},
+
+ {"PC Addressing", Gauge},
+ {"Other", Gauge},
+ {"NEON", Gauge},
+ {"Crypto", Gauge}
+};
+
+
+Instrument::Instrument(const char* datafile, uint64_t sample_period)
+ : output_stream_(stdout), sample_period_(sample_period) {
+
+ // Set up the output stream. If datafile is non-NULL, use that file. If it
+ // can't be opened, or datafile is NULL, use stdout.
+ if (datafile != NULL) {
+ output_stream_ = fopen(datafile, "w");
+ if (output_stream_ == NULL) {
+ printf("Can't open output file %s. Using stdout.\n", datafile);
+ output_stream_ = stdout;
+ }
+ }
+
+ static const int num_counters =
+ sizeof(kCounterList) / sizeof(CounterDescriptor);
+
+ // Dump an instrumentation description comment at the top of the file.
+ fprintf(output_stream_, "# counters=%d\n", num_counters);
+ fprintf(output_stream_, "# sample_period=%" PRIu64 "\n", sample_period_);
+
+ // Construct Counter objects from counter description array.
+ for (int i = 0; i < num_counters; i++) {
+ if (Counter* counter = js_new<Counter>(kCounterList[i].name, kCounterList[i].type))
+ (void)counters_.append(counter);
+ }
+
+ DumpCounterNames();
+}
+
+
+Instrument::~Instrument() {
+ // Dump any remaining instruction data to the output file.
+ DumpCounters();
+
+ // Free all the counter objects.
+ for (auto counter : counters_) {
+ js_delete(counter);
+ }
+
+ if (output_stream_ != stdout) {
+ fclose(output_stream_);
+ }
+}
+
+
+void Instrument::Update() {
+ // Increment the instruction counter, and dump all counters if a sample period
+ // has elapsed.
+ static Counter* counter = GetCounter("Instruction");
+ VIXL_ASSERT(counter->type() == Cumulative);
+ counter->Increment();
+
+ if (counter->IsEnabled() && (counter->count() % sample_period_) == 0) {
+ DumpCounters();
+ }
+}
+
+
+void Instrument::DumpCounters() {
+ // Iterate through the counter objects, dumping their values to the output
+ // stream.
+ for (auto counter : counters_) {
+ fprintf(output_stream_, "%" PRIu64 ",", counter->count());
+ }
+ fprintf(output_stream_, "\n");
+ fflush(output_stream_);
+}
+
+
+void Instrument::DumpCounterNames() {
+ // Iterate through the counter objects, dumping the counter names to the
+ // output stream.
+ for (auto counter : counters_) {
+ fprintf(output_stream_, "%s,", counter->name());
+ }
+ fprintf(output_stream_, "\n");
+ fflush(output_stream_);
+}
+
+
+void Instrument::HandleInstrumentationEvent(unsigned event) {
+ switch (event) {
+ case InstrumentStateEnable: Enable(); break;
+ case InstrumentStateDisable: Disable(); break;
+ default: DumpEventMarker(event);
+ }
+}
+
+
+void Instrument::DumpEventMarker(unsigned marker) {
+ // Dump an event marker to the output stream as a specially formatted comment
+ // line.
+ static Counter* counter = GetCounter("Instruction");
+
+ fprintf(output_stream_, "# %c%c @ %" PRId64 "\n", marker & 0xff,
+ (marker >> 8) & 0xff, counter->count());
+}
+
+
+Counter* Instrument::GetCounter(const char* name) {
+ // Get a Counter object by name from the counter list.
+ for (auto counter : counters_) {
+ if (strcmp(counter->name(), name) == 0) {
+ return counter;
+ }
+ }
+
+ // A Counter by that name does not exist: print an error message to stderr
+ // and the output file, and exit.
+ static const char* error_message =
+ "# Error: Unknown counter \"%s\". Exiting.\n";
+ fprintf(stderr, error_message, name);
+ fprintf(output_stream_, error_message, name);
+ exit(1);
+}
+
+
+void Instrument::Enable() {
+ for (auto counter : counters_) {
+ counter->Enable();
+ }
+}
+
+
+void Instrument::Disable() {
+ for (auto counter : counters_) {
+ counter->Disable();
+ }
+}
+
+
+void Instrument::VisitPCRelAddressing(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("PC Addressing");
+ counter->Increment();
+}
+
+
+void Instrument::VisitAddSubImmediate(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Add/Sub DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitLogicalImmediate(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Logical DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitMoveWideImmediate(const Instruction* instr) {
+ Update();
+ static Counter* counter = GetCounter("Move Immediate");
+
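+ // A MOVN that targets the zero register does not change architectural
+ // state, so it appears to be used as an in-stream event marker here: its
+ // immediate is forwarded to HandleInstrumentationEvent instead of being
+ // counted as an ordinary move. (This is an interpretation of the code
+ // below, not upstream documentation.)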
+ if (instr->IsMovn() && (instr->Rd() == kZeroRegCode)) {
+ unsigned imm = instr->ImmMoveWide();
+ HandleInstrumentationEvent(imm);
+ } else {
+ counter->Increment();
+ }
+}
+
+
+void Instrument::VisitBitfield(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other Int DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitExtract(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other Int DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitUnconditionalBranch(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Unconditional Branch");
+ counter->Increment();
+}
+
+
+void Instrument::VisitUnconditionalBranchToRegister(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Unconditional Branch");
+ counter->Increment();
+}
+
+
+void Instrument::VisitCompareBranch(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Compare and Branch");
+ counter->Increment();
+}
+
+
+void Instrument::VisitTestBranch(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Test and Branch");
+ counter->Increment();
+}
+
+
+void Instrument::VisitConditionalBranch(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Conditional Branch");
+ counter->Increment();
+}
+
+
+void Instrument::VisitSystem(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other");
+ counter->Increment();
+}
+
+
+void Instrument::VisitException(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other");
+ counter->Increment();
+}
+
+
+void Instrument::InstrumentLoadStorePair(const Instruction* instr) {
+ static Counter* load_pair_counter = GetCounter("Load Pair");
+ static Counter* store_pair_counter = GetCounter("Store Pair");
+
+ if (instr->Mask(LoadStorePairLBit) != 0) {
+ load_pair_counter->Increment();
+ } else {
+ store_pair_counter->Increment();
+ }
+}
+
+
+void Instrument::VisitLoadStorePairPostIndex(const Instruction* instr) {
+ Update();
+ InstrumentLoadStorePair(instr);
+}
+
+
+void Instrument::VisitLoadStorePairOffset(const Instruction* instr) {
+ Update();
+ InstrumentLoadStorePair(instr);
+}
+
+
+void Instrument::VisitLoadStorePairPreIndex(const Instruction* instr) {
+ Update();
+ InstrumentLoadStorePair(instr);
+}
+
+
+void Instrument::VisitLoadStorePairNonTemporal(const Instruction* instr) {
+ Update();
+ InstrumentLoadStorePair(instr);
+}
+
+
+void Instrument::VisitLoadStoreExclusive(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other");
+ counter->Increment();
+}
+
+void Instrument::VisitAtomicMemory(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other");
+ counter->Increment();
+}
+
+void Instrument::VisitLoadLiteral(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Load Literal");
+ counter->Increment();
+}
+
+
+void Instrument::InstrumentLoadStore(const Instruction* instr) {
+ static Counter* load_int_counter = GetCounter("Load Integer");
+ static Counter* store_int_counter = GetCounter("Store Integer");
+ static Counter* load_fp_counter = GetCounter("Load FP");
+ static Counter* store_fp_counter = GetCounter("Store FP");
+
+ switch (instr->Mask(LoadStoreMask)) {
+ case STRB_w:
+ case STRH_w:
+ case STR_w:
+ VIXL_FALLTHROUGH();
+ case STR_x: store_int_counter->Increment(); break;
+ case STR_s:
+ VIXL_FALLTHROUGH();
+ case STR_d: store_fp_counter->Increment(); break;
+ case LDRB_w:
+ case LDRH_w:
+ case LDR_w:
+ case LDR_x:
+ case LDRSB_x:
+ case LDRSH_x:
+ case LDRSW_x:
+ case LDRSB_w:
+ VIXL_FALLTHROUGH();
+ case LDRSH_w: load_int_counter->Increment(); break;
+ case LDR_s:
+ VIXL_FALLTHROUGH();
+ case LDR_d: load_fp_counter->Increment(); break;
+ }
+}
+
+
+void Instrument::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
+ Update();
+ InstrumentLoadStore(instr);
+}
+
+
+void Instrument::VisitLoadStorePostIndex(const Instruction* instr) {
+ USE(instr);
+ Update();
+ InstrumentLoadStore(instr);
+}
+
+
+void Instrument::VisitLoadStorePreIndex(const Instruction* instr) {
+ Update();
+ InstrumentLoadStore(instr);
+}
+
+
+void Instrument::VisitLoadStoreRegisterOffset(const Instruction* instr) {
+ Update();
+ InstrumentLoadStore(instr);
+}
+
+
+void Instrument::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
+ Update();
+ InstrumentLoadStore(instr);
+}
+
+
+void Instrument::VisitLogicalShifted(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Logical DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitAddSubShifted(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Add/Sub DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitAddSubExtended(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Add/Sub DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitAddSubWithCarry(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Add/Sub DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitConditionalCompareRegister(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Conditional Compare");
+ counter->Increment();
+}
+
+
+void Instrument::VisitConditionalCompareImmediate(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Conditional Compare");
+ counter->Increment();
+}
+
+
+void Instrument::VisitConditionalSelect(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Conditional Select");
+ counter->Increment();
+}
+
+
+void Instrument::VisitDataProcessing1Source(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other Int DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitDataProcessing2Source(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other Int DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitDataProcessing3Source(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other Int DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPCompare(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("FP DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPConditionalCompare(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Conditional Compare");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPConditionalSelect(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Conditional Select");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPImmediate(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("FP DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPDataProcessing1Source(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("FP DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPDataProcessing2Source(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("FP DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPDataProcessing3Source(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("FP DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPIntegerConvert(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("FP DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPFixedPointConvert(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("FP DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitCrypto2RegSHA(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Crypto");
+ counter->Increment();
+}
+
+
+void Instrument::VisitCrypto3RegSHA(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Crypto");
+ counter->Increment();
+}
+
+
+void Instrument::VisitCryptoAES(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Crypto");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEON2RegMisc(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEON3Same(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEON3Different(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONAcrossLanes(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONByIndexedElement(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONCopy(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONExtract(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONLoadStoreMultiStructPostIndex(
+ const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONLoadStoreSingleStructPostIndex(
+ const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONModifiedImmediate(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONScalar2RegMisc(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONScalar3Diff(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONScalar3Same(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONScalarByIndexedElement(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONScalarCopy(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONScalarPairwise(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONScalarShiftImmediate(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONShiftImmediate(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONTable(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONPerm(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitUnallocated(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other");
+ counter->Increment();
+}
+
+
+void Instrument::VisitUnimplemented(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other");
+ counter->Increment();
+}
+
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Instrument-vixl.h b/js/src/jit/arm64/vixl/Instrument-vixl.h
new file mode 100644
index 0000000000..eca076d234
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Instrument-vixl.h
@@ -0,0 +1,109 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_INSTRUMENT_A64_H_
+#define VIXL_A64_INSTRUMENT_A64_H_
+
+#include "mozilla/Vector.h"
+
+#include "jit/arm64/vixl/Constants-vixl.h"
+#include "jit/arm64/vixl/Decoder-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+#include "js/AllocPolicy.h"
+
+namespace vixl {
+
+const int kCounterNameMaxLength = 256;
+const uint64_t kDefaultInstrumentationSamplingPeriod = 1 << 22;
+
+
+enum InstrumentState {
+ InstrumentStateDisable = 0,
+ InstrumentStateEnable = 1
+};
+
+
+enum CounterType {
+ Gauge = 0, // Gauge counters reset themselves after reading.
+ Cumulative = 1 // Cumulative counters keep their value after reading.
+};
+
+
+class Counter {
+ public:
+ explicit Counter(const char* name, CounterType type = Gauge);
+
+ void Increment();
+ void Enable();
+ void Disable();
+ bool IsEnabled();
+ uint64_t count();
+ const char* name();
+ CounterType type();
+
+ private:
+ char name_[kCounterNameMaxLength];
+ uint64_t count_;
+ bool enabled_;
+ CounterType type_;
+};
+
+
+class Instrument: public DecoderVisitor {
+ public:
+ explicit Instrument(const char* datafile = NULL,
+ uint64_t sample_period = kDefaultInstrumentationSamplingPeriod);
+ ~Instrument();
+
+ void Enable();
+ void Disable();
+
+ // Declare all Visitor functions.
+ #define DECLARE(A) void Visit##A(const Instruction* instr) override;
+ VISITOR_LIST(DECLARE)
+ #undef DECLARE
+
+ private:
+ void Update();
+ void DumpCounters();
+ void DumpCounterNames();
+ void DumpEventMarker(unsigned marker);
+ void HandleInstrumentationEvent(unsigned event);
+ Counter* GetCounter(const char* name);
+
+ void InstrumentLoadStore(const Instruction* instr);
+ void InstrumentLoadStorePair(const Instruction* instr);
+
+ mozilla::Vector<Counter*, 8, js::SystemAllocPolicy> counters_;
+
+ FILE *output_stream_;
+ uint64_t sample_period_;
+};
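+
+// Usage sketch (illustrative; assumes the standard DecoderVisitor wiring):
+// Instrument instrument("vixl_stats.csv");
+// decoder.AppendVisitor(&instrument);
+// Every decoded instruction then updates the counters, and a row of counts is
+// emitted each time the sampling period elapses.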
+
+} // namespace vixl
+
+#endif // VIXL_A64_INSTRUMENT_A64_H_
diff --git a/js/src/jit/arm64/vixl/Logic-vixl.cpp b/js/src/jit/arm64/vixl/Logic-vixl.cpp
new file mode 100644
index 0000000000..71821a333f
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Logic-vixl.cpp
@@ -0,0 +1,4738 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifdef JS_SIMULATOR_ARM64
+
+#include <cmath>
+
+#include "jit/arm64/vixl/Simulator-vixl.h"
+
+namespace vixl {
+
+template<> double Simulator::FPDefaultNaN<double>() {
+ return kFP64DefaultNaN;
+}
+
+
+template<> float Simulator::FPDefaultNaN<float>() {
+ return kFP32DefaultNaN;
+}
+
+
+double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
+ if (src >= 0) {
+ return UFixedToDouble(src, fbits, round);
+ } else {
+ // This works for all negative values, including INT64_MIN.
+ return -UFixedToDouble(-src, fbits, round);
+ }
+}
+
+
+double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
+ // An input of 0 is a special case because the result is effectively
+ // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
+ if (src == 0) {
+ return 0.0;
+ }
+
+ // Calculate the exponent. The highest significant bit will have the value
+ // 2^exponent.
+ const int highest_significant_bit = 63 - CountLeadingZeros(src);
+ const int64_t exponent = highest_significant_bit - fbits;
+
+ return FPRoundToDouble(0, exponent, src, round);
+}
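+
+// Worked example (illustrative): src = 6 (0b110) with fbits = 1 represents
+// 6 / 2 = 3.0. The highest set bit is bit 2, so the exponent passed to the
+// rounding helper is 2 - 1 = 1 and the result is exactly 3.0.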
+
+
+float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
+ if (src >= 0) {
+ return UFixedToFloat(src, fbits, round);
+ } else {
+ // This works for all negative values, including INT64_MIN.
+ return -UFixedToFloat(-src, fbits, round);
+ }
+}
+
+
+float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
+ // An input of 0 is a special case because the result is effectively
+ // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
+ if (src == 0) {
+ return 0.0f;
+ }
+
+ // Calculate the exponent. The highest significant bit will have the value
+ // 2^exponent.
+ const int highest_significant_bit = 63 - CountLeadingZeros(src);
+ const int32_t exponent = highest_significant_bit - fbits;
+
+ return FPRoundToFloat(0, exponent, src, round);
+}
+
+
+void Simulator::ld1(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, 16))
+ return;
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.ReadUintFromMem(vform, i, addr);
+ addr += LaneSizeInBytesFromFormat(vform);
+ }
+}
+
+
+void Simulator::ld1(VectorFormat vform,
+ LogicVRegister dst,
+ int index,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)))
+ return;
+ dst.ReadUintFromMem(vform, index, addr);
+}
+
+
+void Simulator::ld1r(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)))
+ return;
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.ReadUintFromMem(vform, i, addr);
+ }
+}
+
+
+void Simulator::ld2(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ uint64_t addr1) {
+ if (handle_wasm_seg_fault(addr1, 16*2))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ int esize = LaneSizeInBytesFromFormat(vform);
+ uint64_t addr2 = addr1 + esize;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst1.ReadUintFromMem(vform, i, addr1);
+ dst2.ReadUintFromMem(vform, i, addr2);
+ addr1 += 2 * esize;
+ addr2 += 2 * esize;
+ }
+}
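+
+// Illustrative layout for the multi-structure form above: with kFormat4S and
+// memory holding {a0, b0, a1, b1, a2, b2, a3, b3}, dst1 receives
+// {a0, a1, a2, a3} and dst2 receives {b0, b1, b2, b3}, matching LD2's
+// de-interleaving behaviour.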
+
+
+void Simulator::ld2(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ int index,
+ uint64_t addr1) {
+ if (handle_wasm_seg_fault(addr1, LaneSizeInBytesFromFormat(vform)*2))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
+ dst1.ReadUintFromMem(vform, index, addr1);
+ dst2.ReadUintFromMem(vform, index, addr2);
+}
+
+
+void Simulator::ld2r(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*2))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst1.ReadUintFromMem(vform, i, addr);
+ dst2.ReadUintFromMem(vform, i, addr2);
+ }
+}
+
+
+void Simulator::ld3(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ uint64_t addr1) {
+ if (handle_wasm_seg_fault(addr1, 16*3))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ dst3.ClearForWrite(vform);
+ int esize = LaneSizeInBytesFromFormat(vform);
+ uint64_t addr2 = addr1 + esize;
+ uint64_t addr3 = addr2 + esize;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst1.ReadUintFromMem(vform, i, addr1);
+ dst2.ReadUintFromMem(vform, i, addr2);
+ dst3.ReadUintFromMem(vform, i, addr3);
+ addr1 += 3 * esize;
+ addr2 += 3 * esize;
+ addr3 += 3 * esize;
+ }
+}
+
+
+void Simulator::ld3(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ int index,
+ uint64_t addr1) {
+ if (handle_wasm_seg_fault(addr1, LaneSizeInBytesFromFormat(vform)*3))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ dst3.ClearForWrite(vform);
+ uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
+ uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
+ dst1.ReadUintFromMem(vform, index, addr1);
+ dst2.ReadUintFromMem(vform, index, addr2);
+ dst3.ReadUintFromMem(vform, index, addr3);
+}
+
+
+void Simulator::ld3r(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*3))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ dst3.ClearForWrite(vform);
+ uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
+ uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst1.ReadUintFromMem(vform, i, addr);
+ dst2.ReadUintFromMem(vform, i, addr2);
+ dst3.ReadUintFromMem(vform, i, addr3);
+ }
+}
+
+
+void Simulator::ld4(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ uint64_t addr1) {
+ if (handle_wasm_seg_fault(addr1, 16*4))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ dst3.ClearForWrite(vform);
+ dst4.ClearForWrite(vform);
+ int esize = LaneSizeInBytesFromFormat(vform);
+ uint64_t addr2 = addr1 + esize;
+ uint64_t addr3 = addr2 + esize;
+ uint64_t addr4 = addr3 + esize;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst1.ReadUintFromMem(vform, i, addr1);
+ dst2.ReadUintFromMem(vform, i, addr2);
+ dst3.ReadUintFromMem(vform, i, addr3);
+ dst4.ReadUintFromMem(vform, i, addr4);
+ addr1 += 4 * esize;
+ addr2 += 4 * esize;
+ addr3 += 4 * esize;
+ addr4 += 4 * esize;
+ }
+}
+
+
+void Simulator::ld4(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ int index,
+ uint64_t addr1) {
+ if (handle_wasm_seg_fault(addr1, LaneSizeInBytesFromFormat(vform)*4))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ dst3.ClearForWrite(vform);
+ dst4.ClearForWrite(vform);
+ uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
+ uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
+ uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
+ dst1.ReadUintFromMem(vform, index, addr1);
+ dst2.ReadUintFromMem(vform, index, addr2);
+ dst3.ReadUintFromMem(vform, index, addr3);
+ dst4.ReadUintFromMem(vform, index, addr4);
+}
+
+
+void Simulator::ld4r(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*4))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ dst3.ClearForWrite(vform);
+ dst4.ClearForWrite(vform);
+ uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
+ uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
+ uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst1.ReadUintFromMem(vform, i, addr);
+ dst2.ReadUintFromMem(vform, i, addr2);
+ dst3.ReadUintFromMem(vform, i, addr3);
+ dst4.ReadUintFromMem(vform, i, addr4);
+ }
+}
+
+
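+// NEON structure store helpers: the mirror image of the loads above,
+// interleaving the source registers back into memory, with the same wasm
+// bounds check up front.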
+void Simulator::st1(VectorFormat vform,
+ LogicVRegister src,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, 16))
+ return;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ src.WriteUintToMem(vform, i, addr);
+ addr += LaneSizeInBytesFromFormat(vform);
+ }
+}
+
+
+void Simulator::st1(VectorFormat vform,
+ LogicVRegister src,
+ int index,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)))
+ return;
+ src.WriteUintToMem(vform, index, addr);
+}
+
+
+void Simulator::st2(VectorFormat vform,
+ LogicVRegister dst,
+ LogicVRegister dst2,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, 16*2))
+ return;
+ int esize = LaneSizeInBytesFromFormat(vform);
+ uint64_t addr2 = addr + esize;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.WriteUintToMem(vform, i, addr);
+ dst2.WriteUintToMem(vform, i, addr2);
+ addr += 2 * esize;
+ addr2 += 2 * esize;
+ }
+}
+
+
+void Simulator::st2(VectorFormat vform,
+ LogicVRegister dst,
+ LogicVRegister dst2,
+ int index,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*2))
+ return;
+ int esize = LaneSizeInBytesFromFormat(vform);
+ dst.WriteUintToMem(vform, index, addr);
+ dst2.WriteUintToMem(vform, index, addr + 1 * esize);
+}
+
+
+void Simulator::st3(VectorFormat vform,
+ LogicVRegister dst,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, 16*3))
+ return;
+ int esize = LaneSizeInBytesFromFormat(vform);
+ uint64_t addr2 = addr + esize;
+ uint64_t addr3 = addr2 + esize;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.WriteUintToMem(vform, i, addr);
+ dst2.WriteUintToMem(vform, i, addr2);
+ dst3.WriteUintToMem(vform, i, addr3);
+ addr += 3 * esize;
+ addr2 += 3 * esize;
+ addr3 += 3 * esize;
+ }
+}
+
+
+void Simulator::st3(VectorFormat vform,
+ LogicVRegister dst,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ int index,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*3))
+ return;
+ int esize = LaneSizeInBytesFromFormat(vform);
+ dst.WriteUintToMem(vform, index, addr);
+ dst2.WriteUintToMem(vform, index, addr + 1 * esize);
+ dst3.WriteUintToMem(vform, index, addr + 2 * esize);
+}
+
+
+void Simulator::st4(VectorFormat vform,
+ LogicVRegister dst,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, 16*4))
+ return;
+ int esize = LaneSizeInBytesFromFormat(vform);
+ uint64_t addr2 = addr + esize;
+ uint64_t addr3 = addr2 + esize;
+ uint64_t addr4 = addr3 + esize;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.WriteUintToMem(vform, i, addr);
+ dst2.WriteUintToMem(vform, i, addr2);
+ dst3.WriteUintToMem(vform, i, addr3);
+ dst4.WriteUintToMem(vform, i, addr4);
+ addr += 4 * esize;
+ addr2 += 4 * esize;
+ addr3 += 4 * esize;
+ addr4 += 4 * esize;
+ }
+}
+
+
+void Simulator::st4(VectorFormat vform,
+ LogicVRegister dst,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ int index,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*4))
+ return;
+ int esize = LaneSizeInBytesFromFormat(vform);
+ dst.WriteUintToMem(vform, index, addr);
+ dst2.WriteUintToMem(vform, index, addr + 1 * esize);
+ dst3.WriteUintToMem(vform, index, addr + 2 * esize);
+ dst4.WriteUintToMem(vform, index, addr + 3 * esize);
+}
+
+
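+// Integer vector compares (CMEQ/CMGE/CMGT/CMHI/CMHS/CMLT/CMLE): each
+// destination lane becomes all ones when the condition holds and zero
+// otherwise.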
+LogicVRegister Simulator::cmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int64_t sa = src1.Int(vform, i);
+ int64_t sb = src2.Int(vform, i);
+ uint64_t ua = src1.Uint(vform, i);
+ uint64_t ub = src2.Uint(vform, i);
+ bool result = false;
+ switch (cond) {
+ case eq: result = (ua == ub); break;
+ case ge: result = (sa >= sb); break;
+ case gt: result = (sa > sb) ; break;
+ case hi: result = (ua > ub) ; break;
+ case hs: result = (ua >= ub); break;
+ case lt: result = (sa < sb) ; break;
+ case le: result = (sa <= sb); break;
+ default: VIXL_UNREACHABLE(); break;
+ }
+ dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::cmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ int imm,
+ Condition cond) {
+ SimVRegister temp;
+ LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
+ return cmp(vform, dst, src1, imm_reg, cond);
+}
+
+
+LogicVRegister Simulator::cmptst(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t ua = src1.Uint(vform, i);
+ uint64_t ub = src2.Uint(vform, i);
+ dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
+ }
+ return dst;
+}
+
+
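+// Lane-wise add. In addition to the plain result, per-lane saturation state
+// is recorded so that saturating callers can clamp afterwards via
+// SignedSaturate()/UnsignedSaturate().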
+LogicVRegister Simulator::add(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ // TODO(all): consider assigning the result of LaneCountFromFormat to a local.
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ // Test for unsigned saturation.
+ uint64_t ua = src1.UintLeftJustified(vform, i);
+ uint64_t ub = src2.UintLeftJustified(vform, i);
+ uint64_t ur = ua + ub;
+ if (ur < ua) {
+ dst.SetUnsignedSat(i, true);
+ }
+
+ // Test for signed saturation.
+ int64_t sa = src1.IntLeftJustified(vform, i);
+ int64_t sb = src2.IntLeftJustified(vform, i);
+ int64_t sr = sa + sb;
+ // If the signs of the operands are the same, but different from the result,
+ // there was an overflow.
+ if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
+ dst.SetSignedSat(i, sa >= 0);
+ }
+
+ dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::addp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uzp1(vform, temp1, src1, src2);
+ uzp2(vform, temp2, src1, src2);
+ add(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::mla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ mul(vform, temp, src1, src2);
+ add(vform, dst, dst, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::mls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ mul(vform, temp, src1, src2);
+ sub(vform, dst, dst, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::mul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::mul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform = VectorFormatFillQ(vform);
+ return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::mla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform = VectorFormatFillQ(vform);
+ return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::mls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform = VectorFormatFillQ(vform);
+ return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
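+// Multiply-by-element forms: the selected source element is broadcast into a
+// temporary with dup_element() and the vector-by-vector helper is reused.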
+LogicVRegister Simulator::smull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::smull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::umull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::umull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::smlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::smlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::umlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::umlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::smlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::smlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::umlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::umlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqdmull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqdmull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqdmlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform = VectorFormatFillQ(vform);
+ return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform = VectorFormatFillQ(vform);
+ return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
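+// Carry-less (polynomial, GF(2)) multiplication of two bytes: partial
+// products are combined with XOR instead of addition. For example,
+// 0b11 * 0b11 = 0b101, i.e. (x + 1)(x + 1) = x^2 + 1 over GF(2).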
+uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
+ uint16_t result = 0;
+ uint16_t extended_op2 = op2;
+ for (int i = 0; i < 8; ++i) {
+ if ((op1 >> i) & 1) {
+ result = result ^ (extended_op2 << i);
+ }
+ }
+ return result;
+}
+
+
+LogicVRegister Simulator::pmul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i,
+ PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::pmull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ VectorFormat vform_src = VectorFormatHalfWidth(vform);
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i),
+ src2.Uint(vform_src, i)));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::pmull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
+ dst.ClearForWrite(vform);
+ int lane_count = LaneCountFromFormat(vform);
+ for (int i = 0; i < lane_count; i++) {
+ dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i),
+ src2.Uint(vform_src, lane_count + i)));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::sub(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ // Test for unsigned saturation.
+ if (src2.Uint(vform, i) > src1.Uint(vform, i)) {
+ dst.SetUnsignedSat(i, false);
+ }
+
+ // Test for signed saturation.
+ int64_t sa = src1.IntLeftJustified(vform, i);
+ int64_t sb = src2.IntLeftJustified(vform, i);
+ int64_t sr = sa - sb;
+ // If the signs of the operands are different, and the sign of the first
+ // operand doesn't match the result, there was an overflow.
+ if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
+ dst.SetSignedSat(i, sr < 0);
+ }
+
+ dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::and_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::orr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::orn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::eor(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::bic(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::bic(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ uint64_t imm) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ result[i] = src.Uint(vform, i) & ~imm;
+ }
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
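+// BIF/BIT/BSL all use the identity a ^ ((a ^ b) & mask). BIF inserts src1
+// bits into dst where src2 is clear, BIT inserts them where src2 is set, and
+// BSL uses the existing dst bits as the mask selecting between src1 (set)
+// and src2 (clear).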
+LogicVRegister Simulator::bif(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t operand1 = dst.Uint(vform, i);
+ uint64_t operand2 = ~src2.Uint(vform, i);
+ uint64_t operand3 = src1.Uint(vform, i);
+ uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
+ dst.SetUint(vform, i, result);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::bit(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t operand1 = dst.Uint(vform, i);
+ uint64_t operand2 = src2.Uint(vform, i);
+ uint64_t operand3 = src1.Uint(vform, i);
+ uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
+ dst.SetUint(vform, i, result);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::bsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t operand1 = src2.Uint(vform, i);
+ uint64_t operand2 = dst.Uint(vform, i);
+ uint64_t operand3 = src1.Uint(vform, i);
+ uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
+ dst.SetUint(vform, i, result);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::sminmax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool max) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int64_t src1_val = src1.Int(vform, i);
+ int64_t src2_val = src2.Int(vform, i);
+ int64_t dst_val;
+ if (max == true) {
+ dst_val = (src1_val > src2_val) ? src1_val : src2_val;
+ } else {
+ dst_val = (src1_val < src2_val) ? src1_val : src2_val;
+ }
+ dst.SetInt(vform, i, dst_val);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::smax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ return sminmax(vform, dst, src1, src2, true);
+}
+
+
+LogicVRegister Simulator::smin(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ return sminmax(vform, dst, src1, src2, false);
+}
+
+
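+// Pairwise min/max over one source: adjacent lane pairs are reduced and the
+// results written into dst starting at dst_index, so smaxp/sminp below call
+// this once per source operand to fill each half of dst.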
+LogicVRegister Simulator::sminmaxp(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ const LogicVRegister& src,
+ bool max) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
+ int64_t src1_val = src.Int(vform, i);
+ int64_t src2_val = src.Int(vform, i + 1);
+ int64_t dst_val;
+ if (max == true) {
+ dst_val = (src1_val > src2_val) ? src1_val : src2_val;
+ } else {
+ dst_val = (src1_val < src2_val) ? src1_val : src2_val;
+ }
+ dst.SetInt(vform, dst_index + (i >> 1), dst_val);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::smaxp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ sminmaxp(vform, dst, 0, src1, true);
+ sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sminp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ sminmaxp(vform, dst, 0, src1, false);
+ sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
+ return dst;
+}
+
+
+LogicVRegister Simulator::addp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VIXL_ASSERT(vform == kFormatD);
+
+ int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1);
+ dst.ClearForWrite(vform);
+ dst.SetInt(vform, 0, dst_val);
+ return dst;
+}
+
+
+LogicVRegister Simulator::addv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VectorFormat vform_dst
+ = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
+
+ int64_t dst_val = 0;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst_val += src.Int(vform, i);
+ }
+
+ dst.ClearForWrite(vform_dst);
+ dst.SetInt(vform_dst, 0, dst_val);
+ return dst;
+}
+
+
+LogicVRegister Simulator::saddlv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VectorFormat vform_dst
+ = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
+
+ int64_t dst_val = 0;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst_val += src.Int(vform, i);
+ }
+
+ dst.ClearForWrite(vform_dst);
+ dst.SetInt(vform_dst, 0, dst_val);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uaddlv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VectorFormat vform_dst
+ = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
+
+ uint64_t dst_val = 0;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst_val += src.Uint(vform, i);
+ }
+
+ dst.ClearForWrite(vform_dst);
+ dst.SetUint(vform_dst, 0, dst_val);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sminmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ bool max) {
+ dst.ClearForWrite(vform);
+ int64_t dst_val = max ? INT64_MIN : INT64_MAX;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int64_t src_val = src.Int(vform, i);
+ if (max == true) {
+ dst_val = (src_val > dst_val) ? src_val : dst_val;
+ } else {
+ dst_val = (src_val < dst_val) ? src_val : dst_val;
+ }
+ }
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetInt(vform, i, 0);
+ }
+ dst.SetInt(vform, 0, dst_val);
+ return dst;
+}
+
+
+LogicVRegister Simulator::smaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ sminmaxv(vform, dst, src, true);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ sminmaxv(vform, dst, src, false);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uminmax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool max) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t src1_val = src1.Uint(vform, i);
+ uint64_t src2_val = src2.Uint(vform, i);
+ uint64_t dst_val;
+ if (max == true) {
+ dst_val = (src1_val > src2_val) ? src1_val : src2_val;
+ } else {
+ dst_val = (src1_val < src2_val) ? src1_val : src2_val;
+ }
+ dst.SetUint(vform, i, dst_val);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::umax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ return uminmax(vform, dst, src1, src2, true);
+}
+
+
+LogicVRegister Simulator::umin(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ return uminmax(vform, dst, src1, src2, false);
+}
+
+
+LogicVRegister Simulator::uminmaxp(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ const LogicVRegister& src,
+ bool max) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
+ uint64_t src1_val = src.Uint(vform, i);
+ uint64_t src2_val = src.Uint(vform, i + 1);
+ uint64_t dst_val;
+ if (max == true) {
+ dst_val = (src1_val > src2_val) ? src1_val : src2_val;
+ } else {
+ dst_val = (src1_val < src2_val) ? src1_val : src2_val;
+ }
+ dst.SetUint(vform, dst_index + (i >> 1), dst_val);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::umaxp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ uminmaxp(vform, dst, 0, src1, true);
+ uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uminp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ uminmaxp(vform, dst, 0, src1, false);
+ uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uminmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ bool max) {
+ dst.ClearForWrite(vform);
+ uint64_t dst_val = max ? 0 : UINT64_MAX;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t src_val = src.Uint(vform, i);
+ if (max == true) {
+ dst_val = (src_val > dst_val) ? src_val : dst_val;
+ } else {
+ dst_val = (src_val < dst_val) ? src_val : dst_val;
+ }
+ }
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, 0);
+ }
+ dst.SetUint(vform, 0, dst_val);
+ return dst;
+}
+
+
+LogicVRegister Simulator::umaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ uminmaxv(vform, dst, src, true);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ uminmaxv(vform, dst, src, false);
+ return dst;
+}
+
+
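+// The immediate shift forms below are mostly built on the by-register
+// shifts: the amount (negated for a right shift) is broadcast with
+// dup_immediate() and ushl/sshl do the work; sli/sri instead combine the
+// shifted bits with the existing destination bits directly.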
+LogicVRegister Simulator::shl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp;
+ LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
+ return ushl(vform, dst, src, shiftreg);
+}
+
+
+LogicVRegister Simulator::sshll(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp1, temp2;
+ LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
+ LogicVRegister extendedreg = sxtl(vform, temp2, src);
+ return sshl(vform, dst, extendedreg, shiftreg);
+}
+
+
+LogicVRegister Simulator::sshll2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp1, temp2;
+ LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
+ LogicVRegister extendedreg = sxtl2(vform, temp2, src);
+ return sshl(vform, dst, extendedreg, shiftreg);
+}
+
+
+LogicVRegister Simulator::shll(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ int shift = LaneSizeInBitsFromFormat(vform) / 2;
+ return sshll(vform, dst, src, shift);
+}
+
+
+LogicVRegister Simulator::shll2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ int shift = LaneSizeInBitsFromFormat(vform) / 2;
+ return sshll2(vform, dst, src, shift);
+}
+
+
+LogicVRegister Simulator::ushll(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp1, temp2;
+ LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
+ LogicVRegister extendedreg = uxtl(vform, temp2, src);
+ return ushl(vform, dst, extendedreg, shiftreg);
+}
+
+
+LogicVRegister Simulator::ushll2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp1, temp2;
+ LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
+ LogicVRegister extendedreg = uxtl2(vform, temp2, src);
+ return ushl(vform, dst, extendedreg, shiftreg);
+}
+
+
+LogicVRegister Simulator::sli(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ dst.ClearForWrite(vform);
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; i++) {
+ uint64_t src_lane = src.Uint(vform, i);
+ uint64_t dst_lane = dst.Uint(vform, i);
+ uint64_t shifted = src_lane << shift;
+ uint64_t mask = MaxUintFromFormat(vform) << shift;
+ dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::sqshl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp;
+ LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
+ return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::uqshl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp;
+ LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
+ return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqshlu(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp;
+ LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
+ return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sri(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ dst.ClearForWrite(vform);
+ int laneCount = LaneCountFromFormat(vform);
+ VIXL_ASSERT((shift > 0) &&
+ (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
+ for (int i = 0; i < laneCount; i++) {
+ uint64_t src_lane = src.Uint(vform, i);
+ uint64_t dst_lane = dst.Uint(vform, i);
+ uint64_t shifted;
+ uint64_t mask;
+ if (shift == 64) {
+ shifted = 0;
+ mask = 0;
+ } else {
+ shifted = src_lane >> shift;
+ mask = MaxUintFromFormat(vform) >> shift;
+ }
+ dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::ushr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp;
+ LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
+ return ushl(vform, dst, src, shiftreg);
+}
+
+
+LogicVRegister Simulator::sshr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp;
+ LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
+ return sshl(vform, dst, src, shiftreg);
+}
+
+
+LogicVRegister Simulator::ssra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
+ return add(vform, dst, dst, shifted_reg);
+}
+
+
+LogicVRegister Simulator::usra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
+ return add(vform, dst, dst, shifted_reg);
+}
+
+
+LogicVRegister Simulator::srsra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
+ return add(vform, dst, dst, shifted_reg);
+}
+
+
+LogicVRegister Simulator::ursra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
+ return add(vform, dst, dst, shifted_reg);
+}
+
+
+LogicVRegister Simulator::cls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ uint64_t result[16];
+ int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; i++) {
+ result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::clz(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ uint64_t result[16];
+ int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; i++) {
+ result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::cnt(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ uint64_t result[16];
+ int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; i++) {
+ uint64_t value = src.Uint(vform, i);
+ result[i] = 0;
+ for (int j = 0; j < laneSizeInBits; j++) {
+ result[i] += (value & 1);
+ value >>= 1;
+ }
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
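+// Shift by a signed, per-lane register amount: positive amounts shift left,
+// negative amounts shift right. Saturation and rounding state is recorded
+// for the saturating/rounding callers.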
+LogicVRegister Simulator::sshl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int8_t shift_val = src2.Int(vform, i);
+ int64_t lj_src_val = src1.IntLeftJustified(vform, i);
+
+ // Set signed saturation state.
+ if ((shift_val > CountLeadingSignBits(lj_src_val)) &&
+ (lj_src_val != 0)) {
+ dst.SetSignedSat(i, lj_src_val >= 0);
+ }
+
+ // Set unsigned saturation state.
+ if (lj_src_val < 0) {
+ dst.SetUnsignedSat(i, false);
+ } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
+ (lj_src_val != 0)) {
+ dst.SetUnsignedSat(i, true);
+ }
+
+ int64_t src_val = src1.Int(vform, i);
+ if (shift_val > 63) {
+ dst.SetInt(vform, i, 0);
+ } else if (shift_val < -63) {
+ dst.SetRounding(i, src_val < 0);
+ dst.SetInt(vform, i, (src_val < 0) ? -1 : 0);
+ } else {
+ if (shift_val < 0) {
+ // Set rounding state. Rounding only needed on right shifts.
+ if (((src_val >> (-shift_val - 1)) & 1) == 1) {
+ dst.SetRounding(i, true);
+ }
+ src_val >>= -shift_val;
+ } else {
+ src_val <<= shift_val;
+ }
+ dst.SetInt(vform, i, src_val);
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::ushl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int8_t shift_val = src2.Int(vform, i);
+ uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
+
+ // Set saturation state.
+ if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
+ dst.SetUnsignedSat(i, true);
+ }
+
+ uint64_t src_val = src1.Uint(vform, i);
+ if ((shift_val > 63) || (shift_val < -64)) {
+ dst.SetUint(vform, i, 0);
+ } else {
+ if (shift_val < 0) {
+ // Set rounding state. Rounding only needed on right shifts.
+ if (((src_val >> (-shift_val - 1)) & 1) == 1) {
+ dst.SetRounding(i, true);
+ }
+
+ if (shift_val == -64) {
+ src_val = 0;
+ } else {
+ src_val >>= -shift_val;
+ }
+ } else {
+ src_val <<= shift_val;
+ }
+ dst.SetUint(vform, i, src_val);
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::neg(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ // Test for signed saturation.
+ int64_t sa = src.Int(vform, i);
+ if (sa == MinIntFromFormat(vform)) {
+ dst.SetSignedSat(i, true);
+ }
+ dst.SetInt(vform, i, -sa);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::suqadd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int64_t sa = dst.IntLeftJustified(vform, i);
+ uint64_t ub = src.UintLeftJustified(vform, i);
+ int64_t sr = sa + ub;
+
+ if (sr < sa) { // Test for signed positive saturation.
+ dst.SetInt(vform, i, MaxIntFromFormat(vform));
+ } else {
+ dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::usqadd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t ua = dst.UintLeftJustified(vform, i);
+ int64_t sb = src.IntLeftJustified(vform, i);
+ uint64_t ur = ua + sb;
+
+ if ((sb > 0) && (ur <= ua)) {
+ dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
+ } else if ((sb < 0) && (ur >= ua)) {
+ dst.SetUint(vform, i, 0); // Negative saturation.
+ } else {
+ dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::abs(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ // Test for signed saturation.
+ int64_t sa = src.Int(vform, i);
+ if (sa == MinIntFromFormat(vform)) {
+ dst.SetSignedSat(i, true);
+ }
+ if (sa < 0) {
+ dst.SetInt(vform, i, -sa);
+ } else {
+ dst.SetInt(vform, i, sa);
+ }
+ }
+ return dst;
+}
+
+
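+// Common narrowing helper behind XTN/SQXTN/SQXTUN/UQXTN and the narrowing
+// shifts: each source lane is truncated to the destination lane size, the
+// upper-half ("2") forms write the top half of dst, and saturation state is
+// recorded for the saturating callers.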
+LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
+ LogicVRegister dst,
+ bool dstIsSigned,
+ const LogicVRegister& src,
+ bool srcIsSigned) {
+ bool upperhalf = false;
+ VectorFormat srcform = kFormatUndefined;
+ int64_t ssrc[8];
+ uint64_t usrc[8];
+
+ switch (dstform) {
+ case kFormat8B : upperhalf = false; srcform = kFormat8H; break;
+ case kFormat16B: upperhalf = true; srcform = kFormat8H; break;
+ case kFormat4H : upperhalf = false; srcform = kFormat4S; break;
+ case kFormat8H : upperhalf = true; srcform = kFormat4S; break;
+ case kFormat2S : upperhalf = false; srcform = kFormat2D; break;
+ case kFormat4S : upperhalf = true; srcform = kFormat2D; break;
+ case kFormatB : upperhalf = false; srcform = kFormatH; break;
+ case kFormatH : upperhalf = false; srcform = kFormatS; break;
+ case kFormatS : upperhalf = false; srcform = kFormatD; break;
+    default: VIXL_UNIMPLEMENTED();
+ }
+
+ for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
+ ssrc[i] = src.Int(srcform, i);
+ usrc[i] = src.Uint(srcform, i);
+ }
+
+ int offset;
+ if (upperhalf) {
+ offset = LaneCountFromFormat(dstform) / 2;
+ } else {
+ offset = 0;
+ dst.ClearForWrite(dstform);
+ }
+
+ for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
+ // Test for signed saturation
+ if (ssrc[i] > MaxIntFromFormat(dstform)) {
+ dst.SetSignedSat(offset + i, true);
+ } else if (ssrc[i] < MinIntFromFormat(dstform)) {
+ dst.SetSignedSat(offset + i, false);
+ }
+
+ // Test for unsigned saturation
+ if (srcIsSigned) {
+ if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
+ dst.SetUnsignedSat(offset + i, true);
+ } else if (ssrc[i] < 0) {
+ dst.SetUnsignedSat(offset + i, false);
+ }
+ } else {
+ if (usrc[i] > MaxUintFromFormat(dstform)) {
+ dst.SetUnsignedSat(offset + i, true);
+ }
+ }
+
+ int64_t result;
+ if (srcIsSigned) {
+ result = ssrc[i] & MaxUintFromFormat(dstform);
+ } else {
+ result = usrc[i] & MaxUintFromFormat(dstform);
+ }
+
+ if (dstIsSigned) {
+ dst.SetInt(dstform, offset + i, result);
+ } else {
+ dst.SetUint(dstform, offset + i, result);
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::xtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return extractnarrow(vform, dst, true, src, true);
+}
+
+
+LogicVRegister Simulator::sqxtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqxtun(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::uqxtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::absdiff(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool issigned) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (issigned) {
+ int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
+ sr = sr > 0 ? sr : -sr;
+ dst.SetInt(vform, i, sr);
+ } else {
+ int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
+ sr = sr > 0 ? sr : -sr;
+ dst.SetUint(vform, i, sr);
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::saba(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ dst.ClearForWrite(vform);
+ absdiff(vform, temp, src1, src2, true);
+ add(vform, dst, dst, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uaba(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ dst.ClearForWrite(vform);
+ absdiff(vform, temp, src1, src2, false);
+ add(vform, dst, dst, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::not_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, ~src.Uint(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::rbit(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
+ uint64_t reversed_value;
+ uint64_t value;
+ for (int i = 0; i < laneCount; i++) {
+ value = src.Uint(vform, i);
+ reversed_value = 0;
+ for (int j = 0; j < laneSizeInBits; j++) {
+ reversed_value = (reversed_value << 1) | (value & 1);
+ value >>= 1;
+ }
+ result[i] = reversed_value;
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
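+// Reverse the order of the lanes within every revSize-byte chunk; rev16,
+// rev32 and rev64 below fix revSize at 2, 4 and 8 bytes respectively.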
+LogicVRegister Simulator::rev(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int revSize) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ int laneSize = LaneSizeInBytesFromFormat(vform);
+ int lanesPerLoop = revSize / laneSize;
+ for (int i = 0; i < laneCount; i += lanesPerLoop) {
+ for (int j = 0; j < lanesPerLoop; j++) {
+ result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
+ }
+ }
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::rev16(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return rev(vform, dst, src, 2);
+}
+
+
+LogicVRegister Simulator::rev32(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return rev(vform, dst, src, 4);
+}
+
+
+LogicVRegister Simulator::rev64(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return rev(vform, dst, src, 8);
+}
+
+
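+// Pairwise add-long: adjacent pairs of half-width lanes are summed into
+// double-width lanes, either overwriting dst (SADDLP/UADDLP) or accumulating
+// into it (SADALP/UADALP).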
+LogicVRegister Simulator::addlp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ bool is_signed,
+ bool do_accumulate) {
+ VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
+
+ int64_t sr[16];
+ uint64_t ur[16];
+
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ if (is_signed) {
+ sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1);
+ } else {
+ ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
+ }
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ if (do_accumulate) {
+ if (is_signed) {
+ dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]);
+ } else {
+ dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]);
+ }
+ } else {
+ if (is_signed) {
+ dst.SetInt(vform, i, sr[i]);
+ } else {
+ dst.SetUint(vform, i, ur[i]);
+ }
+ }
+ }
+
+ return dst;
+}
+
+
+LogicVRegister Simulator::saddlp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return addlp(vform, dst, src, true, false);
+}
+
+
+LogicVRegister Simulator::uaddlp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return addlp(vform, dst, src, false, false);
+}
+
+
+LogicVRegister Simulator::sadalp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return addlp(vform, dst, src, true, true);
+}
+
+
+LogicVRegister Simulator::uadalp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return addlp(vform, dst, src, false, true);
+}
+
+
+LogicVRegister Simulator::ext(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ uint8_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount - index; ++i) {
+ result[i] = src1.Uint(vform, i + index);
+ }
+ for (int i = 0; i < index; ++i) {
+ result[laneCount - index + i] = src2.Uint(vform, i);
+ }
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::dup_element(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int src_index) {
+ int laneCount = LaneCountFromFormat(vform);
+ uint64_t value = src.Uint(vform, src_index);
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, value);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::dup_immediate(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t imm) {
+ int laneCount = LaneCountFromFormat(vform);
+ uint64_t value = imm & MaxUintFromFormat(vform);
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, value);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::ins_element(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ const LogicVRegister& src,
+ int src_index) {
+ dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
+ return dst;
+}
+
+
+LogicVRegister Simulator::ins_immediate(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ uint64_t imm) {
+ uint64_t value = imm & MaxUintFromFormat(vform);
+ dst.SetUint(vform, dst_index, value);
+ return dst;
+}
+
+
+LogicVRegister Simulator::mov(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
+ dst.SetUint(vform, lane, src.Uint(vform, lane));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::movi(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t imm) {
+ int laneCount = LaneCountFromFormat(vform);
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, imm);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::mvni(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t imm) {
+ int laneCount = LaneCountFromFormat(vform);
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, ~imm);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::orr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ uint64_t imm) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ result[i] = src.Uint(vform, i) | imm;
+ }
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::uxtl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VectorFormat vform_half = VectorFormatHalfWidth(vform);
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, src.Uint(vform_half, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::sxtl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VectorFormat vform_half = VectorFormatHalfWidth(vform);
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetInt(vform, i, src.Int(vform_half, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::uxtl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VectorFormat vform_half = VectorFormatHalfWidth(vform);
+ int lane_count = LaneCountFromFormat(vform);
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < lane_count; i++) {
+ dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::sxtl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VectorFormat vform_half = VectorFormatHalfWidth(vform);
+ int lane_count = LaneCountFromFormat(vform);
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < lane_count; i++) {
+ dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::shrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vform_src = VectorFormatDoubleWidth(vform);
+ VectorFormat vform_dst = vform;
+ LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
+ return extractnarrow(vform_dst, dst, false, shifted_src, false);
+}
+
+
+LogicVRegister Simulator::shrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
+ return extractnarrow(vformdst, dst, false, shifted_src, false);
+}
+
+
+LogicVRegister Simulator::rshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
+ return extractnarrow(vformdst, dst, false, shifted_src, false);
+}
+
+
+LogicVRegister Simulator::rshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
+ return extractnarrow(vformdst, dst, false, shifted_src, false);
+}
+
+
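+// Table lookups over one to four 16-byte table registers. TBL zeroes lanes
+// whose index is out of range (implemented by starting from a zeroed result
+// and deferring to TBX), while TBX leaves such lanes unchanged.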
+LogicVRegister Simulator::tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& ind) {
+ SimVRegister result;
+ movi(vform, result, 0);
+ tbx(vform, result, tab, ind);
+ return orr(vform, dst, result, result);
+}
+
+
+LogicVRegister Simulator::tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& ind) {
+ SimVRegister result;
+ movi(vform, result, 0);
+ tbx(vform, result, tab, tab2, ind);
+ return orr(vform, dst, result, result);
+}
+
+
+LogicVRegister Simulator::tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& ind) {
+ SimVRegister result;
+ movi(vform, result, 0);
+ tbx(vform, result, tab, tab2, tab3, ind);
+ return orr(vform, dst, result, result);
+}
+
+
+LogicVRegister Simulator::tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& tab4,
+ const LogicVRegister& ind) {
+ SimVRegister result;
+ movi(vform, result, 0);
+ tbx(vform, result, tab, tab2, tab3, tab4, ind);
+ return orr(vform, dst, result, result);
+}
+
+
+LogicVRegister Simulator::tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& ind) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t j = ind.Uint(vform, i);
+ switch (j >> 4) {
+ case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& ind) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t j = ind.Uint(vform, i);
+ switch (j >> 4) {
+ case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
+ case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& ind) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t j = ind.Uint(vform, i);
+ switch (j >> 4) {
+ case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
+ case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
+ case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& tab4,
+ const LogicVRegister& ind) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t j = ind.Uint(vform, i);
+ switch (j >> 4) {
+ case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
+ case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
+ case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
+ case 3: dst.SetUint(vform, i, tab4.Uint(kFormat16B, j & 15)); break;
+ }
+ }
+ return dst;
+}
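+
+// Editorial note (not part of the upstream VIXL source): the difference
+// between the TBL and TBX helpers above is the handling of out-of-range
+// indices. tbl() starts from a zeroed temporary (movi ... 0) and then runs
+// tbx(), so any index that selects beyond the supplied table registers leaves
+// a zero in that lane. tbx() only writes lanes whose index falls inside the
+// supplied tables (the switch has no default case), so out-of-range indices
+// leave the existing destination lane unchanged, matching architectural TBX.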
+
+
+LogicVRegister Simulator::uqshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::uqshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::uqrshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
+ return sqxtn(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::sqshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
+ return sqxtn(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::sqrshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
+ return sqxtn(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
+ return sqxtn(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::sqshrun(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
+ return sqxtun(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::sqshrun2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
+ return sqxtun(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::sqrshrun(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
+ return sqxtun(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
+ return sqxtun(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::uaddl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl(vform, temp1, src1);
+ uxtl(vform, temp2, src2);
+ add(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uaddl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl2(vform, temp1, src1);
+ uxtl2(vform, temp2, src2);
+ add(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uaddw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ uxtl(vform, temp, src2);
+ add(vform, dst, src1, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uaddw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ uxtl2(vform, temp, src2);
+ add(vform, dst, src1, temp);
+ return dst;
+}
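+
+// Editorial note (not from the upstream source): the pattern above repeats
+// for the signed and subtracting variants that follow. The *L forms (uaddl,
+// saddl, usubl, ...) widen both operands from the low half of the source
+// registers before operating, the *L2 forms widen from the high half
+// (uxtl2/sxtl2), and the *W forms widen only the second operand and combine
+// it with an already wide first operand.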
+
+
+LogicVRegister Simulator::saddl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl(vform, temp1, src1);
+ sxtl(vform, temp2, src2);
+ add(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::saddl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl2(vform, temp1, src1);
+ sxtl2(vform, temp2, src2);
+ add(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::saddw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sxtl(vform, temp, src2);
+ add(vform, dst, src1, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::saddw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sxtl2(vform, temp, src2);
+ add(vform, dst, src1, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::usubl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl(vform, temp1, src1);
+ uxtl(vform, temp2, src2);
+ sub(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::usubl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl2(vform, temp1, src1);
+ uxtl2(vform, temp2, src2);
+ sub(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::usubw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ uxtl(vform, temp, src2);
+ sub(vform, dst, src1, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::usubw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ uxtl2(vform, temp, src2);
+ sub(vform, dst, src1, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::ssubl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl(vform, temp1, src1);
+ sxtl(vform, temp2, src2);
+ sub(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::ssubl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl2(vform, temp1, src1);
+ sxtl2(vform, temp2, src2);
+ sub(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::ssubw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sxtl(vform, temp, src2);
+ sub(vform, dst, src1, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::ssubw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sxtl2(vform, temp, src2);
+ sub(vform, dst, src1, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uabal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl(vform, temp1, src1);
+ uxtl(vform, temp2, src2);
+ uaba(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uabal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl2(vform, temp1, src1);
+ uxtl2(vform, temp2, src2);
+ uaba(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sabal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl(vform, temp1, src1);
+ sxtl(vform, temp2, src2);
+ saba(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sabal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl2(vform, temp1, src1);
+ sxtl2(vform, temp2, src2);
+ saba(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uabdl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl(vform, temp1, src1);
+ uxtl(vform, temp2, src2);
+ absdiff(vform, dst, temp1, temp2, false);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uabdl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl2(vform, temp1, src1);
+ uxtl2(vform, temp2, src2);
+ absdiff(vform, dst, temp1, temp2, false);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sabdl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl(vform, temp1, src1);
+ sxtl(vform, temp2, src2);
+ absdiff(vform, dst, temp1, temp2, true);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sabdl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl2(vform, temp1, src1);
+ sxtl2(vform, temp2, src2);
+ absdiff(vform, dst, temp1, temp2, true);
+ return dst;
+}
+
+
+LogicVRegister Simulator::umull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl(vform, temp1, src1);
+ uxtl(vform, temp2, src2);
+ mul(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::umull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl2(vform, temp1, src1);
+ uxtl2(vform, temp2, src2);
+ mul(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::smull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl(vform, temp1, src1);
+ sxtl(vform, temp2, src2);
+ mul(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::smull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl2(vform, temp1, src1);
+ sxtl2(vform, temp2, src2);
+ mul(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::umlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl(vform, temp1, src1);
+ uxtl(vform, temp2, src2);
+ mls(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::umlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl2(vform, temp1, src1);
+ uxtl2(vform, temp2, src2);
+ mls(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::smlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl(vform, temp1, src1);
+ sxtl(vform, temp2, src2);
+ mls(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::smlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl2(vform, temp1, src1);
+ sxtl2(vform, temp2, src2);
+ mls(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::umlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl(vform, temp1, src1);
+ uxtl(vform, temp2, src2);
+ mla(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::umlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl2(vform, temp1, src1);
+ uxtl2(vform, temp2, src2);
+ mla(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::smlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl(vform, temp1, src1);
+ sxtl(vform, temp2, src2);
+ mla(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::smlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl2(vform, temp1, src1);
+ sxtl2(vform, temp2, src2);
+ mla(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sqdmlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ LogicVRegister product = sqdmull(vform, temp, src1, src2);
+ return add(vform, dst, dst, product).SignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ LogicVRegister product = sqdmull2(vform, temp, src1, src2);
+ return add(vform, dst, dst, product).SignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ LogicVRegister product = sqdmull(vform, temp, src1, src2);
+ return sub(vform, dst, dst, product).SignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ LogicVRegister product = sqdmull2(vform, temp, src1, src2);
+ return sub(vform, dst, dst, product).SignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqdmull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ LogicVRegister product = smull(vform, temp, src1, src2);
+ return add(vform, dst, product, product).SignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqdmull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ LogicVRegister product = smull2(vform, temp, src1, src2);
+ return add(vform, dst, product, product).SignedSaturate(vform);
+}
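+
+// Editorial note (illustrative example, not part of the upstream source):
+// sqdmull performs a widening multiply via smull and then doubles it with a
+// saturating add of the product to itself. With 16-bit inputs both equal to
+// INT16_MIN, smull yields (-32768) * (-32768) == 0x40000000; doubling gives
+// 0x80000000, which exceeds INT32_MAX and therefore saturates to 0x7fffffff,
+// as the architectural SQDMULL definition requires.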
+
+
+LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool round) {
+  // 2 * INT32_MIN * INT32_MIN overflows int64_t.
+  // To avoid this, we use (src1 * src2 + (1 << (esize - 2))) >> (esize - 1),
+  // which is the same as (2 * src1 * src2 + (1 << (esize - 1))) >> esize.
+
+ int esize = LaneSizeInBitsFromFormat(vform);
+ int round_const = round ? (1 << (esize - 2)) : 0;
+ int64_t product;
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ product = src1.Int(vform, i) * src2.Int(vform, i);
+ product += round_const;
+ product = product >> (esize - 1);
+
+ if (product > MaxIntFromFormat(vform)) {
+ product = MaxIntFromFormat(vform);
+ } else if (product < MinIntFromFormat(vform)) {
+ product = MinIntFromFormat(vform);
+ }
+ dst.SetInt(vform, i, product);
+ }
+ return dst;
+}
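+
+// Editorial note (a minimal worked example, not part of the upstream source),
+// assuming 16-bit lanes (esize == 16) and both inputs equal to INT16_MIN:
+//
+//   int64_t p = int64_t(-32768) * -32768;            // 0x40000000
+//   int64_t textbook = (2 * p + (1 << 15)) >> 16;    // 32768
+//   int64_t as_coded = (p + (1 << 14)) >> 15;        // 32768
+//
+// Both forms agree, and the clamping step above then saturates 32768 to
+// MaxIntFromFormat (32767), the architecturally required SQRDMULH result for
+// this input pair.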
+
+
+LogicVRegister Simulator::sqdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ return sqrdmulh(vform, dst, src1, src2, false);
+}
+
+
+LogicVRegister Simulator::addhn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ add(VectorFormatDoubleWidth(vform), temp, src1, src2);
+ shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::addhn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
+ shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::raddhn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ add(VectorFormatDoubleWidth(vform), temp, src1, src2);
+ rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::raddhn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
+ rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::subhn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
+ shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::subhn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
+ shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::rsubhn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
+ rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::rsubhn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
+ rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::trn1(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ int pairs = laneCount / 2;
+ for (int i = 0; i < pairs; ++i) {
+ result[2 * i] = src1.Uint(vform, 2 * i);
+ result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::trn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ int pairs = laneCount / 2;
+ for (int i = 0; i < pairs; ++i) {
+ result[2 * i] = src1.Uint(vform, (2 * i) + 1);
+ result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::zip1(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ int pairs = laneCount / 2;
+ for (int i = 0; i < pairs; ++i) {
+ result[2 * i] = src1.Uint(vform, i);
+ result[(2 * i) + 1] = src2.Uint(vform, i);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::zip2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ int pairs = laneCount / 2;
+ for (int i = 0; i < pairs; ++i) {
+ result[2 * i] = src1.Uint(vform, pairs + i);
+ result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::uzp1(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ uint64_t result[32];
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ result[i] = src1.Uint(vform, i);
+ result[laneCount + i] = src2.Uint(vform, i);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[2 * i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::uzp2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ uint64_t result[32];
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ result[i] = src1.Uint(vform, i);
+ result[laneCount + i] = src2.Uint(vform, i);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+    dst.SetUint(vform, i, result[(2 * i) + 1]);
+ }
+ return dst;
+}
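+
+// Editorial note (illustrative, not from the upstream source): with four
+// lanes, src1 == {a0, a1, a2, a3} and src2 == {b0, b1, b2, b3}:
+//   zip1 -> {a0, b0, a1, b1}    zip2 -> {a2, b2, a3, b3}
+//   uzp1 -> {a0, a2, b0, b2}    uzp2 -> {a1, a3, b1, b3}
+// i.e. ZIP interleaves lanes from the two sources while UZP de-interleaves
+// even and odd lanes.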
+
+
+template <typename T>
+T Simulator::FPAdd(T op1, T op2) {
+ T result = FPProcessNaNs(op1, op2);
+ if (std::isnan(result)) return result;
+
+ if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
+ // inf + -inf returns the default NaN.
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ } else {
+ // Other cases should be handled by standard arithmetic.
+ return op1 + op2;
+ }
+}
+
+
+template <typename T>
+T Simulator::FPSub(T op1, T op2) {
+ // NaNs should be handled elsewhere.
+ VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
+
+ if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
+ // inf - inf returns the default NaN.
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ } else {
+ // Other cases should be handled by standard arithmetic.
+ return op1 - op2;
+ }
+}
+
+
+template <typename T>
+T Simulator::FPMul(T op1, T op2) {
+ // NaNs should be handled elsewhere.
+ VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
+
+ if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
+ // inf * 0.0 returns the default NaN.
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ } else {
+ // Other cases should be handled by standard arithmetic.
+ return op1 * op2;
+ }
+}
+
+
+template<typename T>
+T Simulator::FPMulx(T op1, T op2) {
+ if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
+ // inf * 0.0 returns +/-2.0.
+ T two = 2.0;
+ return copysign(1.0, op1) * copysign(1.0, op2) * two;
+ }
+ return FPMul(op1, op2);
+}
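+
+// Editorial note (not part of the upstream source): FPMulx differs from FPMul
+// only in the infinity-times-zero case, where FMULX is architecturally
+// defined to return 2.0 with the sign of the exact product instead of the
+// default NaN, e.g. FPMulx(kFP64PositiveInfinity, -0.0) == -2.0.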
+
+
+template<typename T>
+T Simulator::FPMulAdd(T a, T op1, T op2) {
+ T result = FPProcessNaNs3(a, op1, op2);
+
+ T sign_a = copysign(1.0, a);
+ T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
+ bool isinf_prod = std::isinf(op1) || std::isinf(op2);
+ bool operation_generates_nan =
+ (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
+ (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
+ (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
+
+ if (std::isnan(result)) {
+ // Generated NaNs override quiet NaNs propagated from a.
+ if (operation_generates_nan && IsQuietNaN(a)) {
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ } else {
+ return result;
+ }
+ }
+
+ // If the operation would produce a NaN, return the default NaN.
+ if (operation_generates_nan) {
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ }
+
+ // Work around broken fma implementations for exact zero results: The sign of
+ // exact 0.0 results is positive unless both a and op1 * op2 are negative.
+ if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
+ return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
+ }
+
+ result = FusedMultiplyAdd(op1, op2, a);
+ VIXL_ASSERT(!std::isnan(result));
+
+ // Work around broken fma implementations for rounded zero results: If a is
+ // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
+ if ((a == 0.0) && (result == 0.0)) {
+ return copysign(0.0, sign_prod);
+ }
+
+ return result;
+}
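+
+// Editorial note (illustrative, not from the upstream source): the two
+// zero-sign fix-ups above enforce the IEEE-754 fusedMultiplyAdd results on
+// hosts whose fma() gets them wrong. For example, with a == +0.0,
+// op1 == -0.0 and op2 == 1.0 the exact product is -0.0 and the sum
+// (+0.0) + (-0.0) must round to +0.0, which is what the explicit check
+// returns.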
+
+
+template <typename T>
+T Simulator::FPDiv(T op1, T op2) {
+ // NaNs should be handled elsewhere.
+ VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
+
+ if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
+ // inf / inf and 0.0 / 0.0 return the default NaN.
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ } else {
+ if (op2 == 0.0) FPProcessException();
+
+ // Other cases should be handled by standard arithmetic.
+ return op1 / op2;
+ }
+}
+
+
+template <typename T>
+T Simulator::FPSqrt(T op) {
+ if (std::isnan(op)) {
+ return FPProcessNaN(op);
+ } else if (op < 0.0) {
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ } else {
+ return sqrt(op);
+ }
+}
+
+
+template <typename T>
+T Simulator::FPMax(T a, T b) {
+ T result = FPProcessNaNs(a, b);
+ if (std::isnan(result)) return result;
+
+ if ((a == 0.0) && (b == 0.0) &&
+ (copysign(1.0, a) != copysign(1.0, b))) {
+ // a and b are zero, and the sign differs: return +0.0.
+ return 0.0;
+ } else {
+ return (a > b) ? a : b;
+ }
+}
+
+
+template <typename T>
+T Simulator::FPMaxNM(T a, T b) {
+ if (IsQuietNaN(a) && !IsQuietNaN(b)) {
+ a = kFP64NegativeInfinity;
+ } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
+ b = kFP64NegativeInfinity;
+ }
+
+ T result = FPProcessNaNs(a, b);
+ return std::isnan(result) ? result : FPMax(a, b);
+}
+
+
+template <typename T>
+T Simulator::FPMin(T a, T b) {
+ T result = FPProcessNaNs(a, b);
+ if (std::isnan(result)) return result;
+
+ if ((a == 0.0) && (b == 0.0) &&
+ (copysign(1.0, a) != copysign(1.0, b))) {
+ // a and b are zero, and the sign differs: return -0.0.
+ return -0.0;
+ } else {
+ return (a < b) ? a : b;
+ }
+}
+
+
+template <typename T>
+T Simulator::FPMinNM(T a, T b) {
+ if (IsQuietNaN(a) && !IsQuietNaN(b)) {
+ a = kFP64PositiveInfinity;
+ } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
+ b = kFP64PositiveInfinity;
+ }
+
+ T result = FPProcessNaNs(a, b);
+ return std::isnan(result) ? result : FPMin(a, b);
+}
+
+
+template <typename T>
+T Simulator::FPRecipStepFused(T op1, T op2) {
+ const T two = 2.0;
+ if ((std::isinf(op1) && (op2 == 0.0))
+ || ((op1 == 0.0) && (std::isinf(op2)))) {
+ return two;
+ } else if (std::isinf(op1) || std::isinf(op2)) {
+ // Return +inf if signs match, otherwise -inf.
+ return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
+ : kFP64NegativeInfinity;
+ } else {
+ return FusedMultiplyAdd(op1, op2, two);
+ }
+}
+
+
+template <typename T>
+T Simulator::FPRSqrtStepFused(T op1, T op2) {
+ const T one_point_five = 1.5;
+ const T two = 2.0;
+
+ if ((std::isinf(op1) && (op2 == 0.0))
+ || ((op1 == 0.0) && (std::isinf(op2)))) {
+ return one_point_five;
+ } else if (std::isinf(op1) || std::isinf(op2)) {
+ // Return +inf if signs match, otherwise -inf.
+ return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
+ : kFP64NegativeInfinity;
+ } else {
+ // The multiply-add-halve operation must be fully fused, so avoid interim
+ // rounding by checking which operand can be losslessly divided by two
+ // before doing the multiply-add.
+ if (std::isnormal(op1 / two)) {
+ return FusedMultiplyAdd(op1 / two, op2, one_point_five);
+ } else if (std::isnormal(op2 / two)) {
+ return FusedMultiplyAdd(op1, op2 / two, one_point_five);
+ } else {
+ // Neither operand is normal after halving: the result is dominated by
+ // the addition term, so just return that.
+ return one_point_five;
+ }
+ }
+}
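+
+// Editorial note (not part of the upstream source): with the negation applied
+// by the frecps/frsqrts callers further down (op1 = -src1), the two step
+// functions above evaluate to
+//   FPRecipStepFused:  2.0 - src1 * src2            (the FRECPS step)
+//   FPRSqrtStepFused:  (3.0 - src1 * src2) / 2.0    (the FRSQRTS step)
+// with the halving folded into one of the fused-multiply-add operands so that
+// no intermediate rounding is introduced.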
+
+int32_t Simulator::FPToFixedJS(double value) {
+ // The Z-flag is set when the conversion from double precision floating-point
+ // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
+ // outside the bounds of a 32-bit integer, or isn't an exact integer then the
+ // Z-flag is unset.
+ int Z = 1;
+ int32_t result;
+
+ if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
+ (value == kFP64NegativeInfinity)) {
+    // +/- zero and infinity all return zero; however, -0 and +/- Infinity
+    // also unset the Z-flag.
+    result = 0;
+ if ((value != 0.0) || std::signbit(value)) {
+ Z = 0;
+ }
+ } else if (std::isnan(value)) {
+ // NaN values unset the Z-flag and set the result to 0.
+ FPProcessNaN(value);
+ result = 0;
+ Z = 0;
+ } else {
+ // All other values are converted to an integer representation, rounded
+ // toward zero.
+ double int_result = std::floor(value);
+ double error = value - int_result;
+
+ if ((error != 0.0) && (int_result < 0.0)) {
+ int_result++;
+ }
+
+ // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
+ // write a one-liner with std::round, but the behaviour on ties is incorrect
+ // for our purposes.
+ double mod_const = static_cast<double>(UINT64_C(1) << 32);
+ double mod_error =
+ (int_result / mod_const) - std::floor(int_result / mod_const);
+ double constrained;
+ if (mod_error == 0.5) {
+ constrained = INT32_MIN;
+ } else {
+ constrained = int_result - mod_const * round(int_result / mod_const);
+ }
+
+ VIXL_ASSERT(std::floor(constrained) == constrained);
+ VIXL_ASSERT(constrained >= INT32_MIN);
+ VIXL_ASSERT(constrained <= INT32_MAX);
+
+ // Take the bottom 32 bits of the result as a 32-bit integer.
+ result = static_cast<int32_t>(constrained);
+
+ if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
+ (error != 0.0)) {
+ // If the integer result is out of range or the conversion isn't exact,
+ // take exception and unset the Z-flag.
+ FPProcessException();
+ Z = 0;
+ }
+ }
+
+ ReadNzcv().SetN(0);
+ ReadNzcv().SetZ(Z);
+ ReadNzcv().SetC(0);
+ ReadNzcv().SetV(0);
+
+ return result;
+}
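+
+// Editorial note (illustrative examples, not part of the upstream source):
+//   FPToFixedJS(3.0)   -> 3,          Z == 1 (exact)
+//   FPToFixedJS(-3.5)  -> -3,         Z == 0 (truncated toward zero)
+//   FPToFixedJS(-0.0)  -> 0,          Z == 0 (negative zero is not exact here)
+//   FPToFixedJS(2^31)  -> INT32_MIN,  Z == 0 (out of range, wraps modulo 2^32)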
+
+
+double Simulator::FPRoundInt(double value, FPRounding round_mode) {
+ if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
+ (value == kFP64NegativeInfinity)) {
+ return value;
+ } else if (std::isnan(value)) {
+ return FPProcessNaN(value);
+ }
+
+ double int_result = std::floor(value);
+ double error = value - int_result;
+ switch (round_mode) {
+ case FPTieAway: {
+ // Take care of correctly handling the range ]-0.5, -0.0], which must
+ // yield -0.0.
+ if ((-0.5 < value) && (value < 0.0)) {
+ int_result = -0.0;
+
+ } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
+ // If the error is greater than 0.5, or is equal to 0.5 and the integer
+ // result is positive, round up.
+ int_result++;
+ }
+ break;
+ }
+ case FPTieEven: {
+ // Take care of correctly handling the range [-0.5, -0.0], which must
+ // yield -0.0.
+ if ((-0.5 <= value) && (value < 0.0)) {
+ int_result = -0.0;
+
+ // If the error is greater than 0.5, or is equal to 0.5 and the integer
+ // result is odd, round up.
+ } else if ((error > 0.5) ||
+ ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
+ int_result++;
+ }
+ break;
+ }
+ case FPZero: {
+      // If value > 0 then we take floor(value);
+      // otherwise, ceil(value).
+ if (value < 0) {
+ int_result = ceil(value);
+ }
+ break;
+ }
+ case FPNegativeInfinity: {
+ // We always use floor(value).
+ break;
+ }
+ case FPPositiveInfinity: {
+ // Take care of correctly handling the range ]-1.0, -0.0], which must
+ // yield -0.0.
+ if ((-1.0 < value) && (value < 0.0)) {
+ int_result = -0.0;
+
+ // If the error is non-zero, round up.
+ } else if (error > 0.0) {
+ int_result++;
+ }
+ break;
+ }
+ default: VIXL_UNIMPLEMENTED();
+ }
+ return int_result;
+}
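+
+// Editorial note (illustrative examples, not from the upstream source):
+//   FPRoundInt(2.5,  FPTieEven) == 2.0    (tie rounds to the even integer)
+//   FPRoundInt(3.5,  FPTieEven) == 4.0
+//   FPRoundInt(2.5,  FPTieAway) == 3.0    (tie rounds away from zero)
+//   FPRoundInt(-0.4, FPTieEven) == -0.0   (the sign of zero is preserved)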
+
+
+int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
+ value = FPRoundInt(value, rmode);
+ if (value >= kWMaxInt) {
+ return kWMaxInt;
+ } else if (value < kWMinInt) {
+ return kWMinInt;
+ }
+ return std::isnan(value) ? 0 : static_cast<int32_t>(value);
+}
+
+
+int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
+ value = FPRoundInt(value, rmode);
+ // The compiler would have to round kXMaxInt, triggering a warning. Compare
+ // against the largest int64_t that is exactly representable as a double.
+ if (value > kXMaxExactInt) {
+ return kXMaxInt;
+ } else if (value < kXMinInt) {
+ return kXMinInt;
+ }
+ return std::isnan(value) ? 0 : static_cast<int64_t>(value);
+}
+
+
+uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
+ value = FPRoundInt(value, rmode);
+ if (value >= kWMaxUInt) {
+ return kWMaxUInt;
+ } else if (value < 0.0) {
+ return 0;
+ }
+ return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
+}
+
+
+uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
+ value = FPRoundInt(value, rmode);
+ // The compiler would have to round kXMaxUInt, triggering a warning. Compare
+ // against the largest uint64_t that is exactly representable as a double.
+ if (value > kXMaxExactUInt) {
+ return kXMaxUInt;
+ } else if (value < 0.0) {
+ return 0;
+ }
+ return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
+}
+
+
+#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
+template <typename T> \
+LogicVRegister Simulator::FN(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2) { \
+ dst.ClearForWrite(vform); \
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) { \
+ T op1 = src1.Float<T>(i); \
+ T op2 = src2.Float<T>(i); \
+ T result; \
+ if (PROCNAN) { \
+ result = FPProcessNaNs(op1, op2); \
+ if (!std::isnan(result)) { \
+ result = OP(op1, op2); \
+ } \
+ } else { \
+ result = OP(op1, op2); \
+ } \
+ dst.SetFloat(i, result); \
+ } \
+ return dst; \
+} \
+ \
+LogicVRegister Simulator::FN(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2) { \
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \
+ FN<float>(vform, dst, src1, src2); \
+ } else { \
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
+ FN<double>(vform, dst, src1, src2); \
+ } \
+ return dst; \
+}
+NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
+#undef DEFINE_NEON_FP_VECTOR_OP
+
+
+LogicVRegister Simulator::fnmul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ LogicVRegister product = fmul(vform, temp, src1, src2);
+ return fneg(vform, dst, product);
+}
+
+
+template <typename T>
+LogicVRegister Simulator::frecps(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ T op1 = -src1.Float<T>(i);
+ T op2 = src2.Float<T>(i);
+ T result = FPProcessNaNs(op1, op2);
+ dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::frecps(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ frecps<float>(vform, dst, src1, src2);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ frecps<double>(vform, dst, src1, src2);
+ }
+ return dst;
+}
+
+
+template <typename T>
+LogicVRegister Simulator::frsqrts(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ T op1 = -src1.Float<T>(i);
+ T op2 = src2.Float<T>(i);
+ T result = FPProcessNaNs(op1, op2);
+ dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::frsqrts(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ frsqrts<float>(vform, dst, src1, src2);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ frsqrts<double>(vform, dst, src1, src2);
+ }
+ return dst;
+}
+
+
+template <typename T>
+LogicVRegister Simulator::fcmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ bool result = false;
+ T op1 = src1.Float<T>(i);
+ T op2 = src2.Float<T>(i);
+ T nan_result = FPProcessNaNs(op1, op2);
+ if (!std::isnan(nan_result)) {
+ switch (cond) {
+ case eq: result = (op1 == op2); break;
+ case ge: result = (op1 >= op2); break;
+ case gt: result = (op1 > op2) ; break;
+ case le: result = (op1 <= op2); break;
+ case lt: result = (op1 < op2) ; break;
+ default: VIXL_UNREACHABLE(); break;
+ }
+ }
+ dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ fcmp<float>(vform, dst, src1, src2, cond);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ fcmp<double>(vform, dst, src1, src2, cond);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ Condition cond) {
+ SimVRegister temp;
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
+ fcmp<float>(vform, dst, src, zero_reg, cond);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ LogicVRegister zero_reg = dup_immediate(vform, temp,
+ DoubleToRawbits(0.0));
+ fcmp<double>(vform, dst, src, zero_reg, cond);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fabscmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond) {
+ SimVRegister temp1, temp2;
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
+ LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
+ fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
+ LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
+ fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
+ }
+ return dst;
+}
+
+
+template <typename T>
+LogicVRegister Simulator::fmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ T op1 = src1.Float<T>(i);
+ T op2 = src2.Float<T>(i);
+ T acc = dst.Float<T>(i);
+ T result = FPMulAdd(acc, op1, op2);
+ dst.SetFloat(i, result);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ fmla<float>(vform, dst, src1, src2);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ fmla<double>(vform, dst, src1, src2);
+ }
+ return dst;
+}
+
+
+template <typename T>
+LogicVRegister Simulator::fmls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ T op1 = -src1.Float<T>(i);
+ T op2 = src2.Float<T>(i);
+ T acc = dst.Float<T>(i);
+ T result = FPMulAdd(acc, op1, op2);
+ dst.SetFloat(i, result);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fmls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ fmls<float>(vform, dst, src1, src2);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ fmls<double>(vform, dst, src1, src2);
+ }
+ return dst;
+}
+
+
+template <typename T>
+LogicVRegister Simulator::fneg(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ T op = src.Float<T>(i);
+ op = -op;
+ dst.SetFloat(i, op);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fneg(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ fneg<float>(vform, dst, src);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ fneg<double>(vform, dst, src);
+ }
+ return dst;
+}
+
+
+template <typename T>
+LogicVRegister Simulator::fabs_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ T op = src.Float<T>(i);
+ if (copysign(1.0, op) < 0.0) {
+ op = -op;
+ }
+ dst.SetFloat(i, op);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fabs_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ fabs_<float>(vform, dst, src);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ fabs_<double>(vform, dst, src);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fabd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ fsub(vform, temp, src1, src2);
+ fabs_(vform, dst, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::fsqrt(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ float result = FPSqrt(src.Float<float>(i));
+ dst.SetFloat(i, result);
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ double result = FPSqrt(src.Float<double>(i));
+ dst.SetFloat(i, result);
+ }
+ }
+ return dst;
+}
+
+
+#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
+LogicVRegister Simulator::FNP(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2) { \
+ SimVRegister temp1, temp2; \
+ uzp1(vform, temp1, src1, src2); \
+ uzp2(vform, temp2, src1, src2); \
+ FN(vform, dst, temp1, temp2); \
+ return dst; \
+} \
+ \
+LogicVRegister Simulator::FNP(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src) { \
+ if (vform == kFormatS) { \
+ float result = OP(src.Float<float>(0), src.Float<float>(1)); \
+ dst.SetFloat(0, result); \
+ } else { \
+ VIXL_ASSERT(vform == kFormatD); \
+ double result = OP(src.Float<double>(0), src.Float<double>(1)); \
+ dst.SetFloat(0, result); \
+ } \
+ dst.ClearForWrite(vform); \
+ return dst; \
+}
+NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
+#undef DEFINE_NEON_FP_PAIR_OP
+
+
+LogicVRegister Simulator::fminmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPMinMaxOp Op) {
+ VIXL_ASSERT(vform == kFormat4S);
+ USE(vform);
+ float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
+ float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
+ float result = (this->*Op)(result1, result2);
+ dst.ClearForWrite(kFormatS);
+ dst.SetFloat<float>(0, result);
+ return dst;
+}
+
+
+LogicVRegister Simulator::fmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return fminmaxv(vform, dst, src, &Simulator::FPMax);
+}
+
+
+LogicVRegister Simulator::fminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return fminmaxv(vform, dst, src, &Simulator::FPMin);
+}
+
+
+LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
+}
+
+
+LogicVRegister Simulator::fminnmv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
+}
+
+
+LogicVRegister Simulator::fmul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ dst.ClearForWrite(vform);
+ SimVRegister temp;
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
+ fmul<float>(vform, dst, src1, index_reg);
+
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
+ fmul<double>(vform, dst, src1, index_reg);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ dst.ClearForWrite(vform);
+ SimVRegister temp;
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
+ fmla<float>(vform, dst, src1, index_reg);
+
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
+ fmla<double>(vform, dst, src1, index_reg);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fmls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ dst.ClearForWrite(vform);
+ SimVRegister temp;
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
+ fmls<float>(vform, dst, src1, index_reg);
+
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
+ fmls<double>(vform, dst, src1, index_reg);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fmulx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ dst.ClearForWrite(vform);
+ SimVRegister temp;
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
+ fmulx<float>(vform, dst, src1, index_reg);
+
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
+ fmulx<double>(vform, dst, src1, index_reg);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::frint(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding rounding_mode,
+ bool inexact_exception) {
+ dst.ClearForWrite(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ float input = src.Float<float>(i);
+ float rounded = FPRoundInt(input, rounding_mode);
+ if (inexact_exception && !std::isnan(input) && (input != rounded)) {
+ FPProcessException();
+ }
+ dst.SetFloat<float>(i, rounded);
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ double input = src.Float<double>(i);
+ double rounded = FPRoundInt(input, rounding_mode);
+ if (inexact_exception && !std::isnan(input) && (input != rounded)) {
+ FPProcessException();
+ }
+ dst.SetFloat<double>(i, rounded);
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvts(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding rounding_mode,
+ int fbits) {
+ dst.ClearForWrite(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ float op = src.Float<float>(i) * std::pow(2.0f, fbits);
+ dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ double op = src.Float<double>(i) * std::pow(2.0, fbits);
+ dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvtu(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding rounding_mode,
+ int fbits) {
+ dst.ClearForWrite(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ float op = src.Float<float>(i) * std::pow(2.0f, fbits);
+ dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ double op = src.Float<double>(i) * std::pow(2.0, fbits);
+ dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvtl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
+ // TODO: Full support for SimFloat16 in SimRegister(s).
+ dst.SetFloat(i,
+ FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
+ ReadDN()));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
+ dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvtl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ int lane_count = LaneCountFromFormat(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = 0; i < lane_count; i++) {
+ // TODO: Full support for SimFloat16 in SimRegister(s).
+ dst.SetFloat(i,
+ FPToFloat(RawbitsToFloat16(
+ src.Float<uint16_t>(i + lane_count)),
+ ReadDN()));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = 0; i < lane_count; i++) {
+ dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ SimVRegister tmp;
+ LogicVRegister srctmp = mov(kFormat2D, tmp, src);
+ dst.ClearForWrite(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetFloat(i,
+ Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i),
+ FPTieEven,
+ ReadDN())));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN()));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvtn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ int lane_count = LaneCountFromFormat(vform) / 2;
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ for (int i = lane_count - 1; i >= 0; i--) {
+ dst.SetFloat(i + lane_count,
+ Float16ToRawbits(
+ FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
+ for (int i = lane_count - 1; i >= 0; i--) {
+ dst.SetFloat(i + lane_count,
+ FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvtxn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ SimVRegister tmp;
+ LogicVRegister srctmp = mov(kFormat2D, tmp, src);
+ dst.ClearForWrite(vform);
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN()));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
+ int lane_count = LaneCountFromFormat(vform) / 2;
+ for (int i = lane_count - 1; i >= 0; i--) {
+ dst.SetFloat(i + lane_count,
+ FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
+ }
+ return dst;
+}
+
+
+// Based on reference C function recip_sqrt_estimate from ARM ARM.
+double Simulator::recip_sqrt_estimate(double a) {
+ int q0, q1, s;
+ double r;
+ if (a < 0.5) {
+ q0 = static_cast<int>(a * 512.0);
+ r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
+ } else {
+ q1 = static_cast<int>(a * 256.0);
+ r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
+ }
+ s = static_cast<int>(256.0 * r + 0.5);
+ return static_cast<double>(s) / 256.0;
+}
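+
+// Editorial note (a worked example, not part of the upstream source): for
+// a == 0.25 the function takes the a < 0.5 branch with q0 == 128, giving
+// r == 1.0 / sqrt(128.5 / 512.0) and s == 511, so the returned estimate is
+// 511.0 / 256.0 == 1.99609375, within about 0.2% of the exact
+// 1 / sqrt(0.25) == 2.0. The estimate is quantised to a multiple of 1/256,
+// i.e. roughly eight fractional bits of precision.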
+
+
+static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
+ return ExtractUnsignedBitfield64(start_bit, end_bit, val);
+}
+
+
+template <typename T>
+T Simulator::FPRecipSqrtEstimate(T op) {
+ if (std::isnan(op)) {
+ return FPProcessNaN(op);
+ } else if (op == 0.0) {
+ if (copysign(1.0, op) < 0.0) {
+ return kFP64NegativeInfinity;
+ } else {
+ return kFP64PositiveInfinity;
+ }
+ } else if (copysign(1.0, op) < 0.0) {
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ } else if (std::isinf(op)) {
+ return 0.0;
+ } else {
+ uint64_t fraction;
+ int exp, result_exp;
+
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ exp = FloatExp(op);
+ fraction = FloatMantissa(op);
+ fraction <<= 29;
+ } else {
+ exp = DoubleExp(op);
+ fraction = DoubleMantissa(op);
+ }
+
+ if (exp == 0) {
+ while (Bits(fraction, 51, 51) == 0) {
+ fraction = Bits(fraction, 50, 0) << 1;
+ exp -= 1;
+ }
+ fraction = Bits(fraction, 50, 0) << 1;
+ }
+
+ double scaled;
+ if (Bits(exp, 0, 0) == 0) {
+ scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
+ } else {
+ scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
+ }
+
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ result_exp = (380 - exp) / 2;
+ } else {
+ result_exp = (3068 - exp) / 2;
+ }
+
+ uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
+
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
+ uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
+ return FloatPack(0, exp_bits, est_bits);
+ } else {
+ return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
+ }
+ }
+}
+
+
+LogicVRegister Simulator::frsqrte(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ float input = src.Float<float>(i);
+ dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ double input = src.Float<double>(i);
+ dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
+ }
+ }
+ return dst;
+}
+
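+// Scalar reciprocal estimate. NaN, infinity and zero inputs are handled
+// explicitly; inputs with magnitude below 2^-128 (single) or 2^-1024 (double)
+// overflow to infinity or to the largest normal value, depending on the
+// rounding mode; all remaining inputs are approximated via recip_estimate on
+// a significand scaled into [0.5, 1.0).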
+template <typename T>
+T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
+ uint32_t sign;
+
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ sign = FloatSign(op);
+ } else {
+ sign = DoubleSign(op);
+ }
+
+ if (std::isnan(op)) {
+ return FPProcessNaN(op);
+ } else if (std::isinf(op)) {
+ return (sign == 1) ? -0.0 : 0.0;
+ } else if (op == 0.0) {
+ FPProcessException(); // FPExc_DivideByZero exception.
+ return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
+ } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof)
+ (std::fabs(op) < std::pow(2.0, -128.0))) ||
+ ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof)
+ (std::fabs(op) < std::pow(2.0, -1024.0)))) {
+ bool overflow_to_inf = false;
+ switch (rounding) {
+ case FPTieEven: overflow_to_inf = true; break;
+ case FPPositiveInfinity: overflow_to_inf = (sign == 0); break;
+ case FPNegativeInfinity: overflow_to_inf = (sign == 1); break;
+ case FPZero: overflow_to_inf = false; break;
+ default: break;
+ }
+ FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
+ if (overflow_to_inf) {
+ return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
+ } else {
+ // Return FPMaxNormal(sign).
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ return FloatPack(sign, 0xfe, 0x07fffff);
+ } else {
+ return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
+ }
+ }
+ } else {
+ uint64_t fraction;
+ int exp, result_exp;
+ uint32_t sign;
+
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ sign = FloatSign(op);
+ exp = FloatExp(op);
+ fraction = FloatMantissa(op);
+ fraction <<= 29;
+ } else {
+ sign = DoubleSign(op);
+ exp = DoubleExp(op);
+ fraction = DoubleMantissa(op);
+ }
+
+ if (exp == 0) {
+ if (Bits(fraction, 51, 51) == 0) {
+ exp -= 1;
+ fraction = Bits(fraction, 49, 0) << 2;
+ } else {
+ fraction = Bits(fraction, 50, 0) << 1;
+ }
+ }
+
+ double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
+
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
+ } else {
+ result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
+ }
+
+ double estimate = recip_estimate(scaled);
+
+ fraction = DoubleMantissa(estimate);
+ if (result_exp == 0) {
+ fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
+ } else if (result_exp == -1) {
+ fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
+ result_exp = 0;
+ }
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
+ uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
+ return FloatPack(sign, exp_bits, frac_bits);
+ } else {
+ return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
+ }
+ }
+}
+
+
+LogicVRegister Simulator::frecpe(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding round) {
+ dst.ClearForWrite(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ float input = src.Float<float>(i);
+ dst.SetFloat(i, FPRecipEstimate<float>(input, round));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ double input = src.Float<double>(i);
+ dst.SetFloat(i, FPRecipEstimate<double>(input, round));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::ursqrte(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ uint64_t operand;
+ uint32_t result;
+ double dp_operand, dp_result;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ operand = src.Uint(vform, i);
+ if (operand <= 0x3FFFFFFF) {
+ result = 0xFFFFFFFF;
+ } else {
+ dp_operand = operand * std::pow(2.0, -32);
+ dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
+ result = static_cast<uint32_t>(dp_result);
+ }
+ dst.SetUint(vform, i, result);
+ }
+ return dst;
+}
+
+
+// Based on reference C function recip_estimate from ARM ARM.
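+// As with recip_sqrt_estimate, the result carries 8 fractional bits (s / 256).
+// Illustrative example (values rounded): for a = 0.5, q = 256, r = 512/256.5
+// ~= 1.9961, s = 511, and the function returns 511/256 ~= 1.99609.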
+double Simulator::recip_estimate(double a) {
+ int q, s;
+ double r;
+ q = static_cast<int>(a * 512.0);
+ r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
+ s = static_cast<int>(256.0 * r + 0.5);
+ return static_cast<double>(s) / 256.0;
+}
+
+
+LogicVRegister Simulator::urecpe(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ uint64_t operand;
+ uint32_t result;
+ double dp_operand, dp_result;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ operand = src.Uint(vform, i);
+ if (operand <= 0x7FFFFFFF) {
+ result = 0xFFFFFFFF;
+ } else {
+ dp_operand = operand * std::pow(2.0, -32);
+ dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
+ result = static_cast<uint32_t>(dp_result);
+ }
+ dst.SetUint(vform, i, result);
+ }
+ return dst;
+}
+
+template <typename T>
+LogicVRegister Simulator::frecpx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ T op = src.Float<T>(i);
+ T result;
+ if (std::isnan(op)) {
+ result = FPProcessNaN(op);
+ } else {
+ int exp;
+ uint32_t sign;
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ sign = FloatSign(op);
+ exp = FloatExp(op);
+ exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
+ result = FloatPack(sign, exp, 0);
+ } else {
+ sign = DoubleSign(op);
+ exp = DoubleExp(op);
+ exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
+ result = DoublePack(sign, exp, 0);
+ }
+ }
+ dst.SetFloat(i, result);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::frecpx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ frecpx<float>(vform, dst, src);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ frecpx<double>(vform, dst, src);
+ }
+ return dst;
+}
+
+LogicVRegister Simulator::scvtf(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int fbits,
+ FPRounding round) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
+ dst.SetFloat<float>(i, result);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
+ dst.SetFloat<double>(i, result);
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::ucvtf(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int fbits,
+ FPRounding round) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
+ dst.SetFloat<float>(i, result);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
+ dst.SetFloat<double>(i, result);
+ }
+ }
+ return dst;
+}
+
+
+} // namespace vixl
+
+#endif // JS_SIMULATOR_ARM64
diff --git a/js/src/jit/arm64/vixl/MacroAssembler-vixl.cpp b/js/src/jit/arm64/vixl/MacroAssembler-vixl.cpp
new file mode 100644
index 0000000000..5c4a5ce145
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MacroAssembler-vixl.cpp
@@ -0,0 +1,2027 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/MacroAssembler-vixl.h"
+
+#include <ctype.h>
+
+namespace vixl {
+
+MacroAssembler::MacroAssembler()
+ : js::jit::Assembler(),
+ sp_(x28),
+ tmp_list_(ip0, ip1),
+ fptmp_list_(d31)
+{
+}
+
+
+void MacroAssembler::FinalizeCode() {
+ Assembler::FinalizeCode();
+}
+
+
+int MacroAssembler::MoveImmediateHelper(MacroAssembler* masm,
+ const Register &rd,
+ uint64_t imm) {
+ bool emit_code = (masm != NULL);
+ VIXL_ASSERT(IsUint32(imm) || IsInt32(imm) || rd.Is64Bits());
+ // The worst case for size is mov 64-bit immediate to sp:
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction to move to sp
+ MacroEmissionCheckScope guard(masm);
+
+  // Immediates on AArch64 can be produced using an initial value, and zero to
+  // three move-keep operations.
+ //
+ // Initial values can be generated with:
+ // 1. 64-bit move zero (movz).
+ // 2. 32-bit move inverted (movn).
+ // 3. 64-bit move inverted.
+ // 4. 32-bit orr immediate.
+ // 5. 64-bit orr immediate.
+ // Move-keep may then be used to modify each of the 16-bit half words.
+ //
+ // The code below supports all five initial value generators, and
+ // applying move-keep operations to move-zero and move-inverted initial
+ // values.
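+  //
+  // Illustrative examples for a 64-bit destination xd:
+  //   0x0000cafe0000beef  ->  movz xd, #0xbeef
+  //                           movk xd, #0xcafe, lsl #32
+  //   0xffffbeefffffffff  ->  movn xd, #0x4110, lsl #32   (single instruction)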
+
+ // Try to move the immediate in one instruction, and if that fails, switch to
+ // using multiple instructions.
+ if (OneInstrMoveImmediateHelper(masm, rd, imm)) {
+ return 1;
+ } else {
+ int instruction_count = 0;
+ unsigned reg_size = rd.size();
+
+ // Generic immediate case. Imm will be represented by
+ // [imm3, imm2, imm1, imm0], where each imm is 16 bits.
+ // A move-zero or move-inverted is generated for the first non-zero or
+ // non-0xffff immX, and a move-keep for subsequent non-zero immX.
+
+ uint64_t ignored_halfword = 0;
+ bool invert_move = false;
+ // If the number of 0xffff halfwords is greater than the number of 0x0000
+ // halfwords, it's more efficient to use move-inverted.
+ if (CountClearHalfWords(~imm, reg_size) >
+ CountClearHalfWords(imm, reg_size)) {
+ ignored_halfword = 0xffff;
+ invert_move = true;
+ }
+
+ // Mov instructions can't move values into the stack pointer, so set up a
+ // temporary register, if needed.
+ UseScratchRegisterScope temps;
+ Register temp;
+ if (emit_code) {
+ temps.Open(masm);
+ temp = rd.IsSP() ? temps.AcquireSameSizeAs(rd) : rd;
+ }
+
+ // Iterate through the halfwords. Use movn/movz for the first non-ignored
+ // halfword, and movk for subsequent halfwords.
+ VIXL_ASSERT((reg_size % 16) == 0);
+ bool first_mov_done = false;
+ for (unsigned i = 0; i < (temp.size() / 16); i++) {
+ uint64_t imm16 = (imm >> (16 * i)) & 0xffff;
+ if (imm16 != ignored_halfword) {
+ if (!first_mov_done) {
+ if (invert_move) {
+ if (emit_code) masm->movn(temp, ~imm16 & 0xffff, 16 * i);
+ instruction_count++;
+ } else {
+ if (emit_code) masm->movz(temp, imm16, 16 * i);
+ instruction_count++;
+ }
+ first_mov_done = true;
+ } else {
+ // Construct a wider constant.
+ if (emit_code) masm->movk(temp, imm16, 16 * i);
+ instruction_count++;
+ }
+ }
+ }
+
+ VIXL_ASSERT(first_mov_done);
+
+ // Move the temporary if the original destination register was the stack
+ // pointer.
+ if (rd.IsSP()) {
+ if (emit_code) masm->mov(rd, temp);
+ instruction_count++;
+ }
+ return instruction_count;
+ }
+}
+
+
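+// Attempts to materialise 'imm' with a single movz, movn or orr (logical
+// immediate) instruction, returning true on success. For example,
+// 0x0000ffff00000000 fits movz (one non-zero halfword), 0xffff0000ffffffff
+// fits movn, and a repeating pattern such as 0x5555555555555555 fits the
+// logical-immediate encoding.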
+bool MacroAssembler::OneInstrMoveImmediateHelper(MacroAssembler* masm,
+ const Register& dst,
+ int64_t imm) {
+ bool emit_code = masm != NULL;
+ unsigned n, imm_s, imm_r;
+ int reg_size = dst.size();
+
+ if (IsImmMovz(imm, reg_size) && !dst.IsSP()) {
+ // Immediate can be represented in a move zero instruction. Movz can't write
+ // to the stack pointer.
+ if (emit_code) {
+ masm->movz(dst, imm);
+ }
+ return true;
+ } else if (IsImmMovn(imm, reg_size) && !dst.IsSP()) {
+ // Immediate can be represented in a move negative instruction. Movn can't
+ // write to the stack pointer.
+ if (emit_code) {
+ masm->movn(dst, dst.Is64Bits() ? ~imm : (~imm & kWRegMask));
+ }
+ return true;
+ } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) {
+ // Immediate can be represented in a logical orr instruction.
+ VIXL_ASSERT(!dst.IsZero());
+ if (emit_code) {
+ masm->LogicalImmediate(
+ dst, AppropriateZeroRegFor(dst), n, imm_s, imm_r, ORR);
+ }
+ return true;
+ }
+ return false;
+}
+
+
+void MacroAssembler::B(Label* label, BranchType type, Register reg, int bit) {
+ VIXL_ASSERT((reg.Is(NoReg) || (type >= kBranchTypeFirstUsingReg)) &&
+ ((bit == -1) || (type >= kBranchTypeFirstUsingBit)));
+ if (kBranchTypeFirstCondition <= type && type <= kBranchTypeLastCondition) {
+ B(static_cast<Condition>(type), label);
+ } else {
+ switch (type) {
+ case always: B(label); break;
+ case never: break;
+ case reg_zero: Cbz(reg, label); break;
+ case reg_not_zero: Cbnz(reg, label); break;
+ case reg_bit_clear: Tbz(reg, bit, label); break;
+ case reg_bit_set: Tbnz(reg, bit, label); break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ }
+}
+
+
+void MacroAssembler::B(Label* label) {
+ SingleEmissionCheckScope guard(this);
+ b(label);
+}
+
+
+void MacroAssembler::B(Label* label, Condition cond) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ EmissionCheckScope guard(this, 2 * kInstructionSize);
+
+ if (label->bound() && LabelIsOutOfRange(label, CondBranchType)) {
+ Label done;
+ b(&done, InvertCondition(cond));
+ b(label);
+ bind(&done);
+ } else {
+ b(label, cond);
+ }
+}
+
+
+void MacroAssembler::Cbnz(const Register& rt, Label* label) {
+ VIXL_ASSERT(!rt.IsZero());
+ EmissionCheckScope guard(this, 2 * kInstructionSize);
+
+ if (label->bound() && LabelIsOutOfRange(label, CondBranchType)) {
+ Label done;
+ cbz(rt, &done);
+ b(label);
+ bind(&done);
+ } else {
+ cbnz(rt, label);
+ }
+}
+
+
+void MacroAssembler::Cbz(const Register& rt, Label* label) {
+ VIXL_ASSERT(!rt.IsZero());
+ EmissionCheckScope guard(this, 2 * kInstructionSize);
+
+ if (label->bound() && LabelIsOutOfRange(label, CondBranchType)) {
+ Label done;
+ cbnz(rt, &done);
+ b(label);
+ bind(&done);
+ } else {
+ cbz(rt, label);
+ }
+}
+
+
+void MacroAssembler::Tbnz(const Register& rt, unsigned bit_pos, Label* label) {
+ VIXL_ASSERT(!rt.IsZero());
+ EmissionCheckScope guard(this, 2 * kInstructionSize);
+
+ if (label->bound() && LabelIsOutOfRange(label, TestBranchType)) {
+ Label done;
+ tbz(rt, bit_pos, &done);
+ b(label);
+ bind(&done);
+ } else {
+ tbnz(rt, bit_pos, label);
+ }
+}
+
+
+void MacroAssembler::Tbz(const Register& rt, unsigned bit_pos, Label* label) {
+ VIXL_ASSERT(!rt.IsZero());
+ EmissionCheckScope guard(this, 2 * kInstructionSize);
+
+ if (label->bound() && LabelIsOutOfRange(label, TestBranchType)) {
+ Label done;
+ tbnz(rt, bit_pos, &done);
+ b(label);
+ bind(&done);
+ } else {
+ tbz(rt, bit_pos, label);
+ }
+}
+
+
+void MacroAssembler::And(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, AND);
+}
+
+
+void MacroAssembler::Ands(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, ANDS);
+}
+
+
+void MacroAssembler::Tst(const Register& rn,
+ const Operand& operand) {
+ Ands(AppropriateZeroRegFor(rn), rn, operand);
+}
+
+
+void MacroAssembler::Bic(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, BIC);
+}
+
+
+void MacroAssembler::Bics(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, BICS);
+}
+
+
+void MacroAssembler::Orr(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, ORR);
+}
+
+
+void MacroAssembler::Orn(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, ORN);
+}
+
+
+void MacroAssembler::Eor(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, EOR);
+}
+
+
+void MacroAssembler::Eon(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, EON);
+}
+
+
+void MacroAssembler::LogicalMacro(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ LogicalOp op) {
+ // The worst case for size is logical immediate to sp:
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction to do the operation
+ // * 1 instruction to move to sp
+ MacroEmissionCheckScope guard(this);
+ UseScratchRegisterScope temps(this);
+
+ if (operand.IsImmediate()) {
+ int64_t immediate = operand.immediate();
+ unsigned reg_size = rd.size();
+
+ // If the operation is NOT, invert the operation and immediate.
+ if ((op & NOT) == NOT) {
+ op = static_cast<LogicalOp>(op & ~NOT);
+ immediate = ~immediate;
+ }
+
+ // Ignore the top 32 bits of an immediate if we're moving to a W register.
+ if (rd.Is32Bits()) {
+ // Check that the top 32 bits are consistent.
+ VIXL_ASSERT(((immediate >> kWRegSize) == 0) ||
+ ((immediate >> kWRegSize) == -1));
+ immediate &= kWRegMask;
+ }
+
+ VIXL_ASSERT(rd.Is64Bits() || IsUint32(immediate));
+
+ // Special cases for all set or all clear immediates.
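+    // For example, And(x0, x1, 0) reduces to Mov(x0, 0), and
+    // Eor(x0, x1, 0xffffffffffffffff) reduces to Mvn(x0, x1).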
+ if (immediate == 0) {
+ switch (op) {
+ case AND:
+ Mov(rd, 0);
+ return;
+ case ORR:
+ VIXL_FALLTHROUGH();
+ case EOR:
+ Mov(rd, rn);
+ return;
+ case ANDS:
+ VIXL_FALLTHROUGH();
+ case BICS:
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ } else if ((rd.Is64Bits() && (immediate == -1)) ||
+ (rd.Is32Bits() && (immediate == 0xffffffff))) {
+ switch (op) {
+ case AND:
+ Mov(rd, rn);
+ return;
+ case ORR:
+ Mov(rd, immediate);
+ return;
+ case EOR:
+ Mvn(rd, rn);
+ return;
+ case ANDS:
+ VIXL_FALLTHROUGH();
+ case BICS:
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ }
+
+ unsigned n, imm_s, imm_r;
+ if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
+ // Immediate can be encoded in the instruction.
+ LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
+ } else {
+ // Immediate can't be encoded: synthesize using move immediate.
+ Register temp = temps.AcquireSameSizeAs(rn);
+
+ // If the left-hand input is the stack pointer, we can't pre-shift the
+ // immediate, as the encoding won't allow the subsequent post shift.
+ PreShiftImmMode mode = rn.IsSP() ? kNoShift : kAnyShift;
+ Operand imm_operand = MoveImmediateForShiftedOp(temp, immediate, mode);
+
+ // VIXL can acquire temp registers. Assert that the caller is aware.
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn));
+ VIXL_ASSERT(!temp.Is(operand.maybeReg()));
+
+ if (rd.Is(sp)) {
+ // If rd is the stack pointer we cannot use it as the destination
+ // register so we use the temp register as an intermediate again.
+ Logical(temp, rn, imm_operand, op);
+ Mov(sp, temp);
+ } else {
+ Logical(rd, rn, imm_operand, op);
+ }
+ }
+ } else if (operand.IsExtendedRegister()) {
+ VIXL_ASSERT(operand.reg().size() <= rd.size());
+ // Add/sub extended supports shift <= 4. We want to support exactly the
+ // same modes here.
+ VIXL_ASSERT(operand.shift_amount() <= 4);
+ VIXL_ASSERT(operand.reg().Is64Bits() ||
+ ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
+
+ temps.Exclude(operand.reg());
+ Register temp = temps.AcquireSameSizeAs(rn);
+
+ // VIXL can acquire temp registers. Assert that the caller is aware.
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn));
+ VIXL_ASSERT(!temp.Is(operand.maybeReg()));
+
+ EmitExtendShift(temp, operand.reg(), operand.extend(),
+ operand.shift_amount());
+ Logical(rd, rn, Operand(temp), op);
+ } else {
+ // The operand can be encoded in the instruction.
+ VIXL_ASSERT(operand.IsShiftedRegister());
+ Logical(rd, rn, operand, op);
+ }
+}
+
+
+void MacroAssembler::Mov(const Register& rd,
+ const Operand& operand,
+ DiscardMoveMode discard_mode) {
+ // The worst case for size is mov immediate with up to 4 instructions.
+ MacroEmissionCheckScope guard(this);
+
+ if (operand.IsImmediate()) {
+ // Call the macro assembler for generic immediates.
+ Mov(rd, operand.immediate());
+ } else if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) {
+ // Emit a shift instruction if moving a shifted register. This operation
+ // could also be achieved using an orr instruction (like orn used by Mvn),
+ // but using a shift instruction makes the disassembly clearer.
+ EmitShift(rd, operand.reg(), operand.shift(), operand.shift_amount());
+ } else if (operand.IsExtendedRegister()) {
+ // Emit an extend instruction if moving an extended register. This handles
+ // extend with post-shift operations, too.
+ EmitExtendShift(rd, operand.reg(), operand.extend(),
+ operand.shift_amount());
+ } else {
+ // Otherwise, emit a register move only if the registers are distinct, or
+ // if they are not X registers.
+ //
+ // Note that mov(w0, w0) is not a no-op because it clears the top word of
+ // x0. A flag is provided (kDiscardForSameWReg) if a move between the same W
+ // registers is not required to clear the top word of the X register. In
+ // this case, the instruction is discarded.
+ //
+ // If the sp is an operand, add #0 is emitted, otherwise, orr #0.
+ if (!rd.Is(operand.reg()) || (rd.Is32Bits() &&
+ (discard_mode == kDontDiscardForSameWReg))) {
+ mov(rd, operand.reg());
+ }
+ }
+}
+
+
+void MacroAssembler::Movi16bitHelper(const VRegister& vd, uint64_t imm) {
+ VIXL_ASSERT(IsUint16(imm));
+ int byte1 = (imm & 0xff);
+ int byte2 = ((imm >> 8) & 0xff);
+ if (byte1 == byte2) {
+ movi(vd.Is64Bits() ? vd.V8B() : vd.V16B(), byte1);
+ } else if (byte1 == 0) {
+ movi(vd, byte2, LSL, 8);
+ } else if (byte2 == 0) {
+ movi(vd, byte1);
+ } else if (byte1 == 0xff) {
+ mvni(vd, ~byte2 & 0xff, LSL, 8);
+ } else if (byte2 == 0xff) {
+ mvni(vd, ~byte1 & 0xff);
+ } else {
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireW();
+ movz(temp, imm);
+ dup(vd, temp);
+ }
+}
+
+
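+// Expands a 32-bit immediate for vector moves, preferring the single
+// movi/mvni encodings over a scalar move plus dup. Illustrative examples:
+// 0x0000ab00 becomes movi vd, #0xab, lsl #8, and 0x00abffff uses the
+// shift-ones-in form movi vd, #0xab, msl #16.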
+void MacroAssembler::Movi32bitHelper(const VRegister& vd, uint64_t imm) {
+ VIXL_ASSERT(IsUint32(imm));
+
+ uint8_t bytes[sizeof(imm)];
+ memcpy(bytes, &imm, sizeof(imm));
+
+ // All bytes are either 0x00 or 0xff.
+ {
+ bool all0orff = true;
+ for (int i = 0; i < 4; ++i) {
+ if ((bytes[i] != 0) && (bytes[i] != 0xff)) {
+ all0orff = false;
+ break;
+ }
+ }
+
+ if (all0orff == true) {
+ movi(vd.Is64Bits() ? vd.V1D() : vd.V2D(), ((imm << 32) | imm));
+ return;
+ }
+ }
+
+ // Of the 4 bytes, only one byte is non-zero.
+ for (int i = 0; i < 4; i++) {
+ if ((imm & (0xff << (i * 8))) == imm) {
+ movi(vd, bytes[i], LSL, i * 8);
+ return;
+ }
+ }
+
+ // Of the 4 bytes, only one byte is not 0xff.
+ for (int i = 0; i < 4; i++) {
+ uint32_t mask = ~(0xff << (i * 8));
+ if ((imm & mask) == mask) {
+ mvni(vd, ~bytes[i] & 0xff, LSL, i * 8);
+ return;
+ }
+ }
+
+ // Immediate is of the form 0x00MMFFFF.
+ if ((imm & 0xff00ffff) == 0x0000ffff) {
+ movi(vd, bytes[2], MSL, 16);
+ return;
+ }
+
+ // Immediate is of the form 0x0000MMFF.
+ if ((imm & 0xffff00ff) == 0x000000ff) {
+ movi(vd, bytes[1], MSL, 8);
+ return;
+ }
+
+ // Immediate is of the form 0xFFMM0000.
+ if ((imm & 0xff00ffff) == 0xff000000) {
+ mvni(vd, ~bytes[2] & 0xff, MSL, 16);
+ return;
+ }
+ // Immediate is of the form 0xFFFFMM00.
+ if ((imm & 0xffff00ff) == 0xffff0000) {
+ mvni(vd, ~bytes[1] & 0xff, MSL, 8);
+ return;
+ }
+
+ // Top and bottom 16-bits are equal.
+ if (((imm >> 16) & 0xffff) == (imm & 0xffff)) {
+ Movi16bitHelper(vd.Is64Bits() ? vd.V4H() : vd.V8H(), imm & 0xffff);
+ return;
+ }
+
+ // Default case.
+ {
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireW();
+ Mov(temp, imm);
+ dup(vd, temp);
+ }
+}
+
+
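+// Expands a 64-bit immediate: values whose bytes are all 0x00 or 0xff (for
+// example 0x00ff00ff00ff00ff) map onto the byte-mask movi encoding, values
+// with equal 32-bit halves defer to Movi32bitHelper, and anything else is
+// built in an X register and then copied into the vector register.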
+void MacroAssembler::Movi64bitHelper(const VRegister& vd, uint64_t imm) {
+ // All bytes are either 0x00 or 0xff.
+ {
+ bool all0orff = true;
+ for (int i = 0; i < 8; ++i) {
+ int byteval = (imm >> (i * 8)) & 0xff;
+ if (byteval != 0 && byteval != 0xff) {
+ all0orff = false;
+ break;
+ }
+ }
+ if (all0orff == true) {
+ movi(vd, imm);
+ return;
+ }
+ }
+
+ // Top and bottom 32-bits are equal.
+ if (((imm >> 32) & 0xffffffff) == (imm & 0xffffffff)) {
+ Movi32bitHelper(vd.Is64Bits() ? vd.V2S() : vd.V4S(), imm & 0xffffffff);
+ return;
+ }
+
+ // Default case.
+ {
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireX();
+ Mov(temp, imm);
+ if (vd.Is1D()) {
+ mov(vd.D(), 0, temp);
+ } else {
+ dup(vd.V2D(), temp);
+ }
+ }
+}
+
+
+void MacroAssembler::Movi(const VRegister& vd,
+ uint64_t imm,
+ Shift shift,
+ int shift_amount) {
+ MacroEmissionCheckScope guard(this);
+ if (shift_amount != 0 || shift != LSL) {
+ movi(vd, imm, shift, shift_amount);
+ } else if (vd.Is8B() || vd.Is16B()) {
+ // 8-bit immediate.
+ VIXL_ASSERT(IsUint8(imm));
+ movi(vd, imm);
+ } else if (vd.Is4H() || vd.Is8H()) {
+ // 16-bit immediate.
+ Movi16bitHelper(vd, imm);
+ } else if (vd.Is2S() || vd.Is4S()) {
+ // 32-bit immediate.
+ Movi32bitHelper(vd, imm);
+ } else {
+ // 64-bit immediate.
+ Movi64bitHelper(vd, imm);
+ }
+}
+
+
+void MacroAssembler::Movi(const VRegister& vd,
+ uint64_t hi,
+ uint64_t lo) {
+ VIXL_ASSERT(vd.Is128Bits());
+ UseScratchRegisterScope temps(this);
+
+ // When hi == lo, the following generates good code.
+ //
+ // In situations where the constants are complex and hi != lo, the following
+ // can turn into up to 10 instructions: 2*(mov + 3*movk + dup/insert). To do
+ // any better, we could try to estimate whether splatting the high value and
+ // updating the low value would generate fewer instructions than vice versa
+ // (what we do now).
+ //
+ // (A PC-relative load from memory to the vector register (ADR + LD2) is going
+ // to have fairly high latency but is fairly compact; not clear what the best
+ // tradeoff is.)
+
+ Movi(vd.V2D(), lo);
+ if (hi != lo) {
+ Register temp = temps.AcquireX();
+ Mov(temp, hi);
+ Ins(vd.V2D(), 1, temp);
+ }
+}
+
+
+void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
+ // The worst case for size is mvn immediate with up to 4 instructions.
+ MacroEmissionCheckScope guard(this);
+
+ if (operand.IsImmediate()) {
+ // Call the macro assembler for generic immediates.
+ Mvn(rd, operand.immediate());
+ } else if (operand.IsExtendedRegister()) {
+ UseScratchRegisterScope temps(this);
+ temps.Exclude(operand.reg());
+
+ // Emit two instructions for the extend case. This differs from Mov, as
+ // the extend and invert can't be achieved in one instruction.
+ Register temp = temps.AcquireSameSizeAs(rd);
+
+ // VIXL can acquire temp registers. Assert that the caller is aware.
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(operand.maybeReg()));
+
+ EmitExtendShift(temp, operand.reg(), operand.extend(),
+ operand.shift_amount());
+ mvn(rd, Operand(temp));
+ } else {
+ // Otherwise, register and shifted register cases can be handled by the
+ // assembler directly, using orn.
+ mvn(rd, operand);
+ }
+}
+
+
+void MacroAssembler::Mov(const Register& rd, uint64_t imm) {
+ MoveImmediateHelper(this, rd, imm);
+}
+
+
+void MacroAssembler::Ccmp(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond) {
+ if (operand.IsImmediate() && (operand.immediate() < 0)) {
+ ConditionalCompareMacro(rn, -operand.immediate(), nzcv, cond, CCMN);
+ } else {
+ ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP);
+ }
+}
+
+
+void MacroAssembler::Ccmn(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond) {
+ if (operand.IsImmediate() && (operand.immediate() < 0)) {
+ ConditionalCompareMacro(rn, -operand.immediate(), nzcv, cond, CCMP);
+ } else {
+ ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN);
+ }
+}
+
+
+void MacroAssembler::ConditionalCompareMacro(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond,
+ ConditionalCompareOp op) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ // The worst case for size is ccmp immediate:
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction for ccmp
+ MacroEmissionCheckScope guard(this);
+
+ if ((operand.IsShiftedRegister() && (operand.shift_amount() == 0)) ||
+ (operand.IsImmediate() && IsImmConditionalCompare(operand.immediate()))) {
+ // The immediate can be encoded in the instruction, or the operand is an
+ // unshifted register: call the assembler.
+ ConditionalCompare(rn, operand, nzcv, cond, op);
+ } else {
+ UseScratchRegisterScope temps(this);
+ // The operand isn't directly supported by the instruction: perform the
+ // operation on a temporary register.
+ Register temp = temps.AcquireSameSizeAs(rn);
+ VIXL_ASSERT(!temp.Is(rn) && !temp.Is(operand.maybeReg()));
+ Mov(temp, operand);
+ ConditionalCompare(rn, temp, nzcv, cond, op);
+ }
+}
+
+
+void MacroAssembler::Csel(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ // The worst case for size is csel immediate:
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction for csel
+ MacroEmissionCheckScope guard(this);
+
+ if (operand.IsImmediate()) {
+ // Immediate argument. Handle special cases of 0, 1 and -1 using zero
+ // register.
+ int64_t imm = operand.immediate();
+ Register zr = AppropriateZeroRegFor(rn);
+ if (imm == 0) {
+ csel(rd, rn, zr, cond);
+ } else if (imm == 1) {
+ csinc(rd, rn, zr, cond);
+ } else if (imm == -1) {
+ csinv(rd, rn, zr, cond);
+ } else {
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireSameSizeAs(rn);
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn));
+ VIXL_ASSERT(!temp.Is(operand.maybeReg()));
+ Mov(temp, operand.immediate());
+ csel(rd, rn, temp, cond);
+ }
+ } else if (operand.IsShiftedRegister() && (operand.shift_amount() == 0)) {
+ // Unshifted register argument.
+ csel(rd, rn, operand.reg(), cond);
+ } else {
+ // All other arguments.
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireSameSizeAs(rn);
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn));
+ VIXL_ASSERT(!temp.Is(operand.maybeReg()));
+ Mov(temp, operand);
+ csel(rd, rn, temp, cond);
+ }
+}
+
+
+void MacroAssembler::Add(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S) {
+ if (operand.IsImmediate() && (operand.immediate() < 0) &&
+ IsImmAddSub(-operand.immediate())) {
+ AddSubMacro(rd, rn, -operand.immediate(), S, SUB);
+ } else {
+ AddSubMacro(rd, rn, operand, S, ADD);
+ }
+}
+
+
+void MacroAssembler::Adds(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Add(rd, rn, operand, SetFlags);
+}
+
+
+void MacroAssembler::Sub(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S) {
+ if (operand.IsImmediate() && (operand.immediate() < 0) &&
+ IsImmAddSub(-operand.immediate())) {
+ AddSubMacro(rd, rn, -operand.immediate(), S, ADD);
+ } else {
+ AddSubMacro(rd, rn, operand, S, SUB);
+ }
+}
+
+
+void MacroAssembler::Subs(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Sub(rd, rn, operand, SetFlags);
+}
+
+
+void MacroAssembler::Cmn(const Register& rn, const Operand& operand) {
+ Adds(AppropriateZeroRegFor(rn), rn, operand);
+}
+
+
+void MacroAssembler::Cmp(const Register& rn, const Operand& operand) {
+ Subs(AppropriateZeroRegFor(rn), rn, operand);
+}
+
+
+void MacroAssembler::Fcmp(const FPRegister& fn, double value,
+ FPTrapFlags trap) {
+ // The worst case for size is:
+  // * 1 instruction to materialise the constant (literal pool if necessary)
+ // * 1 instruction for fcmp{e}
+ MacroEmissionCheckScope guard(this);
+ if (value != 0.0) {
+ UseScratchRegisterScope temps(this);
+ FPRegister tmp = temps.AcquireSameSizeAs(fn);
+ VIXL_ASSERT(!tmp.Is(fn));
+ Fmov(tmp, value);
+ FPCompareMacro(fn, tmp, trap);
+ } else {
+ FPCompareMacro(fn, value, trap);
+ }
+}
+
+
+void MacroAssembler::Fcmpe(const FPRegister& fn, double value) {
+ Fcmp(fn, value, EnableTrap);
+}
+
+
+void MacroAssembler::Fmov(VRegister vd, double imm) {
+ // Floating point immediates are loaded through the literal pool.
+ MacroEmissionCheckScope guard(this);
+
+ if (vd.Is1S() || vd.Is2S() || vd.Is4S()) {
+ Fmov(vd, static_cast<float>(imm));
+ return;
+ }
+
+ VIXL_ASSERT(vd.Is1D() || vd.Is2D());
+ if (IsImmFP64(imm)) {
+ fmov(vd, imm);
+ } else {
+ uint64_t rawbits = DoubleToRawbits(imm);
+ if (vd.IsScalar()) {
+ if (rawbits == 0) {
+ fmov(vd, xzr);
+ } else {
+ Assembler::fImmPool64(vd, imm);
+ }
+ } else {
+ // TODO: consider NEON support for load literal.
+ Movi(vd, rawbits);
+ }
+ }
+}
+
+
+void MacroAssembler::Fmov(VRegister vd, float imm) {
+ // Floating point immediates are loaded through the literal pool.
+ MacroEmissionCheckScope guard(this);
+
+ if (vd.Is1D() || vd.Is2D()) {
+ Fmov(vd, static_cast<double>(imm));
+ return;
+ }
+
+ VIXL_ASSERT(vd.Is1S() || vd.Is2S() || vd.Is4S());
+ if (IsImmFP32(imm)) {
+ fmov(vd, imm);
+ } else {
+ uint32_t rawbits = FloatToRawbits(imm);
+ if (vd.IsScalar()) {
+ if (rawbits == 0) {
+ fmov(vd, wzr);
+ } else {
+ Assembler::fImmPool32(vd, imm);
+ }
+ } else {
+ // TODO: consider NEON support for load literal.
+ Movi(vd, rawbits);
+ }
+ }
+}
+
+
+void MacroAssembler::Neg(const Register& rd,
+ const Operand& operand) {
+ if (operand.IsImmediate()) {
+ Mov(rd, -operand.immediate());
+ } else {
+ Sub(rd, AppropriateZeroRegFor(rd), operand);
+ }
+}
+
+
+void MacroAssembler::Negs(const Register& rd,
+ const Operand& operand) {
+ Subs(rd, AppropriateZeroRegFor(rd), operand);
+}
+
+
+bool MacroAssembler::TryOneInstrMoveImmediate(const Register& dst,
+ int64_t imm) {
+ return OneInstrMoveImmediateHelper(this, dst, imm);
+}
+
+
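+// Moves 'imm' (or a shifted-down form of it) into 'dst' and returns an operand
+// that reproduces the original value. Illustrative example, assuming
+// unrestricted shifts (kAnyShift): 0x123400 needs movz+movk on its own, but
+// 0x123400 >> 10 = 0x48d fits a single movz, so 0x48d is moved into dst and
+// Operand(dst, LSL, 10) is returned for the caller to fold into the subsequent
+// arithmetic or logical instruction.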
+Operand MacroAssembler::MoveImmediateForShiftedOp(const Register& dst,
+ int64_t imm,
+ PreShiftImmMode mode) {
+ int reg_size = dst.size();
+
+ // Encode the immediate in a single move instruction, if possible.
+ if (TryOneInstrMoveImmediate(dst, imm)) {
+ // The move was successful; nothing to do here.
+ } else {
+ // Pre-shift the immediate to the least-significant bits of the register.
+ int shift_low = CountTrailingZeros(imm, reg_size);
+ if (mode == kLimitShiftForSP) {
+ // When applied to the stack pointer, the subsequent arithmetic operation
+ // can use the extend form to shift left by a maximum of four bits. Right
+ // shifts are not allowed, so we filter them out later before the new
+ // immediate is tested.
+ shift_low = std::min(shift_low, 4);
+ }
+
+ int64_t imm_low = imm >> shift_low;
+
+ // Pre-shift the immediate to the most-significant bits of the register,
+ // inserting set bits in the least-significant bits.
+ int shift_high = CountLeadingZeros(imm, reg_size);
+ int64_t imm_high = (imm << shift_high) | ((INT64_C(1) << shift_high) - 1);
+
+ if ((mode != kNoShift) && TryOneInstrMoveImmediate(dst, imm_low)) {
+ // The new immediate has been moved into the destination's low bits:
+ // return a new leftward-shifting operand.
+ return Operand(dst, LSL, shift_low);
+ } else if ((mode == kAnyShift) && TryOneInstrMoveImmediate(dst, imm_high)) {
+ // The new immediate has been moved into the destination's high bits:
+ // return a new rightward-shifting operand.
+ return Operand(dst, LSR, shift_high);
+ } else {
+ Mov(dst, imm);
+ }
+ }
+ return Operand(dst);
+}
+
+
+void MacroAssembler::ComputeAddress(const Register& dst,
+ const MemOperand& mem_op) {
+ // We cannot handle pre-indexing or post-indexing.
+ VIXL_ASSERT(mem_op.addrmode() == Offset);
+ Register base = mem_op.base();
+ if (mem_op.IsImmediateOffset()) {
+ Add(dst, base, mem_op.offset());
+ } else {
+ VIXL_ASSERT(mem_op.IsRegisterOffset());
+ Register reg_offset = mem_op.regoffset();
+ Shift shift = mem_op.shift();
+ Extend extend = mem_op.extend();
+ if (shift == NO_SHIFT) {
+ VIXL_ASSERT(extend != NO_EXTEND);
+ Add(dst, base, Operand(reg_offset, extend, mem_op.shift_amount()));
+ } else {
+ VIXL_ASSERT(extend == NO_EXTEND);
+ Add(dst, base, Operand(reg_offset, shift, mem_op.shift_amount()));
+ }
+ }
+}
+
+
+void MacroAssembler::AddSubMacro(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubOp op) {
+ // Worst case is add/sub immediate:
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction for add/sub
+ MacroEmissionCheckScope guard(this);
+
+ if (operand.IsZero() && rd.Is(rn) && rd.Is64Bits() && rn.Is64Bits() &&
+ (S == LeaveFlags)) {
+ // The instruction would be a nop. Avoid generating useless code.
+ return;
+ }
+
+ if ((operand.IsImmediate() && !IsImmAddSub(operand.immediate())) ||
+ (rn.IsZero() && !operand.IsShiftedRegister()) ||
+ (operand.IsShiftedRegister() && (operand.shift() == ROR))) {
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireSameSizeAs(rn);
+ if (operand.IsImmediate()) {
+ PreShiftImmMode mode = kAnyShift;
+
+ // If the destination or source register is the stack pointer, we can
+ // only pre-shift the immediate right by values supported in the add/sub
+ // extend encoding.
+ if (rd.IsSP()) {
+ // If the destination is SP and flags will be set, we can't pre-shift
+ // the immediate at all.
+ mode = (S == SetFlags) ? kNoShift : kLimitShiftForSP;
+ } else if (rn.IsSP()) {
+ mode = kLimitShiftForSP;
+ }
+
+ Operand imm_operand =
+ MoveImmediateForShiftedOp(temp, operand.immediate(), mode);
+ AddSub(rd, rn, imm_operand, S, op);
+ } else {
+ Mov(temp, operand);
+ AddSub(rd, rn, temp, S, op);
+ }
+ } else {
+ AddSub(rd, rn, operand, S, op);
+ }
+}
+
+
+void MacroAssembler::Adc(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, ADC);
+}
+
+
+void MacroAssembler::Adcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarryMacro(rd, rn, operand, SetFlags, ADC);
+}
+
+
+void MacroAssembler::Sbc(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, SBC);
+}
+
+
+void MacroAssembler::Sbcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarryMacro(rd, rn, operand, SetFlags, SBC);
+}
+
+
+void MacroAssembler::Ngc(const Register& rd,
+ const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rd);
+ Sbc(rd, zr, operand);
+}
+
+
+void MacroAssembler::Ngcs(const Register& rd,
+ const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rd);
+ Sbcs(rd, zr, operand);
+}
+
+
+void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubWithCarryOp op) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ // Worst case is addc/subc immediate:
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction for add/sub
+ MacroEmissionCheckScope guard(this);
+ UseScratchRegisterScope temps(this);
+
+ if (operand.IsImmediate() ||
+ (operand.IsShiftedRegister() && (operand.shift() == ROR))) {
+  // Add/sub with carry (immediate or ROR shifted register).
+ Register temp = temps.AcquireSameSizeAs(rn);
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn) && !temp.Is(operand.maybeReg()));
+ Mov(temp, operand);
+ AddSubWithCarry(rd, rn, Operand(temp), S, op);
+ } else if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) {
+ // Add/sub with carry (shifted register).
+ VIXL_ASSERT(operand.reg().size() == rd.size());
+ VIXL_ASSERT(operand.shift() != ROR);
+ VIXL_ASSERT(IsUintN(rd.size() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2,
+ operand.shift_amount()));
+ temps.Exclude(operand.reg());
+ Register temp = temps.AcquireSameSizeAs(rn);
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn) && !temp.Is(operand.maybeReg()));
+ EmitShift(temp, operand.reg(), operand.shift(), operand.shift_amount());
+ AddSubWithCarry(rd, rn, Operand(temp), S, op);
+ } else if (operand.IsExtendedRegister()) {
+ // Add/sub with carry (extended register).
+ VIXL_ASSERT(operand.reg().size() <= rd.size());
+ // Add/sub extended supports a shift <= 4. We want to support exactly the
+ // same modes.
+ VIXL_ASSERT(operand.shift_amount() <= 4);
+ VIXL_ASSERT(operand.reg().Is64Bits() ||
+ ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
+ temps.Exclude(operand.reg());
+ Register temp = temps.AcquireSameSizeAs(rn);
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn) && !temp.Is(operand.maybeReg()));
+ EmitExtendShift(temp, operand.reg(), operand.extend(),
+ operand.shift_amount());
+ AddSubWithCarry(rd, rn, Operand(temp), S, op);
+ } else {
+ // The addressing mode is directly supported by the instruction.
+ AddSubWithCarry(rd, rn, operand, S, op);
+ }
+}
+
+
+#define DEFINE_FUNCTION(FN, REGTYPE, REG, OP) \
+void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) { \
+ LoadStoreMacro(REG, addr, OP); \
+}
+LS_MACRO_LIST(DEFINE_FUNCTION)
+#undef DEFINE_FUNCTION
+
+
+void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
+ const MemOperand& addr,
+ LoadStoreOp op) {
+ // Worst case is ldr/str pre/post index:
+ // * 1 instruction for ldr/str
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction to update the base
+ MacroEmissionCheckScope guard(this);
+
+ int64_t offset = addr.offset();
+ unsigned access_size = CalcLSDataSize(op);
+
+ // Check if an immediate offset fits in the immediate field of the
+ // appropriate instruction. If not, emit two instructions to perform
+ // the operation.
+ if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, access_size) &&
+ !IsImmLSUnscaled(offset)) {
+ // Immediate offset that can't be encoded using unsigned or unscaled
+ // addressing modes.
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireSameSizeAs(addr.base());
+ VIXL_ASSERT(!temp.Is(rt));
+ VIXL_ASSERT(!temp.Is(addr.base()) && !temp.Is(addr.regoffset()));
+ Mov(temp, addr.offset());
+ LoadStore(rt, MemOperand(addr.base(), temp), op);
+ } else if (addr.IsPostIndex() && !IsImmLSUnscaled(offset)) {
+ // Post-index beyond unscaled addressing range.
+ LoadStore(rt, MemOperand(addr.base()), op);
+ Add(addr.base(), addr.base(), Operand(offset));
+ } else if (addr.IsPreIndex() && !IsImmLSUnscaled(offset)) {
+ // Pre-index beyond unscaled addressing range.
+ Add(addr.base(), addr.base(), Operand(offset));
+ LoadStore(rt, MemOperand(addr.base()), op);
+ } else {
+ // Encodable in one load/store instruction.
+ LoadStore(rt, addr, op);
+ }
+}
+
+
+#define DEFINE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \
+void MacroAssembler::FN(const REGTYPE REG, \
+ const REGTYPE REG2, \
+ const MemOperand& addr) { \
+ LoadStorePairMacro(REG, REG2, addr, OP); \
+}
+LSPAIR_MACRO_LIST(DEFINE_FUNCTION)
+#undef DEFINE_FUNCTION
+
+void MacroAssembler::LoadStorePairMacro(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& addr,
+ LoadStorePairOp op) {
+ // TODO(all): Should we support register offset for load-store-pair?
+ VIXL_ASSERT(!addr.IsRegisterOffset());
+ // Worst case is ldp/stp immediate:
+ // * 1 instruction for ldp/stp
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction to update the base
+ MacroEmissionCheckScope guard(this);
+
+ int64_t offset = addr.offset();
+ unsigned access_size = CalcLSPairDataSize(op);
+
+ // Check if the offset fits in the immediate field of the appropriate
+ // instruction. If not, emit two instructions to perform the operation.
+ if (IsImmLSPair(offset, access_size)) {
+ // Encodable in one load/store pair instruction.
+ LoadStorePair(rt, rt2, addr, op);
+ } else {
+ Register base = addr.base();
+ if (addr.IsImmediateOffset()) {
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireSameSizeAs(base);
+ Add(temp, base, offset);
+ LoadStorePair(rt, rt2, MemOperand(temp), op);
+ } else if (addr.IsPostIndex()) {
+ LoadStorePair(rt, rt2, MemOperand(base), op);
+ Add(base, base, offset);
+ } else {
+ VIXL_ASSERT(addr.IsPreIndex());
+ Add(base, base, offset);
+ LoadStorePair(rt, rt2, MemOperand(base), op);
+ }
+ }
+}
+
+
+void MacroAssembler::Prfm(PrefetchOperation op, const MemOperand& addr) {
+ MacroEmissionCheckScope guard(this);
+
+ // There are no pre- or post-index modes for prfm.
+ VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsRegisterOffset());
+
+ // The access size is implicitly 8 bytes for all prefetch operations.
+ unsigned size = kXRegSizeInBytesLog2;
+
+ // Check if an immediate offset fits in the immediate field of the
+ // appropriate instruction. If not, emit two instructions to perform
+ // the operation.
+ if (addr.IsImmediateOffset() && !IsImmLSScaled(addr.offset(), size) &&
+ !IsImmLSUnscaled(addr.offset())) {
+ // Immediate offset that can't be encoded using unsigned or unscaled
+ // addressing modes.
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireSameSizeAs(addr.base());
+ Mov(temp, addr.offset());
+ Prefetch(op, MemOperand(addr.base(), temp));
+ } else {
+ // Simple register-offsets are encodable in one instruction.
+ Prefetch(op, addr);
+ }
+}
+
+
+void MacroAssembler::PushStackPointer() {
+ PrepareForPush(1, 8);
+
+ // Pushing a stack pointer leads to implementation-defined
+ // behavior, which may be surprising. In particular,
+ // str x28, [x28, #-8]!
+ // pre-decrements the stack pointer, storing the decremented value.
+ // Additionally, sp is read as xzr in this context, so it cannot be pushed.
+ // So we must use a scratch register.
+ UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireX();
+
+ Mov(scratch, GetStackPointer64());
+ str(scratch, MemOperand(GetStackPointer64(), -8, PreIndex));
+}
+
+
+void MacroAssembler::Push(const CPURegister& src0, const CPURegister& src1,
+ const CPURegister& src2, const CPURegister& src3) {
+ VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
+ VIXL_ASSERT(src0.IsValid());
+
+ int count = 1 + src1.IsValid() + src2.IsValid() + src3.IsValid();
+ int size = src0.SizeInBytes();
+
+ if (src0.Is(GetStackPointer64())) {
+ VIXL_ASSERT(count == 1);
+ VIXL_ASSERT(size == 8);
+ PushStackPointer();
+ return;
+ }
+
+ PrepareForPush(count, size);
+ PushHelper(count, size, src0, src1, src2, src3);
+}
+
+
+void MacroAssembler::Pop(const CPURegister& dst0, const CPURegister& dst1,
+ const CPURegister& dst2, const CPURegister& dst3) {
+ // It is not valid to pop into the same register more than once in one
+ // instruction, not even into the zero register.
+ VIXL_ASSERT(!AreAliased(dst0, dst1, dst2, dst3));
+ VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
+ VIXL_ASSERT(dst0.IsValid());
+
+ int count = 1 + dst1.IsValid() + dst2.IsValid() + dst3.IsValid();
+ int size = dst0.SizeInBytes();
+
+ PrepareForPop(count, size);
+ PopHelper(count, size, dst0, dst1, dst2, dst3);
+}
+
+
+void MacroAssembler::PushCPURegList(CPURegList registers) {
+ VIXL_ASSERT(!registers.Overlaps(*TmpList()));
+ VIXL_ASSERT(!registers.Overlaps(*FPTmpList()));
+
+ int reg_size = registers.RegisterSizeInBytes();
+ PrepareForPush(registers.Count(), reg_size);
+
+ // Bump the stack pointer and store two registers at the bottom.
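+  // The pre-indexed store below both reserves the whole block and writes the
+  // two lowest-addressed registers; the remaining registers are stored at
+  // positive offsets from the new stack pointer.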
+ int size = registers.TotalSizeInBytes();
+ const CPURegister& bottom_0 = registers.PopLowestIndex();
+ const CPURegister& bottom_1 = registers.PopLowestIndex();
+ if (bottom_0.IsValid() && bottom_1.IsValid()) {
+ Stp(bottom_0, bottom_1, MemOperand(GetStackPointer64(), -size, PreIndex));
+ } else if (bottom_0.IsValid()) {
+ Str(bottom_0, MemOperand(GetStackPointer64(), -size, PreIndex));
+ }
+
+ int offset = 2 * reg_size;
+ while (!registers.IsEmpty()) {
+ const CPURegister& src0 = registers.PopLowestIndex();
+ const CPURegister& src1 = registers.PopLowestIndex();
+ if (src1.IsValid()) {
+ Stp(src0, src1, MemOperand(GetStackPointer64(), offset));
+ } else {
+ Str(src0, MemOperand(GetStackPointer64(), offset));
+ }
+ offset += 2 * reg_size;
+ }
+}
+
+
+void MacroAssembler::PopCPURegList(CPURegList registers) {
+ VIXL_ASSERT(!registers.Overlaps(*TmpList()));
+ VIXL_ASSERT(!registers.Overlaps(*FPTmpList()));
+
+ int reg_size = registers.RegisterSizeInBytes();
+ PrepareForPop(registers.Count(), reg_size);
+
+ int size = registers.TotalSizeInBytes();
+ const CPURegister& bottom_0 = registers.PopLowestIndex();
+ const CPURegister& bottom_1 = registers.PopLowestIndex();
+
+ int offset = 2 * reg_size;
+ while (!registers.IsEmpty()) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ const CPURegister& dst1 = registers.PopLowestIndex();
+ if (dst1.IsValid()) {
+ Ldp(dst0, dst1, MemOperand(GetStackPointer64(), offset));
+ } else {
+ Ldr(dst0, MemOperand(GetStackPointer64(), offset));
+ }
+ offset += 2 * reg_size;
+ }
+
+ // Load the two registers at the bottom and drop the stack pointer.
+ if (bottom_0.IsValid() && bottom_1.IsValid()) {
+ Ldp(bottom_0, bottom_1, MemOperand(GetStackPointer64(), size, PostIndex));
+ } else if (bottom_0.IsValid()) {
+ Ldr(bottom_0, MemOperand(GetStackPointer64(), size, PostIndex));
+ }
+}
+
+
+void MacroAssembler::PushMultipleTimes(int count, Register src) {
+ int size = src.SizeInBytes();
+
+ PrepareForPush(count, size);
+ // Push up to four registers at a time if possible because if the current
+  // stack pointer is sp and the register size is 32 bits, registers must be
+  // pushed in blocks of four in order to maintain the 16-byte alignment for sp.
+ while (count >= 4) {
+ PushHelper(4, size, src, src, src, src);
+ count -= 4;
+ }
+ if (count >= 2) {
+ PushHelper(2, size, src, src, NoReg, NoReg);
+ count -= 2;
+ }
+ if (count == 1) {
+ PushHelper(1, size, src, NoReg, NoReg, NoReg);
+ count -= 1;
+ }
+ VIXL_ASSERT(count == 0);
+}
+
+
+void MacroAssembler::PushHelper(int count, int size,
+ const CPURegister& src0,
+ const CPURegister& src1,
+ const CPURegister& src2,
+ const CPURegister& src3) {
+ // Ensure that we don't unintentionally modify scratch or debug registers.
+ // Worst case for size is 2 stp.
+ InstructionAccurateScope scope(this, 2,
+ InstructionAccurateScope::kMaximumSize);
+
+ VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
+ VIXL_ASSERT(size == src0.SizeInBytes());
+
+ // Pushing the stack pointer has unexpected behavior. See PushStackPointer().
+ VIXL_ASSERT(!src0.Is(GetStackPointer64()) && !src0.Is(sp));
+ VIXL_ASSERT(!src1.Is(GetStackPointer64()) && !src1.Is(sp));
+ VIXL_ASSERT(!src2.Is(GetStackPointer64()) && !src2.Is(sp));
+ VIXL_ASSERT(!src3.Is(GetStackPointer64()) && !src3.Is(sp));
+
+ // The JS engine should never push 4 bytes.
+ VIXL_ASSERT(size >= 8);
+
+ // When pushing multiple registers, the store order is chosen such that
+ // Push(a, b) is equivalent to Push(a) followed by Push(b).
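+  // For example, Push(a, b, c, d) leaves 'a' at the highest address and 'd' at
+  // the lowest address (the new top of stack), as four successive pushes would.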
+ switch (count) {
+ case 1:
+ VIXL_ASSERT(src1.IsNone() && src2.IsNone() && src3.IsNone());
+ str(src0, MemOperand(GetStackPointer64(), -1 * size, PreIndex));
+ break;
+ case 2:
+ VIXL_ASSERT(src2.IsNone() && src3.IsNone());
+ stp(src1, src0, MemOperand(GetStackPointer64(), -2 * size, PreIndex));
+ break;
+ case 3:
+ VIXL_ASSERT(src3.IsNone());
+ stp(src2, src1, MemOperand(GetStackPointer64(), -3 * size, PreIndex));
+ str(src0, MemOperand(GetStackPointer64(), 2 * size));
+ break;
+ case 4:
+ // Skip over 4 * size, then fill in the gap. This allows four W registers
+ // to be pushed using sp, whilst maintaining 16-byte alignment for sp at
+ // all times.
+ stp(src3, src2, MemOperand(GetStackPointer64(), -4 * size, PreIndex));
+ stp(src1, src0, MemOperand(GetStackPointer64(), 2 * size));
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
+
+
+void MacroAssembler::PopHelper(int count, int size,
+ const CPURegister& dst0,
+ const CPURegister& dst1,
+ const CPURegister& dst2,
+ const CPURegister& dst3) {
+ // Ensure that we don't unintentionally modify scratch or debug registers.
+ // Worst case for size is 2 ldp.
+ InstructionAccurateScope scope(this, 2,
+ InstructionAccurateScope::kMaximumSize);
+
+ VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
+ VIXL_ASSERT(size == dst0.SizeInBytes());
+
+ // When popping multiple registers, the load order is chosen such that
+ // Pop(a, b) is equivalent to Pop(a) followed by Pop(b).
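+  // For example, Pop(a, b, c, d) loads 'a' from the current top of stack and
+  // 'd' from the highest address, mirroring the layout produced by PushHelper.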
+ switch (count) {
+ case 1:
+ VIXL_ASSERT(dst1.IsNone() && dst2.IsNone() && dst3.IsNone());
+ ldr(dst0, MemOperand(GetStackPointer64(), 1 * size, PostIndex));
+ break;
+ case 2:
+ VIXL_ASSERT(dst2.IsNone() && dst3.IsNone());
+ ldp(dst0, dst1, MemOperand(GetStackPointer64(), 2 * size, PostIndex));
+ break;
+ case 3:
+ VIXL_ASSERT(dst3.IsNone());
+ ldr(dst2, MemOperand(GetStackPointer64(), 2 * size));
+ ldp(dst0, dst1, MemOperand(GetStackPointer64(), 3 * size, PostIndex));
+ break;
+ case 4:
+ // Load the higher addresses first, then load the lower addresses and skip
+ // the whole block in the second instruction. This allows four W registers
+ // to be popped using sp, whilst maintaining 16-byte alignment for sp at
+ // all times.
+ ldp(dst2, dst3, MemOperand(GetStackPointer64(), 2 * size));
+ ldp(dst0, dst1, MemOperand(GetStackPointer64(), 4 * size, PostIndex));
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
+
+
+void MacroAssembler::PrepareForPush(int count, int size) {
+ if (sp.Is(GetStackPointer64())) {
+ // If the current stack pointer is sp, then it must be aligned to 16 bytes
+ // on entry and the total size of the specified registers must also be a
+ // multiple of 16 bytes.
+ VIXL_ASSERT((count * size) % 16 == 0);
+ } else {
+ // Even if the current stack pointer is not the system stack pointer (sp),
+ // the system stack pointer will still be modified in order to comply with
+ // ABI rules about accessing memory below the system stack pointer.
+ BumpSystemStackPointer(count * size);
+ }
+}
+
+
+void MacroAssembler::PrepareForPop(int count, int size) {
+ USE(count, size);
+ if (sp.Is(GetStackPointer64())) {
+ // If the current stack pointer is sp, then it must be aligned to 16 bytes
+ // on entry and the total size of the specified registers must also be a
+ // multiple of 16 bytes.
+ VIXL_ASSERT((count * size) % 16 == 0);
+ }
+}
+
+void MacroAssembler::Poke(const Register& src, const Operand& offset) {
+ if (offset.IsImmediate()) {
+ VIXL_ASSERT(offset.immediate() >= 0);
+ }
+
+ Str(src, MemOperand(GetStackPointer64(), offset));
+}
+
+
+void MacroAssembler::Peek(const Register& dst, const Operand& offset) {
+ if (offset.IsImmediate()) {
+ VIXL_ASSERT(offset.immediate() >= 0);
+ }
+
+ Ldr(dst, MemOperand(GetStackPointer64(), offset));
+}
+
+
+void MacroAssembler::Claim(const Operand& size) {
+
+ if (size.IsZero()) {
+ return;
+ }
+
+ if (size.IsImmediate()) {
+ VIXL_ASSERT(size.immediate() > 0);
+ if (sp.Is(GetStackPointer64())) {
+ VIXL_ASSERT((size.immediate() % 16) == 0);
+ }
+ }
+
+ Sub(GetStackPointer64(), GetStackPointer64(), size);
+
+ // Make sure the real stack pointer reflects the claimed stack space.
+  // We can't use stack memory below the stack pointer; it could be clobbered
+  // by interrupts and signal handlers.
+ if (!sp.Is(GetStackPointer64())) {
+ Mov(sp, GetStackPointer64());
+ }
+}
+
+
+void MacroAssembler::Drop(const Operand& size) {
+
+ if (size.IsZero()) {
+ return;
+ }
+
+ if (size.IsImmediate()) {
+ VIXL_ASSERT(size.immediate() > 0);
+ if (sp.Is(GetStackPointer64())) {
+ VIXL_ASSERT((size.immediate() % 16) == 0);
+ }
+ }
+
+ Add(GetStackPointer64(), GetStackPointer64(), size);
+}
+
+
+void MacroAssembler::PushCalleeSavedRegisters() {
+ // Ensure that the macro-assembler doesn't use any scratch registers.
+ // 10 stp will be emitted.
+ // TODO(all): Should we use GetCalleeSaved and SavedFP.
+ InstructionAccurateScope scope(this, 10);
+
+ // This method must not be called unless the current stack pointer is sp.
+ VIXL_ASSERT(sp.Is(GetStackPointer64()));
+
+ MemOperand tos(sp, -2 * static_cast<int>(kXRegSizeInBytes), PreIndex);
+
+ stp(x29, x30, tos);
+ stp(x27, x28, tos);
+ stp(x25, x26, tos);
+ stp(x23, x24, tos);
+ stp(x21, x22, tos);
+ stp(x19, x20, tos);
+
+ stp(d14, d15, tos);
+ stp(d12, d13, tos);
+ stp(d10, d11, tos);
+ stp(d8, d9, tos);
+}
+
+
+void MacroAssembler::PopCalleeSavedRegisters() {
+ // Ensure that the macro-assembler doesn't use any scratch registers.
+  // 10 ldp instructions will be emitted.
+  // TODO(all): Should we use GetCalleeSaved and SavedFP?
+ InstructionAccurateScope scope(this, 10);
+
+ // This method must not be called unless the current stack pointer is sp.
+ VIXL_ASSERT(sp.Is(GetStackPointer64()));
+
+ MemOperand tos(sp, 2 * kXRegSizeInBytes, PostIndex);
+
+ ldp(d8, d9, tos);
+ ldp(d10, d11, tos);
+ ldp(d12, d13, tos);
+ ldp(d14, d15, tos);
+
+ ldp(x19, x20, tos);
+ ldp(x21, x22, tos);
+ ldp(x23, x24, tos);
+ ldp(x25, x26, tos);
+ ldp(x27, x28, tos);
+ ldp(x29, x30, tos);
+}
+
+void MacroAssembler::LoadCPURegList(CPURegList registers,
+ const MemOperand& src) {
+ LoadStoreCPURegListHelper(kLoad, registers, src);
+}
+
+void MacroAssembler::StoreCPURegList(CPURegList registers,
+ const MemOperand& dst) {
+ LoadStoreCPURegListHelper(kStore, registers, dst);
+}
+
+
+void MacroAssembler::LoadStoreCPURegListHelper(LoadStoreCPURegListAction op,
+ CPURegList registers,
+ const MemOperand& mem) {
+ // We do not handle pre-indexing or post-indexing.
+ VIXL_ASSERT(!(mem.IsPreIndex() || mem.IsPostIndex()));
+ VIXL_ASSERT(!registers.Overlaps(tmp_list_));
+ VIXL_ASSERT(!registers.Overlaps(fptmp_list_));
+ VIXL_ASSERT(!registers.IncludesAliasOf(sp));
+
+ UseScratchRegisterScope temps(this);
+
+ MemOperand loc = BaseMemOperandForLoadStoreCPURegList(registers,
+ mem,
+ &temps);
+
+ while (registers.Count() >= 2) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ const CPURegister& dst1 = registers.PopLowestIndex();
+ if (op == kStore) {
+ Stp(dst0, dst1, loc);
+ } else {
+ VIXL_ASSERT(op == kLoad);
+ Ldp(dst0, dst1, loc);
+ }
+ loc.AddOffset(2 * registers.RegisterSizeInBytes());
+ }
+ if (!registers.IsEmpty()) {
+ if (op == kStore) {
+ Str(registers.PopLowestIndex(), loc);
+ } else {
+ VIXL_ASSERT(op == kLoad);
+ Ldr(registers.PopLowestIndex(), loc);
+ }
+ }
+}
+
+MemOperand MacroAssembler::BaseMemOperandForLoadStoreCPURegList(
+ const CPURegList& registers,
+ const MemOperand& mem,
+ UseScratchRegisterScope* scratch_scope) {
+ // If necessary, pre-compute the base address for the accesses.
+ if (mem.IsRegisterOffset()) {
+ Register reg_base = scratch_scope->AcquireX();
+ ComputeAddress(reg_base, mem);
+ return MemOperand(reg_base);
+
+ } else if (mem.IsImmediateOffset()) {
+ int reg_size = registers.RegisterSizeInBytes();
+ int total_size = registers.TotalSizeInBytes();
+ int64_t min_offset = mem.offset();
+ int64_t max_offset = mem.offset() + std::max(0, total_size - 2 * reg_size);
+ if ((registers.Count() >= 2) &&
+ (!Assembler::IsImmLSPair(min_offset, WhichPowerOf2(reg_size)) ||
+ !Assembler::IsImmLSPair(max_offset, WhichPowerOf2(reg_size)))) {
+ Register reg_base = scratch_scope->AcquireX();
+ ComputeAddress(reg_base, mem);
+ return MemOperand(reg_base);
+ }
+ }
+
+ return mem;
+}
+
+void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
+ VIXL_ASSERT(!sp.Is(GetStackPointer64()));
+ // TODO: Several callers rely on this not using scratch registers, so we use
+ // the assembler directly here. However, this means that large immediate
+ // values of 'space' cannot be handled.
+ InstructionAccurateScope scope(this, 1);
+ sub(sp, GetStackPointer64(), space);
+}
+
+
+void MacroAssembler::Trace(TraceParameters parameters, TraceCommand command) {
+
+#ifdef JS_SIMULATOR_ARM64
+ // The arguments to the trace pseudo instruction need to be contiguous in
+ // memory, so make sure we don't try to emit a literal pool.
+ InstructionAccurateScope scope(this, kTraceLength / kInstructionSize);
+
+ Label start;
+ bind(&start);
+
+ // Refer to simulator-a64.h for a description of the marker and its
+ // arguments.
+ hlt(kTraceOpcode);
+
+ // VIXL_ASSERT(SizeOfCodeGeneratedSince(&start) == kTraceParamsOffset);
+ dc32(parameters);
+
+ // VIXL_ASSERT(SizeOfCodeGeneratedSince(&start) == kTraceCommandOffset);
+ dc32(command);
+#else
+ // Emit nothing on real hardware.
+ USE(parameters, command);
+#endif
+}
+
+
+void MacroAssembler::Log(TraceParameters parameters) {
+
+#ifdef JS_SIMULATOR_ARM64
+ // The arguments to the log pseudo instruction need to be contiguous in
+ // memory, so make sure we don't try to emit a literal pool.
+ InstructionAccurateScope scope(this, kLogLength / kInstructionSize);
+
+ Label start;
+ bind(&start);
+
+ // Refer to simulator-a64.h for a description of the marker and its
+ // arguments.
+ hlt(kLogOpcode);
+
+ // VIXL_ASSERT(SizeOfCodeGeneratedSince(&start) == kLogParamsOffset);
+ dc32(parameters);
+#else
+ // Emit nothing on real hardware.
+ USE(parameters);
+#endif
+}
+
+
+void MacroAssembler::EnableInstrumentation() {
+ VIXL_ASSERT(!isprint(InstrumentStateEnable));
+ InstructionAccurateScope scope(this, 1);
+ movn(xzr, InstrumentStateEnable);
+}
+
+
+void MacroAssembler::DisableInstrumentation() {
+ VIXL_ASSERT(!isprint(InstrumentStateDisable));
+ InstructionAccurateScope scope(this, 1);
+ movn(xzr, InstrumentStateDisable);
+}
+
+
+void MacroAssembler::AnnotateInstrumentation(const char* marker_name) {
+ VIXL_ASSERT(strlen(marker_name) == 2);
+
+ // We allow only printable characters in the marker names. Unprintable
+ // characters are reserved for controlling features of the instrumentation.
+ VIXL_ASSERT(isprint(marker_name[0]) && isprint(marker_name[1]));
+
+ InstructionAccurateScope scope(this, 1);
+ movn(xzr, (marker_name[1] << 8) | marker_name[0]);
+}
+
+
+void UseScratchRegisterScope::Open(MacroAssembler* masm) {
+ VIXL_ASSERT(!initialised_);
+ available_ = masm->TmpList();
+ availablefp_ = masm->FPTmpList();
+ old_available_ = available_->list();
+ old_availablefp_ = availablefp_->list();
+ VIXL_ASSERT(available_->type() == CPURegister::kRegister);
+ VIXL_ASSERT(availablefp_->type() == CPURegister::kVRegister);
+#ifdef DEBUG
+ initialised_ = true;
+#endif
+}
+
+
+void UseScratchRegisterScope::Close() {
+ if (available_) {
+ available_->set_list(old_available_);
+ available_ = NULL;
+ }
+ if (availablefp_) {
+ availablefp_->set_list(old_availablefp_);
+ availablefp_ = NULL;
+ }
+#ifdef DEBUG
+ initialised_ = false;
+#endif
+}
+
+
+UseScratchRegisterScope::UseScratchRegisterScope(MacroAssembler* masm) {
+#ifdef DEBUG
+ initialised_ = false;
+#endif
+ Open(masm);
+}
+
+// This allows deferred (and optional) initialisation of the scope.
+UseScratchRegisterScope::UseScratchRegisterScope()
+ : available_(NULL), availablefp_(NULL),
+ old_available_(0), old_availablefp_(0) {
+#ifdef DEBUG
+ initialised_ = false;
+#endif
+}
+
+UseScratchRegisterScope::~UseScratchRegisterScope() {
+ Close();
+}
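+// A minimal usage sketch (illustrative, not part of the original source;
+// 'masm' is an assumed MacroAssembler instance): borrow a 64-bit scratch
+// register from the temp list for the duration of a block. Close() runs from
+// the destructor, so the register is returned automatically.
+//
+//   {
+//     UseScratchRegisterScope temps(&masm);
+//     Register scratch = temps.AcquireX();
+//     // ... use 'scratch' ...
+//   }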
+
+
+bool UseScratchRegisterScope::IsAvailable(const CPURegister& reg) const {
+ return available_->IncludesAliasOf(reg) || availablefp_->IncludesAliasOf(reg);
+}
+
+
+Register UseScratchRegisterScope::AcquireSameSizeAs(const Register& reg) {
+ int code = AcquireNextAvailable(available_).code();
+ return Register(code, reg.size());
+}
+
+
+FPRegister UseScratchRegisterScope::AcquireSameSizeAs(const FPRegister& reg) {
+ int code = AcquireNextAvailable(availablefp_).code();
+ return FPRegister(code, reg.size());
+}
+
+
+void UseScratchRegisterScope::Release(const CPURegister& reg) {
+ VIXL_ASSERT(initialised_);
+ if (reg.IsRegister()) {
+ ReleaseByCode(available_, reg.code());
+ } else if (reg.IsFPRegister()) {
+ ReleaseByCode(availablefp_, reg.code());
+ } else {
+ VIXL_ASSERT(reg.IsNone());
+ }
+}
+
+
+void UseScratchRegisterScope::Include(const CPURegList& list) {
+ VIXL_ASSERT(initialised_);
+ if (list.type() == CPURegister::kRegister) {
+    // Make sure that neither sp nor xzr is included in the list.
+ IncludeByRegList(available_, list.list() & ~(xzr.Bit() | sp.Bit()));
+ } else {
+ VIXL_ASSERT(list.type() == CPURegister::kVRegister);
+ IncludeByRegList(availablefp_, list.list());
+ }
+}
+
+
+void UseScratchRegisterScope::Include(const Register& reg1,
+ const Register& reg2,
+ const Register& reg3,
+ const Register& reg4) {
+ VIXL_ASSERT(initialised_);
+ RegList include = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit();
+  // Make sure that neither sp nor xzr is included in the list.
+ include &= ~(xzr.Bit() | sp.Bit());
+
+ IncludeByRegList(available_, include);
+}
+
+
+void UseScratchRegisterScope::Include(const FPRegister& reg1,
+ const FPRegister& reg2,
+ const FPRegister& reg3,
+ const FPRegister& reg4) {
+ RegList include = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit();
+ IncludeByRegList(availablefp_, include);
+}
+
+
+void UseScratchRegisterScope::Exclude(const CPURegList& list) {
+ if (list.type() == CPURegister::kRegister) {
+ ExcludeByRegList(available_, list.list());
+ } else {
+ VIXL_ASSERT(list.type() == CPURegister::kVRegister);
+ ExcludeByRegList(availablefp_, list.list());
+ }
+}
+
+
+void UseScratchRegisterScope::Exclude(const Register& reg1,
+ const Register& reg2,
+ const Register& reg3,
+ const Register& reg4) {
+ RegList exclude = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit();
+ ExcludeByRegList(available_, exclude);
+}
+
+
+void UseScratchRegisterScope::Exclude(const FPRegister& reg1,
+ const FPRegister& reg2,
+ const FPRegister& reg3,
+ const FPRegister& reg4) {
+ RegList excludefp = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit();
+ ExcludeByRegList(availablefp_, excludefp);
+}
+
+
+void UseScratchRegisterScope::Exclude(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3,
+ const CPURegister& reg4) {
+ RegList exclude = 0;
+ RegList excludefp = 0;
+
+ const CPURegister regs[] = {reg1, reg2, reg3, reg4};
+
+ for (unsigned i = 0; i < (sizeof(regs) / sizeof(regs[0])); i++) {
+ if (regs[i].IsRegister()) {
+ exclude |= regs[i].Bit();
+ } else if (regs[i].IsFPRegister()) {
+ excludefp |= regs[i].Bit();
+ } else {
+ VIXL_ASSERT(regs[i].IsNone());
+ }
+ }
+
+ ExcludeByRegList(available_, exclude);
+ ExcludeByRegList(availablefp_, excludefp);
+}
+
+
+void UseScratchRegisterScope::ExcludeAll() {
+ ExcludeByRegList(available_, available_->list());
+ ExcludeByRegList(availablefp_, availablefp_->list());
+}
+
+
+CPURegister UseScratchRegisterScope::AcquireNextAvailable(
+ CPURegList* available) {
+ VIXL_CHECK(!available->IsEmpty());
+ CPURegister result = available->PopLowestIndex();
+ VIXL_ASSERT(!AreAliased(result, xzr, sp));
+ return result;
+}
+
+
+void UseScratchRegisterScope::ReleaseByCode(CPURegList* available, int code) {
+ ReleaseByRegList(available, static_cast<RegList>(1) << code);
+}
+
+
+void UseScratchRegisterScope::ReleaseByRegList(CPURegList* available,
+ RegList regs) {
+ available->set_list(available->list() | regs);
+}
+
+
+void UseScratchRegisterScope::IncludeByRegList(CPURegList* available,
+ RegList regs) {
+ available->set_list(available->list() | regs);
+}
+
+
+void UseScratchRegisterScope::ExcludeByRegList(CPURegList* available,
+ RegList exclude) {
+ available->set_list(available->list() & ~exclude);
+}
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/MacroAssembler-vixl.h b/js/src/jit/arm64/vixl/MacroAssembler-vixl.h
new file mode 100644
index 0000000000..3c403a815f
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MacroAssembler-vixl.h
@@ -0,0 +1,2622 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_MACRO_ASSEMBLER_A64_H_
+#define VIXL_A64_MACRO_ASSEMBLER_A64_H_
+
+#include <algorithm>
+#include <limits>
+
+#include "jit/arm64/Assembler-arm64.h"
+#include "jit/arm64/vixl/Debugger-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Instrument-vixl.h"
+#include "jit/arm64/vixl/Simulator-Constants-vixl.h"
+
+#define LS_MACRO_LIST(V) \
+ V(Ldrb, Register&, rt, LDRB_w) \
+ V(Strb, Register&, rt, STRB_w) \
+ V(Ldrsb, Register&, rt, rt.Is64Bits() ? LDRSB_x : LDRSB_w) \
+ V(Ldrh, Register&, rt, LDRH_w) \
+ V(Strh, Register&, rt, STRH_w) \
+ V(Ldrsh, Register&, rt, rt.Is64Bits() ? LDRSH_x : LDRSH_w) \
+ V(Ldr, CPURegister&, rt, LoadOpFor(rt)) \
+ V(Str, CPURegister&, rt, StoreOpFor(rt)) \
+ V(Ldrsw, Register&, rt, LDRSW_x)
+
+
+#define LSPAIR_MACRO_LIST(V) \
+ V(Ldp, CPURegister&, rt, rt2, LoadPairOpFor(rt, rt2)) \
+ V(Stp, CPURegister&, rt, rt2, StorePairOpFor(rt, rt2)) \
+ V(Ldpsw, CPURegister&, rt, rt2, LDPSW_x)
+
+namespace vixl {
+
+// Forward declaration
+class MacroAssembler;
+class UseScratchRegisterScope;
+
+// This scope has the following purposes:
+// * Acquire/Release the underlying assembler's code buffer.
+// * This is mandatory before emitting.
+// * Emit the literal or veneer pools if necessary before emitting the
+// macro-instruction.
+// * Ensure there is enough space to emit the macro-instruction.
+class EmissionCheckScope {
+ public:
+ EmissionCheckScope(MacroAssembler* masm, size_t size)
+ : masm_(masm)
+ { }
+
+ protected:
+ MacroAssembler* masm_;
+#ifdef DEBUG
+ Label start_;
+ size_t size_;
+#endif
+};
+
+
+// Helper for common Emission checks.
+// The macro-instruction maps to a single instruction.
+class SingleEmissionCheckScope : public EmissionCheckScope {
+ public:
+ explicit SingleEmissionCheckScope(MacroAssembler* masm)
+ : EmissionCheckScope(masm, kInstructionSize) {}
+};
+
+
+// The macro instruction is a "typical" macro-instruction. Typical macro-
+// instructions only emit a few instructions, "a few" being defined as 8 here.
+class MacroEmissionCheckScope : public EmissionCheckScope {
+ public:
+ explicit MacroEmissionCheckScope(MacroAssembler* masm)
+ : EmissionCheckScope(masm, kTypicalMacroInstructionMaxSize) {}
+
+ private:
+ static const size_t kTypicalMacroInstructionMaxSize = 8 * kInstructionSize;
+};
+
+
+enum BranchType {
+ // Copies of architectural conditions.
+  // The associated conditions can be used in place of those; the code will
+ // take care of reinterpreting them with the correct type.
+ integer_eq = eq,
+ integer_ne = ne,
+ integer_hs = hs,
+ integer_lo = lo,
+ integer_mi = mi,
+ integer_pl = pl,
+ integer_vs = vs,
+ integer_vc = vc,
+ integer_hi = hi,
+ integer_ls = ls,
+ integer_ge = ge,
+ integer_lt = lt,
+ integer_gt = gt,
+ integer_le = le,
+ integer_al = al,
+ integer_nv = nv,
+
+ // These two are *different* from the architectural codes al and nv.
+ // 'always' is used to generate unconditional branches.
+ // 'never' is used to not generate a branch (generally as the inverse
+  // branch type of 'always').
+ always, never,
+ // cbz and cbnz
+ reg_zero, reg_not_zero,
+ // tbz and tbnz
+ reg_bit_clear, reg_bit_set,
+
+ // Aliases.
+ kBranchTypeFirstCondition = eq,
+ kBranchTypeLastCondition = nv,
+ kBranchTypeFirstUsingReg = reg_zero,
+ kBranchTypeFirstUsingBit = reg_bit_clear
+};
+
+
+enum DiscardMoveMode { kDontDiscardForSameWReg, kDiscardForSameWReg };
+
+// The macro assembler supports moving automatically pre-shifted immediates for
+// arithmetic and logical instructions, and then applying a post shift in the
+// instruction to undo the modification, in order to reduce the code emitted for
+// an operation. For example:
+//
+// Add(x0, x0, 0x1f7de) => movz x16, 0xfbef; add x0, x0, x16, lsl #1.
+//
+// This optimisation can be only partially applied when the stack pointer is an
+// operand or destination, so this enumeration is used to control the shift.
+enum PreShiftImmMode {
+ kNoShift, // Don't pre-shift.
+ kLimitShiftForSP, // Limit pre-shift for add/sub extend use.
+ kAnyShift // Allow any pre-shift.
+};
+
+
+class MacroAssembler : public js::jit::Assembler {
+ public:
+ MacroAssembler();
+
+ // Finalize a code buffer of generated instructions. This function must be
+ // called before executing or copying code from the buffer.
+ void FinalizeCode();
+
+
+ // Constant generation helpers.
+ // These functions return the number of instructions required to move the
+ // immediate into the destination register. Also, if the masm pointer is
+ // non-null, it generates the code to do so.
+ // The two features are implemented using one function to avoid duplication of
+ // the logic.
+  // The function can be used to evaluate the cost of synthesizing an immediate
+  // using 'mov immediate' instructions. A user might prefer loading
+ // a constant using the literal pool instead of using multiple 'mov immediate'
+ // instructions.
+ static int MoveImmediateHelper(MacroAssembler* masm,
+ const Register &rd,
+ uint64_t imm);
+ static bool OneInstrMoveImmediateHelper(MacroAssembler* masm,
+ const Register& dst,
+ int64_t imm);
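+  // A usage sketch (illustrative, not part of the original source): passing a
+  // null masm pointer only queries the cost, without emitting any code.
+  //
+  //   int cost = MacroAssembler::MoveImmediateHelper(NULL, x0,
+  //                                                  0x0123456789abcdefULL);
+  //   // 'cost' is the number of 'mov immediate' instructions required.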
+
+
+ // Logical macros.
+ void And(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Ands(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Bic(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Bics(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Orr(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Orn(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Eor(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Eon(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Tst(const Register& rn, const Operand& operand);
+ void LogicalMacro(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ LogicalOp op);
+
+ // Add and sub macros.
+ void Add(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S = LeaveFlags);
+ void Adds(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Sub(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S = LeaveFlags);
+ void Subs(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Cmn(const Register& rn, const Operand& operand);
+ void Cmp(const Register& rn, const Operand& operand);
+ void Neg(const Register& rd,
+ const Operand& operand);
+ void Negs(const Register& rd,
+ const Operand& operand);
+
+ void AddSubMacro(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubOp op);
+
+ // Add/sub with carry macros.
+ void Adc(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Adcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Sbc(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Sbcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Ngc(const Register& rd,
+ const Operand& operand);
+ void Ngcs(const Register& rd,
+ const Operand& operand);
+ void AddSubWithCarryMacro(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubWithCarryOp op);
+
+ // Move macros.
+ void Mov(const Register& rd, uint64_t imm);
+ void Mov(const Register& rd,
+ const Operand& operand,
+ DiscardMoveMode discard_mode = kDontDiscardForSameWReg);
+ void Mvn(const Register& rd, uint64_t imm) {
+ Mov(rd, (rd.size() == kXRegSize) ? ~imm : (~imm & kWRegMask));
+ }
+ void Mvn(const Register& rd, const Operand& operand);
+
+ // Try to move an immediate into the destination register in a single
+ // instruction. Returns true for success, and updates the contents of dst.
+  // Returns false otherwise.
+ bool TryOneInstrMoveImmediate(const Register& dst, int64_t imm);
+
+ // Move an immediate into register dst, and return an Operand object for
+ // use with a subsequent instruction that accepts a shift. The value moved
+ // into dst is not necessarily equal to imm; it may have had a shifting
+ // operation applied to it that will be subsequently undone by the shift
+ // applied in the Operand.
+ Operand MoveImmediateForShiftedOp(const Register& dst,
+ int64_t imm,
+ PreShiftImmMode mode);
+
+ // Synthesises the address represented by a MemOperand into a register.
+ void ComputeAddress(const Register& dst, const MemOperand& mem_op);
+
+ // Conditional macros.
+ void Ccmp(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond);
+ void Ccmn(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond);
+ void ConditionalCompareMacro(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond,
+ ConditionalCompareOp op);
+ void Csel(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ Condition cond);
+
+ // Load/store macros.
+#define DECLARE_FUNCTION(FN, REGTYPE, REG, OP) \
+ void FN(const REGTYPE REG, const MemOperand& addr);
+ LS_MACRO_LIST(DECLARE_FUNCTION)
+#undef DECLARE_FUNCTION
+
+ void LoadStoreMacro(const CPURegister& rt,
+ const MemOperand& addr,
+ LoadStoreOp op);
+
+#define DECLARE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \
+ void FN(const REGTYPE REG, const REGTYPE REG2, const MemOperand& addr);
+ LSPAIR_MACRO_LIST(DECLARE_FUNCTION)
+#undef DECLARE_FUNCTION
+
+ void LoadStorePairMacro(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& addr,
+ LoadStorePairOp op);
+
+ void Prfm(PrefetchOperation op, const MemOperand& addr);
+
+ // Push or pop up to 4 registers of the same width to or from the stack,
+ // using the current stack pointer as set by SetStackPointer.
+ //
+ // If an argument register is 'NoReg', all further arguments are also assumed
+ // to be 'NoReg', and are thus not pushed or popped.
+ //
+ // Arguments are ordered such that "Push(a, b);" is functionally equivalent
+ // to "Push(a); Push(b);".
+ //
+ // It is valid to push the same register more than once, and there is no
+ // restriction on the order in which registers are specified.
+ //
+ // It is not valid to pop into the same register more than once in one
+ // operation, not even into the zero register.
+ //
+ // If the current stack pointer (as set by SetStackPointer) is sp, then it
+ // must be aligned to 16 bytes on entry and the total size of the specified
+ // registers must also be a multiple of 16 bytes.
+ //
+ // Even if the current stack pointer is not the system stack pointer (sp),
+ // Push (and derived methods) will still modify the system stack pointer in
+ // order to comply with ABI rules about accessing memory below the system
+ // stack pointer.
+ //
+ // Other than the registers passed into Pop, the stack pointer and (possibly)
+ // the system stack pointer, these methods do not modify any other registers.
+ void Push(const CPURegister& src0, const CPURegister& src1 = NoReg,
+ const CPURegister& src2 = NoReg, const CPURegister& src3 = NoReg);
+ void Pop(const CPURegister& dst0, const CPURegister& dst1 = NoReg,
+ const CPURegister& dst2 = NoReg, const CPURegister& dst3 = NoReg);
+ void PushStackPointer();
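+  // A usage sketch (illustrative, not part of the original source; 'masm' is
+  // an assumed MacroAssembler instance): with sp as the current stack pointer,
+  // pushing two X registers keeps the total at a multiple of 16 bytes; Pop
+  // takes the registers in the reverse order to restore them.
+  //
+  //   masm.Push(x0, x1);
+  //   // ...
+  //   masm.Pop(x1, x0);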
+
+ // Alternative forms of Push and Pop, taking a RegList or CPURegList that
+ // specifies the registers that are to be pushed or popped. Higher-numbered
+ // registers are associated with higher memory addresses (as in the A32 push
+ // and pop instructions).
+ //
+ // (Push|Pop)SizeRegList allow you to specify the register size as a
+ // parameter. Only kXRegSize, kWRegSize, kDRegSize and kSRegSize are
+ // supported.
+ //
+ // Otherwise, (Push|Pop)(CPU|X|W|D|S)RegList is preferred.
+ void PushCPURegList(CPURegList registers);
+ void PopCPURegList(CPURegList registers);
+
+ void PushSizeRegList(RegList registers, unsigned reg_size,
+ CPURegister::RegisterType type = CPURegister::kRegister) {
+ PushCPURegList(CPURegList(type, reg_size, registers));
+ }
+ void PopSizeRegList(RegList registers, unsigned reg_size,
+ CPURegister::RegisterType type = CPURegister::kRegister) {
+ PopCPURegList(CPURegList(type, reg_size, registers));
+ }
+ void PushXRegList(RegList regs) {
+ PushSizeRegList(regs, kXRegSize);
+ }
+ void PopXRegList(RegList regs) {
+ PopSizeRegList(regs, kXRegSize);
+ }
+ void PushWRegList(RegList regs) {
+ PushSizeRegList(regs, kWRegSize);
+ }
+ void PopWRegList(RegList regs) {
+ PopSizeRegList(regs, kWRegSize);
+ }
+ void PushDRegList(RegList regs) {
+ PushSizeRegList(regs, kDRegSize, CPURegister::kVRegister);
+ }
+ void PopDRegList(RegList regs) {
+ PopSizeRegList(regs, kDRegSize, CPURegister::kVRegister);
+ }
+ void PushSRegList(RegList regs) {
+ PushSizeRegList(regs, kSRegSize, CPURegister::kVRegister);
+ }
+ void PopSRegList(RegList regs) {
+ PopSizeRegList(regs, kSRegSize, CPURegister::kVRegister);
+ }
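+  // A usage sketch (illustrative, not part of the original source): the list
+  // forms are convenient when the set of registers is built up as a RegList
+  // rather than written out register by register.
+  //
+  //   RegList saved = x19.Bit() | x20.Bit();
+  //   masm.PushXRegList(saved);
+  //   // ...
+  //   masm.PopXRegList(saved);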
+
+ // Push the specified register 'count' times.
+ void PushMultipleTimes(int count, Register src);
+
+ // Poke 'src' onto the stack. The offset is in bytes.
+ //
+ // If the current stack pointer (as set by SetStackPointer) is sp, then sp
+ // must be aligned to 16 bytes.
+ void Poke(const Register& src, const Operand& offset);
+
+ // Peek at a value on the stack, and put it in 'dst'. The offset is in bytes.
+ //
+ // If the current stack pointer (as set by SetStackPointer) is sp, then sp
+ // must be aligned to 16 bytes.
+ void Peek(const Register& dst, const Operand& offset);
+
+ // Alternative forms of Peek and Poke, taking a RegList or CPURegList that
+  // specifies the registers that are to be loaded or stored. Higher-numbered
+ // registers are associated with higher memory addresses.
+ //
+ // (Peek|Poke)SizeRegList allow you to specify the register size as a
+ // parameter. Only kXRegSize, kWRegSize, kDRegSize and kSRegSize are
+ // supported.
+ //
+ // Otherwise, (Peek|Poke)(CPU|X|W|D|S)RegList is preferred.
+ void PeekCPURegList(CPURegList registers, int64_t offset) {
+ LoadCPURegList(registers, MemOperand(StackPointer(), offset));
+ }
+ void PokeCPURegList(CPURegList registers, int64_t offset) {
+ StoreCPURegList(registers, MemOperand(StackPointer(), offset));
+ }
+
+ void PeekSizeRegList(RegList registers, int64_t offset, unsigned reg_size,
+ CPURegister::RegisterType type = CPURegister::kRegister) {
+ PeekCPURegList(CPURegList(type, reg_size, registers), offset);
+ }
+ void PokeSizeRegList(RegList registers, int64_t offset, unsigned reg_size,
+ CPURegister::RegisterType type = CPURegister::kRegister) {
+ PokeCPURegList(CPURegList(type, reg_size, registers), offset);
+ }
+ void PeekXRegList(RegList regs, int64_t offset) {
+ PeekSizeRegList(regs, offset, kXRegSize);
+ }
+ void PokeXRegList(RegList regs, int64_t offset) {
+ PokeSizeRegList(regs, offset, kXRegSize);
+ }
+ void PeekWRegList(RegList regs, int64_t offset) {
+ PeekSizeRegList(regs, offset, kWRegSize);
+ }
+ void PokeWRegList(RegList regs, int64_t offset) {
+ PokeSizeRegList(regs, offset, kWRegSize);
+ }
+ void PeekDRegList(RegList regs, int64_t offset) {
+ PeekSizeRegList(regs, offset, kDRegSize, CPURegister::kVRegister);
+ }
+ void PokeDRegList(RegList regs, int64_t offset) {
+ PokeSizeRegList(regs, offset, kDRegSize, CPURegister::kVRegister);
+ }
+ void PeekSRegList(RegList regs, int64_t offset) {
+ PeekSizeRegList(regs, offset, kSRegSize, CPURegister::kVRegister);
+ }
+ void PokeSRegList(RegList regs, int64_t offset) {
+ PokeSizeRegList(regs, offset, kSRegSize, CPURegister::kVRegister);
+ }
+
+
+ // Claim or drop stack space without actually accessing memory.
+ //
+ // If the current stack pointer (as set by SetStackPointer) is sp, then it
+ // must be aligned to 16 bytes and the size claimed or dropped must be a
+ // multiple of 16 bytes.
+ void Claim(const Operand& size);
+ void Drop(const Operand& size);
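+  // A usage sketch (illustrative, not part of the original source; 'masm' is
+  // an assumed MacroAssembler instance): reserve 16 bytes, spill and reload
+  // x0, then release the space. The size is a multiple of 16 because sp is
+  // assumed to be the current stack pointer here.
+  //
+  //   masm.Claim(16);
+  //   masm.Poke(x0, 0);
+  //   // ...
+  //   masm.Peek(x0, 0);
+  //   masm.Drop(16);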
+
+ // Preserve the callee-saved registers (as defined by AAPCS64).
+ //
+ // Higher-numbered registers are pushed before lower-numbered registers, and
+ // thus get higher addresses.
+  // General-purpose registers are pushed before floating-point registers, and
+ // thus get higher addresses.
+ //
+ // This method must not be called unless StackPointer() is sp, and it is
+ // aligned to 16 bytes.
+ void PushCalleeSavedRegisters();
+
+ // Restore the callee-saved registers (as defined by AAPCS64).
+ //
+ // Higher-numbered registers are popped after lower-numbered registers, and
+ // thus come from higher addresses.
+  // Floating-point registers are popped before general-purpose registers, and
+  // thus come from lower addresses.
+ //
+ // This method must not be called unless StackPointer() is sp, and it is
+ // aligned to 16 bytes.
+ void PopCalleeSavedRegisters();
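+  // A usage sketch (illustrative, not part of the original source): a typical
+  // prologue/epilogue pairing around a body that may clobber callee-saved
+  // registers, with sp as the current stack pointer.
+  //
+  //   masm.PushCalleeSavedRegisters();
+  //   // ... body ...
+  //   masm.PopCalleeSavedRegisters();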
+
+ void LoadCPURegList(CPURegList registers, const MemOperand& src);
+ void StoreCPURegList(CPURegList registers, const MemOperand& dst);
+
+ // Remaining instructions are simple pass-through calls to the assembler.
+ void Adr(const Register& rd, Label* label) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ adr(rd, label);
+ }
+ void Adrp(const Register& rd, Label* label) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ adrp(rd, label);
+ }
+ void Asr(const Register& rd, const Register& rn, unsigned shift) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ asr(rd, rn, shift);
+ }
+ void Asr(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ asrv(rd, rn, rm);
+ }
+
+ // Branch type inversion relies on these relations.
+ VIXL_STATIC_ASSERT((reg_zero == (reg_not_zero ^ 1)) &&
+ (reg_bit_clear == (reg_bit_set ^ 1)) &&
+ (always == (never ^ 1)));
+
+ BranchType InvertBranchType(BranchType type) {
+ if (kBranchTypeFirstCondition <= type && type <= kBranchTypeLastCondition) {
+ return static_cast<BranchType>(
+ InvertCondition(static_cast<Condition>(type)));
+ } else {
+ return static_cast<BranchType>(type ^ 1);
+ }
+ }
+
+ void B(Label* label, BranchType type, Register reg = NoReg, int bit = -1);
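+  // A usage sketch (illustrative, not part of the original source): the
+  // BranchType form selects the underlying instruction, so reg_zero requests a
+  // compare-and-branch-on-zero (cbz) without naming it directly.
+  //
+  //   Label done;
+  //   masm.B(&done, reg_zero, x0);
+  //   // ...
+  //   masm.Bind(&done);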
+
+ void B(Label* label);
+ void B(Label* label, Condition cond);
+ void B(Condition cond, Label* label) {
+ B(label, cond);
+ }
+ void Bfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ bfm(rd, rn, immr, imms);
+ }
+ void Bfi(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ bfi(rd, rn, lsb, width);
+ }
+ void Bfxil(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ bfxil(rd, rn, lsb, width);
+ }
+ void Bind(Label* label);
+ // Bind a label to a specified offset from the start of the buffer.
+ void BindToOffset(Label* label, ptrdiff_t offset);
+ void Bl(Label* label) {
+ SingleEmissionCheckScope guard(this);
+ bl(label);
+ }
+ void Blr(const Register& xn) {
+ VIXL_ASSERT(!xn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ blr(xn);
+ }
+ void Br(const Register& xn) {
+ VIXL_ASSERT(!xn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ br(xn);
+ }
+ void Brk(int code = 0) {
+ SingleEmissionCheckScope guard(this);
+ brk(code);
+ }
+ void Cbnz(const Register& rt, Label* label);
+ void Cbz(const Register& rt, Label* label);
+ void Cinc(const Register& rd, const Register& rn, Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ cinc(rd, rn, cond);
+ }
+ void Cinv(const Register& rd, const Register& rn, Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ cinv(rd, rn, cond);
+ }
+ void Clrex() {
+ SingleEmissionCheckScope guard(this);
+ clrex();
+ }
+ void Cls(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ cls(rd, rn);
+ }
+ void Clz(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ clz(rd, rn);
+ }
+ void Cneg(const Register& rd, const Register& rn, Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ cneg(rd, rn, cond);
+ }
+ void Cset(const Register& rd, Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ cset(rd, cond);
+ }
+ void Csetm(const Register& rd, Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ csetm(rd, cond);
+ }
+ void Csinc(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+    // The VIXL source code contains these assertions, but the AArch64 ISA
+ // explicitly permits the use of zero registers. CSET itself is defined
+ // in terms of CSINC with WZR/XZR.
+ //
+ // VIXL_ASSERT(!rn.IsZero());
+ // VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ SingleEmissionCheckScope guard(this);
+ csinc(rd, rn, rm, cond);
+ }
+ void Csinv(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ SingleEmissionCheckScope guard(this);
+ csinv(rd, rn, rm, cond);
+ }
+ void Csneg(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ SingleEmissionCheckScope guard(this);
+ csneg(rd, rn, rm, cond);
+ }
+ void Dmb(BarrierDomain domain, BarrierType type) {
+ SingleEmissionCheckScope guard(this);
+ dmb(domain, type);
+ }
+ void Dsb(BarrierDomain domain, BarrierType type) {
+ SingleEmissionCheckScope guard(this);
+ dsb(domain, type);
+ }
+ void Extr(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ unsigned lsb) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ extr(rd, rn, rm, lsb);
+ }
+ void Fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fadd(vd, vn, vm);
+ }
+ void Fccmp(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond,
+ FPTrapFlags trap = DisableTrap) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ SingleEmissionCheckScope guard(this);
+ FPCCompareMacro(vn, vm, nzcv, cond, trap);
+ }
+ void Fccmpe(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond) {
+ Fccmp(vn, vm, nzcv, cond, EnableTrap);
+ }
+ void Fcmp(const VRegister& vn, const VRegister& vm,
+ FPTrapFlags trap = DisableTrap) {
+ SingleEmissionCheckScope guard(this);
+ FPCompareMacro(vn, vm, trap);
+ }
+ void Fcmp(const VRegister& vn, double value,
+ FPTrapFlags trap = DisableTrap);
+ void Fcmpe(const VRegister& vn, double value);
+ void Fcmpe(const VRegister& vn, const VRegister& vm) {
+ Fcmp(vn, vm, EnableTrap);
+ }
+ void Fcsel(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ Condition cond) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ SingleEmissionCheckScope guard(this);
+ fcsel(vd, vn, vm, cond);
+ }
+ void Fcvt(const VRegister& vd, const VRegister& vn) {
+ SingleEmissionCheckScope guard(this);
+ fcvt(vd, vn);
+ }
+ void Fcvtl(const VRegister& vd, const VRegister& vn) {
+ SingleEmissionCheckScope guard(this);
+ fcvtl(vd, vn);
+ }
+ void Fcvtl2(const VRegister& vd, const VRegister& vn) {
+ SingleEmissionCheckScope guard(this);
+ fcvtl2(vd, vn);
+ }
+ void Fcvtn(const VRegister& vd, const VRegister& vn) {
+ SingleEmissionCheckScope guard(this);
+ fcvtn(vd, vn);
+ }
+ void Fcvtn2(const VRegister& vd, const VRegister& vn) {
+ SingleEmissionCheckScope guard(this);
+ fcvtn2(vd, vn);
+ }
+ void Fcvtxn(const VRegister& vd, const VRegister& vn) {
+ SingleEmissionCheckScope guard(this);
+ fcvtxn(vd, vn);
+ }
+ void Fcvtxn2(const VRegister& vd, const VRegister& vn) {
+ SingleEmissionCheckScope guard(this);
+ fcvtxn2(vd, vn);
+ }
+ void Fcvtas(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtas(rd, vn);
+ }
+ void Fcvtau(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtau(rd, vn);
+ }
+ void Fcvtms(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtms(rd, vn);
+ }
+ void Fcvtmu(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtmu(rd, vn);
+ }
+ void Fcvtns(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtns(rd, vn);
+ }
+ void Fcvtnu(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtnu(rd, vn);
+ }
+ void Fcvtps(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtps(rd, vn);
+ }
+ void Fcvtpu(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtpu(rd, vn);
+ }
+ void Fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtzs(rd, vn, fbits);
+ }
+ void Fjcvtzs(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fjcvtzs(rd, vn);
+ }
+ void Fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtzu(rd, vn, fbits);
+ }
+ void Fdiv(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fdiv(vd, vn, vm);
+ }
+ void Fmax(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fmax(vd, vn, vm);
+ }
+ void Fmaxnm(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fmaxnm(vd, vn, vm);
+ }
+ void Fmin(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fmin(vd, vn, vm);
+ }
+ void Fminnm(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fminnm(vd, vn, vm);
+ }
+ void Fmov(VRegister vd, VRegister vn) {
+ SingleEmissionCheckScope guard(this);
+    // Only skip the instruction when vd and vn are the same D register.
+    // fmov(s0, s0) is not a no-op because it clears the top word of
+ // d0. Technically, fmov(d0, d0) is not a no-op either because it clears
+ // the top of q0, but VRegister does not currently support Q registers.
+ if (!vd.Is(vn) || !vd.Is64Bits()) {
+ fmov(vd, vn);
+ }
+ }
+ void Fmov(VRegister vd, Register rn) {
+ SingleEmissionCheckScope guard(this);
+ fmov(vd, rn);
+ }
+ void Fmov(const VRegister& vd, int index, const Register& rn) {
+ SingleEmissionCheckScope guard(this);
+ fmov(vd, index, rn);
+ }
+ void Fmov(const Register& rd, const VRegister& vn, int index) {
+ SingleEmissionCheckScope guard(this);
+ fmov(rd, vn, index);
+ }
+
+ // Provide explicit double and float interfaces for FP immediate moves, rather
+ // than relying on implicit C++ casts. This allows signalling NaNs to be
+ // preserved when the immediate matches the format of vd. Most systems convert
+ // signalling NaNs to quiet NaNs when converting between float and double.
+ void Fmov(VRegister vd, double imm);
+ void Fmov(VRegister vd, float imm);
+ // Provide a template to allow other types to be converted automatically.
+ template<typename T>
+ void Fmov(VRegister vd, T imm) {
+ Fmov(vd, static_cast<double>(imm));
+ }
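+  // A usage sketch (illustrative, not part of the original source): the
+  // explicit overloads keep the immediate in the format of the destination, so
+  // a float immediate written to an S register is not round-tripped through
+  // double.
+  //
+  //   masm.Fmov(d0, 1.0);   // double immediate into a D register.
+  //   masm.Fmov(s0, 1.0f);  // float immediate into an S register.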
+ void Fmov(Register rd, VRegister vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fmov(rd, vn);
+ }
+ void Fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fmul(vd, vn, vm);
+ }
+ void Fnmul(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fnmul(vd, vn, vm);
+ }
+ void Fmadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ SingleEmissionCheckScope guard(this);
+ fmadd(vd, vn, vm, va);
+ }
+ void Fmsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ SingleEmissionCheckScope guard(this);
+ fmsub(vd, vn, vm, va);
+ }
+ void Fnmadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ SingleEmissionCheckScope guard(this);
+ fnmadd(vd, vn, vm, va);
+ }
+ void Fnmsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ SingleEmissionCheckScope guard(this);
+ fnmsub(vd, vn, vm, va);
+ }
+ void Fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fsub(vd, vn, vm);
+ }
+ void Hint(SystemHint code) {
+ SingleEmissionCheckScope guard(this);
+ hint(code);
+ }
+ void Hlt(int code) {
+ SingleEmissionCheckScope guard(this);
+ hlt(code);
+ }
+ void Isb() {
+ SingleEmissionCheckScope guard(this);
+ isb();
+ }
+ void Ldar(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldar(rt, src);
+ }
+ void Ldarb(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldarb(rt, src);
+ }
+ void Ldarh(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldarh(rt, src);
+ }
+ void Ldaxp(const Register& rt, const Register& rt2, const MemOperand& src) {
+ VIXL_ASSERT(!rt.Aliases(rt2));
+ SingleEmissionCheckScope guard(this);
+ ldaxp(rt, rt2, src);
+ }
+ void Ldaxr(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldaxr(rt, src);
+ }
+ void Ldaxrb(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldaxrb(rt, src);
+ }
+ void Ldaxrh(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldaxrh(rt, src);
+ }
+
+// clang-format off
+#define COMPARE_AND_SWAP_SINGLE_MACRO_LIST(V) \
+ V(cas, Cas) \
+ V(casa, Casa) \
+ V(casl, Casl) \
+ V(casal, Casal) \
+ V(casb, Casb) \
+ V(casab, Casab) \
+ V(caslb, Caslb) \
+ V(casalb, Casalb) \
+ V(cash, Cash) \
+ V(casah, Casah) \
+ V(caslh, Caslh) \
+ V(casalh, Casalh)
+ // clang-format on
+
+#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(rs, rt, src); \
+ }
+ COMPARE_AND_SWAP_SINGLE_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+#undef DEFINE_MACRO_ASM_FUNC
+
+// clang-format off
+#define COMPARE_AND_SWAP_PAIR_MACRO_LIST(V) \
+ V(casp, Casp) \
+ V(caspa, Caspa) \
+ V(caspl, Caspl) \
+ V(caspal, Caspal)
+ // clang-format on
+
+#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const Register& rs, const Register& rs2, const Register& rt, \
+ const Register& rt2, const MemOperand& src) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(rs, rs2, rt, rt2, src); \
+ }
+ COMPARE_AND_SWAP_PAIR_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+#undef DEFINE_MACRO_ASM_FUNC
+
+// These macros generate all the variations of the atomic memory operations,
+// e.g. ldadd, ldadda, ldaddb, staddl, etc.
+
+// clang-format off
+#define ATOMIC_MEMORY_SIMPLE_MACRO_LIST(V, DEF, MASM_PRE, ASM_PRE) \
+ V(DEF, MASM_PRE##add, ASM_PRE##add) \
+ V(DEF, MASM_PRE##clr, ASM_PRE##clr) \
+ V(DEF, MASM_PRE##eor, ASM_PRE##eor) \
+ V(DEF, MASM_PRE##set, ASM_PRE##set) \
+ V(DEF, MASM_PRE##smax, ASM_PRE##smax) \
+ V(DEF, MASM_PRE##smin, ASM_PRE##smin) \
+ V(DEF, MASM_PRE##umax, ASM_PRE##umax) \
+ V(DEF, MASM_PRE##umin, ASM_PRE##umin)
+
+#define ATOMIC_MEMORY_STORE_MACRO_MODES(V, MASM, ASM) \
+ V(MASM, ASM) \
+ V(MASM##l, ASM##l) \
+ V(MASM##b, ASM##b) \
+ V(MASM##lb, ASM##lb) \
+ V(MASM##h, ASM##h) \
+ V(MASM##lh, ASM##lh)
+
+#define ATOMIC_MEMORY_LOAD_MACRO_MODES(V, MASM, ASM) \
+ ATOMIC_MEMORY_STORE_MACRO_MODES(V, MASM, ASM) \
+ V(MASM##a, ASM##a) \
+ V(MASM##al, ASM##al) \
+ V(MASM##ab, ASM##ab) \
+ V(MASM##alb, ASM##alb) \
+ V(MASM##ah, ASM##ah) \
+ V(MASM##alh, ASM##alh)
+ // clang-format on
+
+#define DEFINE_MACRO_LOAD_ASM_FUNC(MASM, ASM) \
+ void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(rs, rt, src); \
+ }
+#define DEFINE_MACRO_STORE_ASM_FUNC(MASM, ASM) \
+ void MASM(const Register& rs, const MemOperand& src) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(rs, src); \
+ }
+
+ ATOMIC_MEMORY_SIMPLE_MACRO_LIST(ATOMIC_MEMORY_LOAD_MACRO_MODES,
+ DEFINE_MACRO_LOAD_ASM_FUNC,
+ Ld,
+ ld)
+ ATOMIC_MEMORY_SIMPLE_MACRO_LIST(ATOMIC_MEMORY_STORE_MACRO_MODES,
+ DEFINE_MACRO_STORE_ASM_FUNC,
+ St,
+ st)
+
+#define DEFINE_MACRO_SWP_ASM_FUNC(MASM, ASM) \
+ void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(rs, rt, src); \
+ }
+
+ ATOMIC_MEMORY_LOAD_MACRO_MODES(DEFINE_MACRO_SWP_ASM_FUNC, Swp, swp)
+
+#undef DEFINE_MACRO_LOAD_ASM_FUNC
+#undef DEFINE_MACRO_STORE_ASM_FUNC
+#undef DEFINE_MACRO_SWP_ASM_FUNC
+
+ void Ldnp(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldnp(rt, rt2, src);
+ }
+ // Provide both double and float interfaces for FP immediate loads, rather
+ // than relying on implicit C++ casts. This allows signalling NaNs to be
+  // preserved when the immediate matches the format of vt. Most systems convert
+ // signalling NaNs to quiet NaNs when converting between float and double.
+ void Ldr(const VRegister& vt, double imm) {
+ SingleEmissionCheckScope guard(this);
+ if (vt.Is64Bits()) {
+ ldr(vt, imm);
+ } else {
+ ldr(vt, static_cast<float>(imm));
+ }
+ }
+ void Ldr(const VRegister& vt, float imm) {
+ SingleEmissionCheckScope guard(this);
+ if (vt.Is32Bits()) {
+ ldr(vt, imm);
+ } else {
+ ldr(vt, static_cast<double>(imm));
+ }
+ }
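+  // A usage sketch (illustrative, not part of the original source): as with
+  // Fmov above, choosing the overload that matches the register format avoids
+  // an implicit float/double conversion of the immediate.
+  //
+  //   masm.Ldr(d0, 1.0);   // double immediate load.
+  //   masm.Ldr(s1, 0.5f);  // float immediate load.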
+ /*
+ void Ldr(const VRegister& vt, uint64_t high64, uint64_t low64) {
+ VIXL_ASSERT(vt.IsQ());
+ SingleEmissionCheckScope guard(this);
+ ldr(vt, new Literal<uint64_t>(high64, low64,
+ &literal_pool_,
+ RawLiteral::kDeletedOnPlacementByPool));
+ }
+ */
+ void Ldr(const Register& rt, uint64_t imm) {
+ VIXL_ASSERT(!rt.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ldr(rt, imm);
+ }
+ void Ldrsw(const Register& rt, uint32_t imm) {
+ VIXL_ASSERT(!rt.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ldrsw(rt, imm);
+ }
+ void Ldxp(const Register& rt, const Register& rt2, const MemOperand& src) {
+ VIXL_ASSERT(!rt.Aliases(rt2));
+ SingleEmissionCheckScope guard(this);
+ ldxp(rt, rt2, src);
+ }
+ void Ldxr(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldxr(rt, src);
+ }
+ void Ldxrb(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldxrb(rt, src);
+ }
+ void Ldxrh(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldxrh(rt, src);
+ }
+ void Lsl(const Register& rd, const Register& rn, unsigned shift) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ lsl(rd, rn, shift);
+ }
+ void Lsl(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ lslv(rd, rn, rm);
+ }
+ void Lsr(const Register& rd, const Register& rn, unsigned shift) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ lsr(rd, rn, shift);
+ }
+ void Lsr(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ lsrv(rd, rn, rm);
+ }
+ void Madd(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT(!ra.IsZero());
+ SingleEmissionCheckScope guard(this);
+ madd(rd, rn, rm, ra);
+ }
+ void Mneg(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ mneg(rd, rn, rm);
+ }
+ void Mov(const Register& rd, const Register& rn) {
+ SingleEmissionCheckScope guard(this);
+ mov(rd, rn);
+ }
+ void Movk(const Register& rd, uint64_t imm, int shift = -1) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ movk(rd, imm, shift);
+ }
+ void Mrs(const Register& rt, SystemRegister sysreg) {
+ VIXL_ASSERT(!rt.IsZero());
+ SingleEmissionCheckScope guard(this);
+ mrs(rt, sysreg);
+ }
+ void Msr(SystemRegister sysreg, const Register& rt) {
+ VIXL_ASSERT(!rt.IsZero());
+ SingleEmissionCheckScope guard(this);
+ msr(sysreg, rt);
+ }
+ void Sys(int op1, int crn, int crm, int op2, const Register& rt = xzr) {
+ SingleEmissionCheckScope guard(this);
+ sys(op1, crn, crm, op2, rt);
+ }
+ void Dc(DataCacheOp op, const Register& rt) {
+ SingleEmissionCheckScope guard(this);
+ dc(op, rt);
+ }
+ void Ic(InstructionCacheOp op, const Register& rt) {
+ SingleEmissionCheckScope guard(this);
+ ic(op, rt);
+ }
+ void Msub(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT(!ra.IsZero());
+ SingleEmissionCheckScope guard(this);
+ msub(rd, rn, rm, ra);
+ }
+ void Mul(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ mul(rd, rn, rm);
+ }
+ void Nop() {
+ SingleEmissionCheckScope guard(this);
+ nop();
+ }
+ void Csdb() {
+ SingleEmissionCheckScope guard(this);
+ csdb();
+ }
+ void Rbit(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ rbit(rd, rn);
+ }
+ void Ret(const Register& xn = lr) {
+ VIXL_ASSERT(!xn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ret(xn);
+ }
+ void Rev(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ rev(rd, rn);
+ }
+ void Rev16(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ rev16(rd, rn);
+ }
+ void Rev32(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ rev32(rd, rn);
+ }
+ void Ror(const Register& rd, const Register& rs, unsigned shift) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rs.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ror(rd, rs, shift);
+ }
+ void Ror(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ rorv(rd, rn, rm);
+ }
+ void Sbfiz(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ sbfiz(rd, rn, lsb, width);
+ }
+ void Sbfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ sbfm(rd, rn, immr, imms);
+ }
+ void Sbfx(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ sbfx(rd, rn, lsb, width);
+ }
+ void Scvtf(const VRegister& vd, const Register& rn, int fbits = 0) {
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ scvtf(vd, rn, fbits);
+ }
+ void Sdiv(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ sdiv(rd, rn, rm);
+ }
+ void Smaddl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT(!ra.IsZero());
+ SingleEmissionCheckScope guard(this);
+ smaddl(rd, rn, rm, ra);
+ }
+ void Smsubl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT(!ra.IsZero());
+ SingleEmissionCheckScope guard(this);
+ smsubl(rd, rn, rm, ra);
+ }
+ void Smull(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ smull(rd, rn, rm);
+ }
+ void Smulh(const Register& xd, const Register& xn, const Register& xm) {
+ VIXL_ASSERT(!xd.IsZero());
+ VIXL_ASSERT(!xn.IsZero());
+ VIXL_ASSERT(!xm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ smulh(xd, xn, xm);
+ }
+ void Stlr(const Register& rt, const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ stlr(rt, dst);
+ }
+ void Stlrb(const Register& rt, const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ stlrb(rt, dst);
+ }
+ void Stlrh(const Register& rt, const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ stlrh(rt, dst);
+ }
+ void Stlxp(const Register& rs,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ VIXL_ASSERT(!rs.Aliases(rt2));
+ SingleEmissionCheckScope guard(this);
+ stlxp(rs, rt, rt2, dst);
+ }
+ void Stlxr(const Register& rs, const Register& rt, const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ SingleEmissionCheckScope guard(this);
+ stlxr(rs, rt, dst);
+ }
+ void Stlxrb(const Register& rs, const Register& rt, const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ SingleEmissionCheckScope guard(this);
+ stlxrb(rs, rt, dst);
+ }
+ void Stlxrh(const Register& rs, const Register& rt, const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ SingleEmissionCheckScope guard(this);
+ stlxrh(rs, rt, dst);
+ }
+ void Stnp(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ stnp(rt, rt2, dst);
+ }
+ void Stxp(const Register& rs,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ VIXL_ASSERT(!rs.Aliases(rt2));
+ SingleEmissionCheckScope guard(this);
+ stxp(rs, rt, rt2, dst);
+ }
+ void Stxr(const Register& rs, const Register& rt, const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ SingleEmissionCheckScope guard(this);
+ stxr(rs, rt, dst);
+ }
+ void Stxrb(const Register& rs, const Register& rt, const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ SingleEmissionCheckScope guard(this);
+ stxrb(rs, rt, dst);
+ }
+ void Stxrh(const Register& rs, const Register& rt, const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ SingleEmissionCheckScope guard(this);
+ stxrh(rs, rt, dst);
+ }
+ void Svc(int code) {
+ SingleEmissionCheckScope guard(this);
+ svc(code);
+ }
+ void Sxtb(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ sxtb(rd, rn);
+ }
+ void Sxth(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ sxth(rd, rn);
+ }
+ void Sxtw(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ sxtw(rd, rn);
+ }
+ void Tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbl(vd, vn, vm);
+ }
+ void Tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbl(vd, vn, vn2, vm);
+ }
+ void Tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbl(vd, vn, vn2, vn3, vm);
+ }
+ void Tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vn4,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbl(vd, vn, vn2, vn3, vn4, vm);
+ }
+ void Tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbx(vd, vn, vm);
+ }
+ void Tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbx(vd, vn, vn2, vm);
+ }
+ void Tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbx(vd, vn, vn2, vn3, vm);
+ }
+ void Tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vn4,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbx(vd, vn, vn2, vn3, vn4, vm);
+ }
+ void Tbnz(const Register& rt, unsigned bit_pos, Label* label);
+ void Tbz(const Register& rt, unsigned bit_pos, Label* label);
+ void Ubfiz(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ubfiz(rd, rn, lsb, width);
+ }
+ void Ubfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ubfm(rd, rn, immr, imms);
+ }
+ void Ubfx(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ubfx(rd, rn, lsb, width);
+ }
+ void Ucvtf(const VRegister& vd, const Register& rn, int fbits = 0) {
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ucvtf(vd, rn, fbits);
+ }
+ void Udiv(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ udiv(rd, rn, rm);
+ }
+ void Umaddl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT(!ra.IsZero());
+ SingleEmissionCheckScope guard(this);
+ umaddl(rd, rn, rm, ra);
+ }
+ void Umull(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ umull(rd, rn, rm);
+ }
+ void Umulh(const Register& xd, const Register& xn, const Register& xm) {
+ VIXL_ASSERT(!xd.IsZero());
+ VIXL_ASSERT(!xn.IsZero());
+ VIXL_ASSERT(!xm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ umulh(xd, xn, xm);
+ }
+ void Umsubl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT(!ra.IsZero());
+ SingleEmissionCheckScope guard(this);
+ umsubl(rd, rn, rm, ra);
+ }
+
+ void Unreachable() {
+ SingleEmissionCheckScope guard(this);
+ Emit(UNDEFINED_INST_PATTERN);
+ }
+
+ void Uxtb(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ uxtb(rd, rn);
+ }
+ void Uxth(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ uxth(rd, rn);
+ }
+ void Uxtw(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ uxtw(rd, rn);
+ }
+
+ // NEON 3 vector register instructions.
+ #define NEON_3VREG_MACRO_LIST(V) \
+ V(add, Add) \
+ V(addhn, Addhn) \
+ V(addhn2, Addhn2) \
+ V(addp, Addp) \
+ V(and_, And) \
+ V(bic, Bic) \
+ V(bif, Bif) \
+ V(bit, Bit) \
+ V(bsl, Bsl) \
+ V(cmeq, Cmeq) \
+ V(cmge, Cmge) \
+ V(cmgt, Cmgt) \
+ V(cmhi, Cmhi) \
+ V(cmhs, Cmhs) \
+ V(cmtst, Cmtst) \
+ V(eor, Eor) \
+ V(fabd, Fabd) \
+ V(facge, Facge) \
+ V(facgt, Facgt) \
+ V(faddp, Faddp) \
+ V(fcmeq, Fcmeq) \
+ V(fcmge, Fcmge) \
+ V(fcmgt, Fcmgt) \
+ V(fmaxnmp, Fmaxnmp) \
+ V(fmaxp, Fmaxp) \
+ V(fminnmp, Fminnmp) \
+ V(fminp, Fminp) \
+ V(fmla, Fmla) \
+ V(fmls, Fmls) \
+ V(fmulx, Fmulx) \
+ V(frecps, Frecps) \
+ V(frsqrts, Frsqrts) \
+ V(mla, Mla) \
+ V(mls, Mls) \
+ V(mul, Mul) \
+ V(orn, Orn) \
+ V(orr, Orr) \
+ V(pmul, Pmul) \
+ V(pmull, Pmull) \
+ V(pmull2, Pmull2) \
+ V(raddhn, Raddhn) \
+ V(raddhn2, Raddhn2) \
+ V(rsubhn, Rsubhn) \
+ V(rsubhn2, Rsubhn2) \
+ V(saba, Saba) \
+ V(sabal, Sabal) \
+ V(sabal2, Sabal2) \
+ V(sabd, Sabd) \
+ V(sabdl, Sabdl) \
+ V(sabdl2, Sabdl2) \
+ V(saddl, Saddl) \
+ V(saddl2, Saddl2) \
+ V(saddw, Saddw) \
+ V(saddw2, Saddw2) \
+ V(shadd, Shadd) \
+ V(shsub, Shsub) \
+ V(smax, Smax) \
+ V(smaxp, Smaxp) \
+ V(smin, Smin) \
+ V(sminp, Sminp) \
+ V(smlal, Smlal) \
+ V(smlal2, Smlal2) \
+ V(smlsl, Smlsl) \
+ V(smlsl2, Smlsl2) \
+ V(smull, Smull) \
+ V(smull2, Smull2) \
+ V(sqadd, Sqadd) \
+ V(sqdmlal, Sqdmlal) \
+ V(sqdmlal2, Sqdmlal2) \
+ V(sqdmlsl, Sqdmlsl) \
+ V(sqdmlsl2, Sqdmlsl2) \
+ V(sqdmulh, Sqdmulh) \
+ V(sqdmull, Sqdmull) \
+ V(sqdmull2, Sqdmull2) \
+ V(sqrdmulh, Sqrdmulh) \
+ V(sqrshl, Sqrshl) \
+ V(sqshl, Sqshl) \
+ V(sqsub, Sqsub) \
+ V(srhadd, Srhadd) \
+ V(srshl, Srshl) \
+ V(sshl, Sshl) \
+ V(ssubl, Ssubl) \
+ V(ssubl2, Ssubl2) \
+ V(ssubw, Ssubw) \
+ V(ssubw2, Ssubw2) \
+ V(sub, Sub) \
+ V(subhn, Subhn) \
+ V(subhn2, Subhn2) \
+ V(trn1, Trn1) \
+ V(trn2, Trn2) \
+ V(uaba, Uaba) \
+ V(uabal, Uabal) \
+ V(uabal2, Uabal2) \
+ V(uabd, Uabd) \
+ V(uabdl, Uabdl) \
+ V(uabdl2, Uabdl2) \
+ V(uaddl, Uaddl) \
+ V(uaddl2, Uaddl2) \
+ V(uaddw, Uaddw) \
+ V(uaddw2, Uaddw2) \
+ V(uhadd, Uhadd) \
+ V(uhsub, Uhsub) \
+ V(umax, Umax) \
+ V(umaxp, Umaxp) \
+ V(umin, Umin) \
+ V(uminp, Uminp) \
+ V(umlal, Umlal) \
+ V(umlal2, Umlal2) \
+ V(umlsl, Umlsl) \
+ V(umlsl2, Umlsl2) \
+ V(umull, Umull) \
+ V(umull2, Umull2) \
+ V(uqadd, Uqadd) \
+ V(uqrshl, Uqrshl) \
+ V(uqshl, Uqshl) \
+ V(uqsub, Uqsub) \
+ V(urhadd, Urhadd) \
+ V(urshl, Urshl) \
+ V(ushl, Ushl) \
+ V(usubl, Usubl) \
+ V(usubl2, Usubl2) \
+ V(usubw, Usubw) \
+ V(usubw2, Usubw2) \
+ V(uzp1, Uzp1) \
+ V(uzp2, Uzp2) \
+ V(zip1, Zip1) \
+ V(zip2, Zip2)
+
+ #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(vd, vn, vm); \
+ }
+ NEON_3VREG_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+ #undef DEFINE_MACRO_ASM_FUNC
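+  // For reference, each (ASM, MASM) pair above expands to a wrapper of this
+  // shape (shown for `add`/`Add`):
+  //
+  //   void Add(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+  //     SingleEmissionCheckScope guard(this);
+  //     add(vd, vn, vm);
+  //   }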
+
+ // NEON 2 vector register instructions.
+ #define NEON_2VREG_MACRO_LIST(V) \
+ V(abs, Abs) \
+ V(addp, Addp) \
+ V(addv, Addv) \
+ V(cls, Cls) \
+ V(clz, Clz) \
+ V(cnt, Cnt) \
+ V(fabs, Fabs) \
+ V(faddp, Faddp) \
+ V(fcvtas, Fcvtas) \
+ V(fcvtau, Fcvtau) \
+ V(fcvtms, Fcvtms) \
+ V(fcvtmu, Fcvtmu) \
+ V(fcvtns, Fcvtns) \
+ V(fcvtnu, Fcvtnu) \
+ V(fcvtps, Fcvtps) \
+ V(fcvtpu, Fcvtpu) \
+ V(fmaxnmp, Fmaxnmp) \
+ V(fmaxnmv, Fmaxnmv) \
+ V(fmaxp, Fmaxp) \
+ V(fmaxv, Fmaxv) \
+ V(fminnmp, Fminnmp) \
+ V(fminnmv, Fminnmv) \
+ V(fminp, Fminp) \
+ V(fminv, Fminv) \
+ V(fneg, Fneg) \
+ V(frecpe, Frecpe) \
+ V(frecpx, Frecpx) \
+ V(frinta, Frinta) \
+ V(frinti, Frinti) \
+ V(frintm, Frintm) \
+ V(frintn, Frintn) \
+ V(frintp, Frintp) \
+ V(frintx, Frintx) \
+ V(frintz, Frintz) \
+ V(frsqrte, Frsqrte) \
+ V(fsqrt, Fsqrt) \
+ V(mov, Mov) \
+ V(mvn, Mvn) \
+ V(neg, Neg) \
+ V(not_, Not) \
+ V(rbit, Rbit) \
+ V(rev16, Rev16) \
+ V(rev32, Rev32) \
+ V(rev64, Rev64) \
+ V(sadalp, Sadalp) \
+ V(saddlp, Saddlp) \
+ V(saddlv, Saddlv) \
+ V(smaxv, Smaxv) \
+ V(sminv, Sminv) \
+ V(sqabs, Sqabs) \
+ V(sqneg, Sqneg) \
+ V(sqxtn, Sqxtn) \
+ V(sqxtn2, Sqxtn2) \
+ V(sqxtun, Sqxtun) \
+ V(sqxtun2, Sqxtun2) \
+ V(suqadd, Suqadd) \
+ V(sxtl, Sxtl) \
+ V(sxtl2, Sxtl2) \
+ V(uadalp, Uadalp) \
+ V(uaddlp, Uaddlp) \
+ V(uaddlv, Uaddlv) \
+ V(umaxv, Umaxv) \
+ V(uminv, Uminv) \
+ V(uqxtn, Uqxtn) \
+ V(uqxtn2, Uqxtn2) \
+ V(urecpe, Urecpe) \
+ V(ursqrte, Ursqrte) \
+ V(usqadd, Usqadd) \
+ V(uxtl, Uxtl) \
+ V(uxtl2, Uxtl2) \
+ V(xtn, Xtn) \
+ V(xtn2, Xtn2)
+
+ #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const VRegister& vd, \
+ const VRegister& vn) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(vd, vn); \
+ }
+ NEON_2VREG_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+ #undef DEFINE_MACRO_ASM_FUNC
+
+ // NEON 2 vector register with immediate instructions.
+ #define NEON_2VREG_FPIMM_MACRO_LIST(V) \
+ V(fcmeq, Fcmeq) \
+ V(fcmge, Fcmge) \
+ V(fcmgt, Fcmgt) \
+ V(fcmle, Fcmle) \
+ V(fcmlt, Fcmlt)
+
+ #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const VRegister& vd, \
+ const VRegister& vn, \
+ double imm) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(vd, vn, imm); \
+ }
+ NEON_2VREG_FPIMM_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+ #undef DEFINE_MACRO_ASM_FUNC
+
+ // NEON by element instructions.
+ #define NEON_BYELEMENT_MACRO_LIST(V) \
+ V(fmul, Fmul) \
+ V(fmla, Fmla) \
+ V(fmls, Fmls) \
+ V(fmulx, Fmulx) \
+ V(mul, Mul) \
+ V(mla, Mla) \
+ V(mls, Mls) \
+ V(sqdmulh, Sqdmulh) \
+ V(sqrdmulh, Sqrdmulh) \
+ V(sqdmull, Sqdmull) \
+ V(sqdmull2, Sqdmull2) \
+ V(sqdmlal, Sqdmlal) \
+ V(sqdmlal2, Sqdmlal2) \
+ V(sqdmlsl, Sqdmlsl) \
+ V(sqdmlsl2, Sqdmlsl2) \
+ V(smull, Smull) \
+ V(smull2, Smull2) \
+ V(smlal, Smlal) \
+ V(smlal2, Smlal2) \
+ V(smlsl, Smlsl) \
+ V(smlsl2, Smlsl2) \
+ V(umull, Umull) \
+ V(umull2, Umull2) \
+ V(umlal, Umlal) \
+ V(umlal2, Umlal2) \
+ V(umlsl, Umlsl) \
+ V(umlsl2, Umlsl2)
+
+ #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm, \
+ int vm_index \
+ ) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(vd, vn, vm, vm_index); \
+ }
+ NEON_BYELEMENT_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+ #undef DEFINE_MACRO_ASM_FUNC
+
+ #define NEON_2VREG_SHIFT_MACRO_LIST(V) \
+ V(rshrn, Rshrn) \
+ V(rshrn2, Rshrn2) \
+ V(shl, Shl) \
+ V(shll, Shll) \
+ V(shll2, Shll2) \
+ V(shrn, Shrn) \
+ V(shrn2, Shrn2) \
+ V(sli, Sli) \
+ V(sqrshrn, Sqrshrn) \
+ V(sqrshrn2, Sqrshrn2) \
+ V(sqrshrun, Sqrshrun) \
+ V(sqrshrun2, Sqrshrun2) \
+ V(sqshl, Sqshl) \
+ V(sqshlu, Sqshlu) \
+ V(sqshrn, Sqshrn) \
+ V(sqshrn2, Sqshrn2) \
+ V(sqshrun, Sqshrun) \
+ V(sqshrun2, Sqshrun2) \
+ V(sri, Sri) \
+ V(srshr, Srshr) \
+ V(srsra, Srsra) \
+ V(sshll, Sshll) \
+ V(sshll2, Sshll2) \
+ V(sshr, Sshr) \
+ V(ssra, Ssra) \
+ V(uqrshrn, Uqrshrn) \
+ V(uqrshrn2, Uqrshrn2) \
+ V(uqshl, Uqshl) \
+ V(uqshrn, Uqshrn) \
+ V(uqshrn2, Uqshrn2) \
+ V(urshr, Urshr) \
+ V(ursra, Ursra) \
+ V(ushll, Ushll) \
+ V(ushll2, Ushll2) \
+ V(ushr, Ushr) \
+  V(usra, Usra)
+
+ #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const VRegister& vd, \
+ const VRegister& vn, \
+ int shift) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(vd, vn, shift); \
+ }
+ NEON_2VREG_SHIFT_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+ #undef DEFINE_MACRO_ASM_FUNC
+
+ void Bic(const VRegister& vd,
+ const int imm8,
+ const int left_shift = 0) {
+ SingleEmissionCheckScope guard(this);
+ bic(vd, imm8, left_shift);
+ }
+ void Cmeq(const VRegister& vd,
+ const VRegister& vn,
+ int imm) {
+ SingleEmissionCheckScope guard(this);
+ cmeq(vd, vn, imm);
+ }
+ void Cmge(const VRegister& vd,
+ const VRegister& vn,
+ int imm) {
+ SingleEmissionCheckScope guard(this);
+ cmge(vd, vn, imm);
+ }
+ void Cmgt(const VRegister& vd,
+ const VRegister& vn,
+ int imm) {
+ SingleEmissionCheckScope guard(this);
+ cmgt(vd, vn, imm);
+ }
+ void Cmle(const VRegister& vd,
+ const VRegister& vn,
+ int imm) {
+ SingleEmissionCheckScope guard(this);
+ cmle(vd, vn, imm);
+ }
+ void Cmlt(const VRegister& vd,
+ const VRegister& vn,
+ int imm) {
+ SingleEmissionCheckScope guard(this);
+ cmlt(vd, vn, imm);
+ }
+ void Dup(const VRegister& vd,
+ const VRegister& vn,
+ int index) {
+ SingleEmissionCheckScope guard(this);
+ dup(vd, vn, index);
+ }
+ void Dup(const VRegister& vd,
+ const Register& rn) {
+ SingleEmissionCheckScope guard(this);
+ dup(vd, rn);
+ }
+ void Ext(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int index) {
+ SingleEmissionCheckScope guard(this);
+ ext(vd, vn, vm, index);
+ }
+ void Ins(const VRegister& vd,
+ int vd_index,
+ const VRegister& vn,
+ int vn_index) {
+ SingleEmissionCheckScope guard(this);
+ ins(vd, vd_index, vn, vn_index);
+ }
+ void Ins(const VRegister& vd,
+ int vd_index,
+ const Register& rn) {
+ SingleEmissionCheckScope guard(this);
+ ins(vd, vd_index, rn);
+ }
+ void Ld1(const VRegister& vt,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld1(vt, src);
+ }
+ void Ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld1(vt, vt2, src);
+ }
+ void Ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld1(vt, vt2, vt3, src);
+ }
+ void Ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld1(vt, vt2, vt3, vt4, src);
+ }
+ void Ld1(const VRegister& vt,
+ int lane,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld1(vt, lane, src);
+ }
+ void Ld1r(const VRegister& vt,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld1r(vt, src);
+ }
+ void Ld2(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld2(vt, vt2, src);
+ }
+ void Ld2(const VRegister& vt,
+ const VRegister& vt2,
+ int lane,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld2(vt, vt2, lane, src);
+ }
+ void Ld2r(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld2r(vt, vt2, src);
+ }
+ void Ld3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld3(vt, vt2, vt3, src);
+ }
+ void Ld3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ int lane,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld3(vt, vt2, vt3, lane, src);
+ }
+ void Ld3r(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld3r(vt, vt2, vt3, src);
+ }
+ void Ld4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld4(vt, vt2, vt3, vt4, src);
+ }
+ void Ld4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ int lane,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld4(vt, vt2, vt3, vt4, lane, src);
+ }
+ void Ld4r(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld4r(vt, vt2, vt3, vt4, src);
+ }
+ void Mov(const VRegister& vd,
+ int vd_index,
+ const VRegister& vn,
+ int vn_index) {
+ SingleEmissionCheckScope guard(this);
+ mov(vd, vd_index, vn, vn_index);
+ }
+ void Mov(const VRegister& vd,
+ const VRegister& vn,
+ int index) {
+ SingleEmissionCheckScope guard(this);
+ mov(vd, vn, index);
+ }
+ void Mov(const VRegister& vd,
+ int vd_index,
+ const Register& rn) {
+ SingleEmissionCheckScope guard(this);
+ mov(vd, vd_index, rn);
+ }
+ void Mov(const Register& rd,
+ const VRegister& vn,
+ int vn_index) {
+ SingleEmissionCheckScope guard(this);
+ mov(rd, vn, vn_index);
+ }
+ void Movi(const VRegister& vd,
+ uint64_t imm,
+ Shift shift = LSL,
+ int shift_amount = 0);
+ void Movi(const VRegister& vd, uint64_t hi, uint64_t lo);
+ void Mvni(const VRegister& vd,
+ const int imm8,
+ Shift shift = LSL,
+ const int shift_amount = 0) {
+ SingleEmissionCheckScope guard(this);
+ mvni(vd, imm8, shift, shift_amount);
+ }
+ void Orr(const VRegister& vd,
+ const int imm8,
+ const int left_shift = 0) {
+ SingleEmissionCheckScope guard(this);
+ orr(vd, imm8, left_shift);
+ }
+ void Scvtf(const VRegister& vd,
+ const VRegister& vn,
+ int fbits = 0) {
+ SingleEmissionCheckScope guard(this);
+ scvtf(vd, vn, fbits);
+ }
+ void Ucvtf(const VRegister& vd,
+ const VRegister& vn,
+ int fbits = 0) {
+ SingleEmissionCheckScope guard(this);
+ ucvtf(vd, vn, fbits);
+ }
+ void Fcvtzs(const VRegister& vd,
+ const VRegister& vn,
+ int fbits = 0) {
+ SingleEmissionCheckScope guard(this);
+ fcvtzs(vd, vn, fbits);
+ }
+ void Fcvtzu(const VRegister& vd,
+ const VRegister& vn,
+ int fbits = 0) {
+ SingleEmissionCheckScope guard(this);
+ fcvtzu(vd, vn, fbits);
+ }
+ void St1(const VRegister& vt,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st1(vt, dst);
+ }
+ void St1(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st1(vt, vt2, dst);
+ }
+ void St1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st1(vt, vt2, vt3, dst);
+ }
+ void St1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st1(vt, vt2, vt3, vt4, dst);
+ }
+ void St1(const VRegister& vt,
+ int lane,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st1(vt, lane, dst);
+ }
+ void St2(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st2(vt, vt2, dst);
+ }
+ void St3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st3(vt, vt2, vt3, dst);
+ }
+ void St4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st4(vt, vt2, vt3, vt4, dst);
+ }
+ void St2(const VRegister& vt,
+ const VRegister& vt2,
+ int lane,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st2(vt, vt2, lane, dst);
+ }
+ void St3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ int lane,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st3(vt, vt2, vt3, lane, dst);
+ }
+ void St4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ int lane,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st4(vt, vt2, vt3, vt4, lane, dst);
+ }
+ void Smov(const Register& rd,
+ const VRegister& vn,
+ int vn_index) {
+ SingleEmissionCheckScope guard(this);
+ smov(rd, vn, vn_index);
+ }
+ void Umov(const Register& rd,
+ const VRegister& vn,
+ int vn_index) {
+ SingleEmissionCheckScope guard(this);
+ umov(rd, vn, vn_index);
+ }
+ void Crc32b(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32b(rd, rn, rm);
+ }
+ void Crc32h(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32h(rd, rn, rm);
+ }
+ void Crc32w(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32w(rd, rn, rm);
+ }
+ void Crc32x(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32x(rd, rn, rm);
+ }
+ void Crc32cb(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32cb(rd, rn, rm);
+ }
+ void Crc32ch(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32ch(rd, rn, rm);
+ }
+ void Crc32cw(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32cw(rd, rn, rm);
+ }
+ void Crc32cx(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32cx(rd, rn, rm);
+ }
+
+ // Push the system stack pointer (sp) down to allow the same to be done to
+ // the current stack pointer (according to StackPointer()). This must be
+ // called _before_ accessing the memory.
+ //
+ // This is necessary when pushing or otherwise adding things to the stack, to
+ // satisfy the AAPCS64 constraint that the memory below the system stack
+ // pointer is not accessed.
+ //
+ // This method asserts that StackPointer() is not sp, since the call does
+ // not make sense in that context.
+ //
+ // TODO: This method can only accept values of 'space' that can be encoded in
+ // one instruction. Refer to the implementation for details.
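+  //
+  // A minimal usage sketch (illustrative only; assumes StackPointer() has
+  // been redirected away from sp, e.g. via SetStackPointer64):
+  //
+  //   masm.BumpSystemStackPointer(Operand(16));
+  //   masm.Str(x0, MemOperand(masm.StackPointer(), -16, PreIndex));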
+ void BumpSystemStackPointer(const Operand& space);
+
+ // Set the current stack pointer, but don't generate any code.
+ void SetStackPointer64(const Register& stack_pointer) {
+ VIXL_ASSERT(!TmpList()->IncludesAliasOf(stack_pointer));
+ sp_ = stack_pointer;
+ }
+
+  // Return the current stack pointer, as set by SetStackPointer64.
+ const Register& StackPointer() const {
+ return sp_;
+ }
+
+ const Register& GetStackPointer64() const {
+ return sp_;
+ }
+
+ js::jit::RegisterOrSP getStackPointer() const {
+ return js::jit::RegisterOrSP(sp_.code());
+ }
+
+ CPURegList* TmpList() { return &tmp_list_; }
+ CPURegList* FPTmpList() { return &fptmp_list_; }
+
+ // Trace control when running the debug simulator.
+ //
+ // For example:
+ //
+ // __ Trace(LOG_REGS, TRACE_ENABLE);
+  // Will add registers to the trace if they are not already being traced.
+ //
+ // __ Trace(LOG_DISASM, TRACE_DISABLE);
+ // Will stop logging disassembly. It has no effect if the disassembly wasn't
+ // already being logged.
+ void Trace(TraceParameters parameters, TraceCommand command);
+
+ // Log the requested data independently of what is being traced.
+ //
+ // For example:
+ //
+ // __ Log(LOG_FLAGS)
+ // Will output the flags.
+ void Log(TraceParameters parameters);
+
+ // Enable or disable instrumentation when an Instrument visitor is attached to
+ // the simulator.
+ void EnableInstrumentation();
+ void DisableInstrumentation();
+
+ // Add a marker to the instrumentation data produced by an Instrument visitor.
+  // The name is a two-character string that will be attached to the marker in
+ // the output data.
+ void AnnotateInstrumentation(const char* marker_name);
+
+ private:
+ // The actual Push and Pop implementations. These don't generate any code
+ // other than that required for the push or pop. This allows
+ // (Push|Pop)CPURegList to bundle together setup code for a large block of
+ // registers.
+ //
+ // Note that size is per register, and is specified in bytes.
+ void PushHelper(int count, int size,
+ const CPURegister& src0, const CPURegister& src1,
+ const CPURegister& src2, const CPURegister& src3);
+ void PopHelper(int count, int size,
+ const CPURegister& dst0, const CPURegister& dst1,
+ const CPURegister& dst2, const CPURegister& dst3);
+
+ void Movi16bitHelper(const VRegister& vd, uint64_t imm);
+ void Movi32bitHelper(const VRegister& vd, uint64_t imm);
+ void Movi64bitHelper(const VRegister& vd, uint64_t imm);
+
+ // Perform necessary maintenance operations before a push or pop.
+ //
+ // Note that size is per register, and is specified in bytes.
+ void PrepareForPush(int count, int size);
+ void PrepareForPop(int count, int size);
+
+ // The actual implementation of load and store operations for CPURegList.
+ enum LoadStoreCPURegListAction {
+ kLoad,
+ kStore
+ };
+ void LoadStoreCPURegListHelper(LoadStoreCPURegListAction operation,
+ CPURegList registers,
+ const MemOperand& mem);
+  // Returns a MemOperand suitable for loading or storing a CPURegList at `mem`.
+ // This helper may allocate registers from `scratch_scope` and generate code
+ // to compute an intermediate address. The resulting MemOperand is only valid
+ // as long as `scratch_scope` remains valid.
+ MemOperand BaseMemOperandForLoadStoreCPURegList(
+ const CPURegList& registers,
+ const MemOperand& mem,
+ UseScratchRegisterScope* scratch_scope);
+
+ bool LabelIsOutOfRange(Label* label, ImmBranchType branch_type) {
+ return !Instruction::IsValidImmPCOffset(branch_type, nextOffset().getOffset() - label->offset());
+ }
+
+ // The register to use as a stack pointer for stack operations.
+ Register sp_;
+
+ // Scratch registers available for use by the MacroAssembler.
+ CPURegList tmp_list_;
+ CPURegList fptmp_list_;
+
+ ptrdiff_t checkpoint_;
+ ptrdiff_t recommended_checkpoint_;
+};
+
+
+// All Assembler emits MUST acquire/release the underlying code buffer. The
+// helper scope below will do so and optionally ensure the buffer is big enough
+// to receive the emit. It is possible to request the scope not to perform any
+// checks (kNoCheck) if, for example, it is known in advance that the buffer
+// size is adequate or some other size-checking mechanism is in place.
+class CodeBufferCheckScope {
+ public:
+ // Tell whether or not the scope needs to ensure the associated CodeBuffer
+ // has enough space for the requested size.
+ enum CheckPolicy {
+ kNoCheck,
+ kCheck
+ };
+
+ // Tell whether or not the scope should assert the amount of code emitted
+ // within the scope is consistent with the requested amount.
+ enum AssertPolicy {
+ kNoAssert, // No assert required.
+ kExactSize, // The code emitted must be exactly size bytes.
+ kMaximumSize // The code emitted must be at most size bytes.
+ };
+
+ CodeBufferCheckScope(Assembler* assm,
+ size_t size,
+ CheckPolicy check_policy = kCheck,
+ AssertPolicy assert_policy = kMaximumSize)
+ { }
+
+ // This is a shortcut for CodeBufferCheckScope(assm, 0, kNoCheck, kNoAssert).
+ explicit CodeBufferCheckScope(Assembler* assm) {}
+};
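+
+// A sketch of the intended use (illustrative only; the constructors above are
+// currently no-ops, so no checking actually happens in this build):
+//
+//   {
+//     CodeBufferCheckScope scope(&assm, 2 * kInstructionSize,
+//                                CodeBufferCheckScope::kCheck,
+//                                CodeBufferCheckScope::kExactSize);
+//     __ add(x0, x1, Operand(x2));
+//     __ ret();
+//   }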
+
+
+// Use this scope when you need a one-to-one mapping between methods and
+// instructions. This scope prevents the MacroAssembler from being called and
+// literal pools from being emitted. It also asserts the number of instructions
+// emitted is what you specified when creating the scope.
+// FIXME: Because the checks in CodeBufferCheckScope are stubbed out in this
+// build, this class asserts nothing.
+class InstructionAccurateScope : public CodeBufferCheckScope {
+ public:
+ InstructionAccurateScope(MacroAssembler* masm,
+ int64_t count,
+ AssertPolicy policy = kExactSize)
+ : CodeBufferCheckScope(masm,
+ (count * kInstructionSize),
+ kCheck,
+ policy) {
+ }
+};
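+
+// A sketch of the intended use (illustrative only; in this build the
+// underlying checks are stubbed out, as noted above):
+//
+//   {
+//     InstructionAccurateScope scope(&masm, 1);
+//     __ br(x16);
+//   }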
+
+
+// This scope utility allows scratch registers to be managed safely. The
+// MacroAssembler's TmpList() (and FPTmpList()) is used as a pool of scratch
+// registers. These registers can be allocated on demand, and will be returned
+// at the end of the scope.
+//
+// When the scope ends, the MacroAssembler's lists will be restored to their
+// original state, even if the lists were modified by some other means.
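+//
+// A minimal usage sketch (illustrative only):
+//
+//   {
+//     UseScratchRegisterScope temps(&masm);
+//     Register scratch = temps.AcquireX();
+//     __ Mov(scratch, 0xdeadbeef);
+//     // `scratch` is returned to TmpList() when `temps` goes out of scope.
+//   }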
+class UseScratchRegisterScope {
+ public:
+ // This constructor implicitly calls the `Open` function to initialise the
+ // scope, so it is ready to use immediately after it has been constructed.
+ explicit UseScratchRegisterScope(MacroAssembler* masm);
+ // This constructor allows deferred and optional initialisation of the scope.
+ // The user is required to explicitly call the `Open` function before using
+ // the scope.
+ UseScratchRegisterScope();
+ // This function performs the actual initialisation work.
+ void Open(MacroAssembler* masm);
+
+ // The destructor always implicitly calls the `Close` function.
+ ~UseScratchRegisterScope();
+ // This function performs the cleaning-up work. It must succeed even if the
+ // scope has not been opened. It is safe to call multiple times.
+ void Close();
+
+
+ bool IsAvailable(const CPURegister& reg) const;
+
+
+ // Take a register from the appropriate temps list. It will be returned
+ // automatically when the scope ends.
+ Register AcquireW() { return AcquireNextAvailable(available_).W(); }
+ Register AcquireX() { return AcquireNextAvailable(available_).X(); }
+ VRegister AcquireS() { return AcquireNextAvailable(availablefp_).S(); }
+ VRegister AcquireD() { return AcquireNextAvailable(availablefp_).D(); }
+ VRegister AcquireQ() { return AcquireNextAvailable(availablefp_).Q(); }
+
+
+ Register AcquireSameSizeAs(const Register& reg);
+ VRegister AcquireSameSizeAs(const VRegister& reg);
+
+
+ // Explicitly release an acquired (or excluded) register, putting it back in
+ // the appropriate temps list.
+ void Release(const CPURegister& reg);
+
+
+ // Make the specified registers available as scratch registers for the
+ // duration of this scope.
+ void Include(const CPURegList& list);
+ void Include(const Register& reg1,
+ const Register& reg2 = NoReg,
+ const Register& reg3 = NoReg,
+ const Register& reg4 = NoReg);
+ void Include(const VRegister& reg1,
+ const VRegister& reg2 = NoVReg,
+ const VRegister& reg3 = NoVReg,
+ const VRegister& reg4 = NoVReg);
+
+
+ // Make sure that the specified registers are not available in this scope.
+ // This can be used to prevent helper functions from using sensitive
+ // registers, for example.
+ void Exclude(const CPURegList& list);
+ void Exclude(const Register& reg1,
+ const Register& reg2 = NoReg,
+ const Register& reg3 = NoReg,
+ const Register& reg4 = NoReg);
+ void Exclude(const VRegister& reg1,
+ const VRegister& reg2 = NoVReg,
+ const VRegister& reg3 = NoVReg,
+ const VRegister& reg4 = NoVReg);
+ void Exclude(const CPURegister& reg1,
+ const CPURegister& reg2 = NoCPUReg,
+ const CPURegister& reg3 = NoCPUReg,
+ const CPURegister& reg4 = NoCPUReg);
+
+
+ // Prevent any scratch registers from being used in this scope.
+ void ExcludeAll();
+
+
+ private:
+ static CPURegister AcquireNextAvailable(CPURegList* available);
+
+ static void ReleaseByCode(CPURegList* available, int code);
+
+ static void ReleaseByRegList(CPURegList* available,
+ RegList regs);
+
+ static void IncludeByRegList(CPURegList* available,
+ RegList exclude);
+
+ static void ExcludeByRegList(CPURegList* available,
+ RegList exclude);
+
+ // Available scratch registers.
+ CPURegList* available_; // kRegister
+ CPURegList* availablefp_; // kVRegister
+
+ // The state of the available lists at the start of this scope.
+ RegList old_available_; // kRegister
+ RegList old_availablefp_; // kVRegister
+#ifdef DEBUG
+ bool initialised_;
+#endif
+
+ // Disallow copy constructor and operator=.
+ UseScratchRegisterScope(const UseScratchRegisterScope&) {
+ VIXL_UNREACHABLE();
+ }
+ void operator=(const UseScratchRegisterScope&) {
+ VIXL_UNREACHABLE();
+ }
+};
+
+
+} // namespace vixl
+
+#endif // VIXL_A64_MACRO_ASSEMBLER_A64_H_
diff --git a/js/src/jit/arm64/vixl/MozAssembler-vixl.cpp b/js/src/jit/arm64/vixl/MozAssembler-vixl.cpp
new file mode 100644
index 0000000000..b9189cc23b
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MozAssembler-vixl.cpp
@@ -0,0 +1,610 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Assembler-vixl.h"
+#include "jit/Label.h"
+
+namespace vixl {
+
+using LabelDoc = js::jit::DisassemblerSpew::LabelDoc;
+
+// Assembler
+void Assembler::FinalizeCode() {
+#ifdef DEBUG
+ finalized_ = true;
+#endif
+}
+
+// Unbound Label Representation.
+//
+// We can have multiple branches using the same label before it is bound.
+// Assembler::bind() must then be able to enumerate all the branches and patch
+// them to target the final label location.
+//
+// When a Label is unbound but has uses, its offset points to the head of a
+// linked list of uses. The uses can be branches or adr/adrp instructions. In
+// the case of branches, the next member in the linked list is simply encoded
+// as the branch target. For adr/adrp, the relative pc offset is encoded in the
+// immediate field as a signed instruction offset.
+//
+// In both cases, the end of the list is encoded as a 0 pc offset, i.e. the
+// tail is pointing to itself.
+
+static const ptrdiff_t kEndOfLabelUseList = 0;
+
+BufferOffset
+MozBaseAssembler::NextLink(BufferOffset cur)
+{
+ Instruction* link = getInstructionAt(cur);
+ // Raw encoded offset.
+ ptrdiff_t offset = link->ImmPCRawOffset();
+ // End of the list is encoded as 0.
+ if (offset == kEndOfLabelUseList)
+ return BufferOffset();
+ // The encoded offset is the number of instructions to move.
+ return BufferOffset(cur.getOffset() + offset * kInstructionSize);
+}
+
+static ptrdiff_t
+EncodeOffset(BufferOffset cur, BufferOffset next)
+{
+ MOZ_ASSERT(next.assigned() && cur.assigned());
+ ptrdiff_t offset = next.getOffset() - cur.getOffset();
+ MOZ_ASSERT(offset % kInstructionSize == 0);
+ return offset / kInstructionSize;
+}
+
+void
+MozBaseAssembler::SetNextLink(BufferOffset cur, BufferOffset next)
+{
+ Instruction* link = getInstructionAt(cur);
+ link->SetImmPCRawOffset(EncodeOffset(cur, next));
+}
+
+// A common implementation for the LinkAndGet<Type>OffsetTo helpers.
+//
+// If the label is bound, returns the offset in units of 1 << elementShift.
+// Otherwise, links the instruction to the label and returns the raw offset to
+// encode. (This will be an instruction count.)
+//
+// The offset is calculated by aligning the PC and label addresses down to a
+// multiple of 1 << elementShift, then calculating the (scaled) offset between
+// them. This matches the semantics of adrp, for example. (Assuming that the
+// assembler buffer is page-aligned, which it probably isn't.)
+//
+// For an unbound label, the returned offset will be encodable in the provided
+// branch range. If the label is already bound, the caller is expected to make
+// sure that it is in range, and emit the necessary branch instructions if it
+// isn't.
+//
+ptrdiff_t
+MozBaseAssembler::LinkAndGetOffsetTo(BufferOffset branch, ImmBranchRangeType branchRange,
+ unsigned elementShift, Label* label)
+{
+ if (armbuffer_.oom())
+ return kEndOfLabelUseList;
+
+ if (label->bound()) {
+ // The label is bound: all uses are already linked.
+ ptrdiff_t branch_offset = ptrdiff_t(branch.getOffset() >> elementShift);
+ ptrdiff_t label_offset = ptrdiff_t(label->offset() >> elementShift);
+ return label_offset - branch_offset;
+ }
+
+ // Keep track of short-range branches targeting unbound labels. We may need
+ // to insert veneers in PatchShortRangeBranchToVeneer() below.
+ if (branchRange < NumShortBranchRangeTypes) {
+ // This is the last possible branch target.
+ BufferOffset deadline(branch.getOffset() +
+ Instruction::ImmBranchMaxForwardOffset(branchRange));
+ armbuffer_.registerBranchDeadline(branchRange, deadline);
+ }
+
+ // The label is unbound and previously unused: Store the offset in the label
+ // itself for patching by bind().
+ if (!label->used()) {
+ label->use(branch.getOffset());
+ return kEndOfLabelUseList;
+ }
+
+ // The label is unbound and has multiple users. Create a linked list between
+ // the branches, and update the linked list head in the label struct. This is
+ // not always trivial since the branches in the linked list have limited
+ // ranges.
+
+ // What is the earliest buffer offset that would be reachable by the branch
+ // we're about to add?
+ ptrdiff_t earliestReachable =
+ branch.getOffset() + Instruction::ImmBranchMinBackwardOffset(branchRange);
+
+ // If the existing instruction at the head of the list is within reach of the
+ // new branch, we can simply insert the new branch at the front of the list.
+ if (label->offset() >= earliestReachable) {
+ ptrdiff_t offset = EncodeOffset(branch, BufferOffset(label));
+ label->use(branch.getOffset());
+ MOZ_ASSERT(offset != kEndOfLabelUseList);
+ return offset;
+ }
+
+ // The label already has a linked list of uses, but we can't reach the head
+ // of the list with the allowed branch range. Insert this branch at a
+ // different position in the list.
+ //
+ // Find an existing branch, exbr, such that:
+ //
+ // 1. The new branch can be reached by exbr, and either
+ // 2a. The new branch can reach exbr's target, or
+ // 2b. The exbr branch is at the end of the list.
+ //
+ // Then the new branch can be inserted after exbr in the linked list.
+ //
+ // We know that it is always possible to find an exbr branch satisfying these
+ // conditions because of the PatchShortRangeBranchToVeneer() mechanism. All
+ // branches are guaranteed to either be able to reach the end of the
+ // assembler buffer, or they will be pointing to an unconditional branch that
+ // can.
+ //
+ // In particular, the end of the list is always a viable candidate, so we'll
+ // just get that.
+ BufferOffset next(label);
+ BufferOffset exbr;
+ do {
+ exbr = next;
+ next = NextLink(next);
+ } while (next.assigned());
+ SetNextLink(exbr, branch);
+
+ // This branch becomes the new end of the list.
+ return kEndOfLabelUseList;
+}
+
+ptrdiff_t MozBaseAssembler::LinkAndGetByteOffsetTo(BufferOffset branch, Label* label) {
+ return LinkAndGetOffsetTo(branch, UncondBranchRangeType, 0, label);
+}
+
+ptrdiff_t MozBaseAssembler::LinkAndGetInstructionOffsetTo(BufferOffset branch,
+ ImmBranchRangeType branchRange,
+ Label* label) {
+ return LinkAndGetOffsetTo(branch, branchRange, kInstructionSizeLog2, label);
+}
+
+ptrdiff_t MozBaseAssembler::LinkAndGetPageOffsetTo(BufferOffset branch, Label* label) {
+ return LinkAndGetOffsetTo(branch, UncondBranchRangeType, kPageSizeLog2, label);
+}
+
+BufferOffset Assembler::b(int imm26, const LabelDoc& doc) {
+ return EmitBranch(B | ImmUncondBranch(imm26), doc);
+}
+
+
+void Assembler::b(Instruction* at, int imm26) {
+ return EmitBranch(at, B | ImmUncondBranch(imm26));
+}
+
+
+BufferOffset Assembler::b(int imm19, Condition cond, const LabelDoc& doc) {
+ return EmitBranch(B_cond | ImmCondBranch(imm19) | cond, doc);
+}
+
+
+void Assembler::b(Instruction* at, int imm19, Condition cond) {
+ EmitBranch(at, B_cond | ImmCondBranch(imm19) | cond);
+}
+
+
+BufferOffset Assembler::b(Label* label) {
+ // Encode the relative offset from the inserted branch to the label.
+ LabelDoc doc = refLabel(label);
+ return b(LinkAndGetInstructionOffsetTo(nextInstrOffset(), UncondBranchRangeType, label), doc);
+}
+
+
+BufferOffset Assembler::b(Label* label, Condition cond) {
+ // Encode the relative offset from the inserted branch to the label.
+ LabelDoc doc = refLabel(label);
+ return b(LinkAndGetInstructionOffsetTo(nextInstrOffset(), CondBranchRangeType, label), cond, doc);
+}
+
+void Assembler::br(Instruction* at, const Register& xn) {
+ VIXL_ASSERT(xn.Is64Bits());
+ // No need for EmitBranch(): no immediate offset needs fixing.
+ Emit(at, BR | Rn(xn));
+}
+
+
+void Assembler::blr(Instruction* at, const Register& xn) {
+ VIXL_ASSERT(xn.Is64Bits());
+ // No need for EmitBranch(): no immediate offset needs fixing.
+ Emit(at, BLR | Rn(xn));
+}
+
+
+void Assembler::bl(int imm26, const LabelDoc& doc) {
+ EmitBranch(BL | ImmUncondBranch(imm26), doc);
+}
+
+
+void Assembler::bl(Instruction* at, int imm26) {
+ EmitBranch(at, BL | ImmUncondBranch(imm26));
+}
+
+
+void Assembler::bl(Label* label) {
+ // Encode the relative offset from the inserted branch to the label.
+ LabelDoc doc = refLabel(label);
+ return bl(LinkAndGetInstructionOffsetTo(nextInstrOffset(), UncondBranchRangeType, label), doc);
+}
+
+
+void Assembler::cbz(const Register& rt, int imm19, const LabelDoc& doc) {
+ EmitBranch(SF(rt) | CBZ | ImmCmpBranch(imm19) | Rt(rt), doc);
+}
+
+
+void Assembler::cbz(Instruction* at, const Register& rt, int imm19) {
+ EmitBranch(at, SF(rt) | CBZ | ImmCmpBranch(imm19) | Rt(rt));
+}
+
+
+void Assembler::cbz(const Register& rt, Label* label) {
+ // Encode the relative offset from the inserted branch to the label.
+ LabelDoc doc = refLabel(label);
+ return cbz(rt, LinkAndGetInstructionOffsetTo(nextInstrOffset(), CondBranchRangeType, label), doc);
+}
+
+
+void Assembler::cbnz(const Register& rt, int imm19, const LabelDoc& doc) {
+ EmitBranch(SF(rt) | CBNZ | ImmCmpBranch(imm19) | Rt(rt), doc);
+}
+
+
+void Assembler::cbnz(Instruction* at, const Register& rt, int imm19) {
+ EmitBranch(at, SF(rt) | CBNZ | ImmCmpBranch(imm19) | Rt(rt));
+}
+
+
+void Assembler::cbnz(const Register& rt, Label* label) {
+ // Encode the relative offset from the inserted branch to the label.
+ LabelDoc doc = refLabel(label);
+ return cbnz(rt, LinkAndGetInstructionOffsetTo(nextInstrOffset(), CondBranchRangeType, label), doc);
+}
+
+
+void Assembler::tbz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc) {
+ VIXL_ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize)));
+ EmitBranch(TBZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt), doc);
+}
+
+
+void Assembler::tbz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14) {
+ VIXL_ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize)));
+ EmitBranch(at, TBZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt));
+}
+
+
+void Assembler::tbz(const Register& rt, unsigned bit_pos, Label* label) {
+ // Encode the relative offset from the inserted branch to the label.
+ LabelDoc doc = refLabel(label);
+ return tbz(rt, bit_pos, LinkAndGetInstructionOffsetTo(nextInstrOffset(), TestBranchRangeType, label), doc);
+}
+
+
+void Assembler::tbnz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc) {
+ VIXL_ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize)));
+ EmitBranch(TBNZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt), doc);
+}
+
+
+void Assembler::tbnz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14) {
+ VIXL_ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize)));
+ EmitBranch(at, TBNZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt));
+}
+
+
+void Assembler::tbnz(const Register& rt, unsigned bit_pos, Label* label) {
+ // Encode the relative offset from the inserted branch to the label.
+ LabelDoc doc = refLabel(label);
+ return tbnz(rt, bit_pos, LinkAndGetInstructionOffsetTo(nextInstrOffset(), TestBranchRangeType, label), doc);
+}
+
+
+void Assembler::adr(const Register& rd, int imm21, const LabelDoc& doc) {
+ VIXL_ASSERT(rd.Is64Bits());
+ EmitBranch(ADR | ImmPCRelAddress(imm21) | Rd(rd), doc);
+}
+
+
+void Assembler::adr(Instruction* at, const Register& rd, int imm21) {
+ VIXL_ASSERT(rd.Is64Bits());
+ EmitBranch(at, ADR | ImmPCRelAddress(imm21) | Rd(rd));
+}
+
+
+void Assembler::adr(const Register& rd, Label* label) {
+ // Encode the relative offset from the inserted adr to the label.
+ LabelDoc doc = refLabel(label);
+ return adr(rd, LinkAndGetByteOffsetTo(nextInstrOffset(), label), doc);
+}
+
+
+void Assembler::adrp(const Register& rd, int imm21, const LabelDoc& doc) {
+ VIXL_ASSERT(rd.Is64Bits());
+ EmitBranch(ADRP | ImmPCRelAddress(imm21) | Rd(rd), doc);
+}
+
+
+void Assembler::adrp(Instruction* at, const Register& rd, int imm21) {
+ VIXL_ASSERT(rd.Is64Bits());
+ EmitBranch(at, ADRP | ImmPCRelAddress(imm21) | Rd(rd));
+}
+
+
+void Assembler::adrp(const Register& rd, Label* label) {
+ VIXL_ASSERT(AllowPageOffsetDependentCode());
+ // Encode the relative offset from the inserted adr to the label.
+ LabelDoc doc = refLabel(label);
+ return adrp(rd, LinkAndGetPageOffsetTo(nextInstrOffset(), label), doc);
+}
+
+
+BufferOffset Assembler::ands(const Register& rd, const Register& rn, const Operand& operand) {
+ return Logical(rd, rn, operand, ANDS);
+}
+
+
+BufferOffset Assembler::tst(const Register& rn, const Operand& operand) {
+ return ands(AppropriateZeroRegFor(rn), rn, operand);
+}
+
+
+void Assembler::ldr(Instruction* at, const CPURegister& rt, int imm19) {
+ LoadLiteralOp op = LoadLiteralOpFor(rt);
+ Emit(at, op | ImmLLiteral(imm19) | Rt(rt));
+}
+
+
+BufferOffset Assembler::hint(SystemHint code) {
+ return Emit(HINT | ImmHint(code));
+}
+
+
+void Assembler::hint(Instruction* at, SystemHint code) {
+ Emit(at, HINT | ImmHint(code));
+}
+
+
+void Assembler::svc(Instruction* at, int code) {
+ VIXL_ASSERT(IsUint16(code));
+ Emit(at, SVC | ImmException(code));
+}
+
+
+void Assembler::nop(Instruction* at) {
+ hint(at, NOP);
+}
+
+
+void Assembler::csdb(Instruction* at) {
+ hint(at, CSDB);
+}
+
+
+BufferOffset Assembler::Logical(const Register& rd, const Register& rn,
+ const Operand& operand, LogicalOp op)
+{
+ VIXL_ASSERT(rd.size() == rn.size());
+ if (operand.IsImmediate()) {
+ int64_t immediate = operand.immediate();
+ unsigned reg_size = rd.size();
+
+ VIXL_ASSERT(immediate != 0);
+ VIXL_ASSERT(immediate != -1);
+ VIXL_ASSERT(rd.Is64Bits() || IsUint32(immediate));
+
+ // If the operation is NOT, invert the operation and immediate.
+ if ((op & NOT) == NOT) {
+ op = static_cast<LogicalOp>(op & ~NOT);
+ immediate = rd.Is64Bits() ? ~immediate : (~immediate & kWRegMask);
+ }
+
+ unsigned n, imm_s, imm_r;
+ if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
+ // Immediate can be encoded in the instruction.
+ return LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
+ } else {
+ // This case is handled in the macro assembler.
+ VIXL_UNREACHABLE();
+ }
+ } else {
+ VIXL_ASSERT(operand.IsShiftedRegister());
+ VIXL_ASSERT(operand.reg().size() == rd.size());
+ Instr dp_op = static_cast<Instr>(op | LogicalShiftedFixed);
+ return DataProcShiftedRegister(rd, rn, operand, LeaveFlags, dp_op);
+ }
+}
+
+
+BufferOffset Assembler::LogicalImmediate(const Register& rd, const Register& rn,
+ unsigned n, unsigned imm_s, unsigned imm_r, LogicalOp op)
+{
+ unsigned reg_size = rd.size();
+ Instr dest_reg = (op == ANDS) ? Rd(rd) : RdSP(rd);
+ return Emit(SF(rd) | LogicalImmediateFixed | op | BitN(n, reg_size) |
+ ImmSetBits(imm_s, reg_size) | ImmRotate(imm_r, reg_size) | dest_reg | Rn(rn));
+}
+
+
+BufferOffset Assembler::DataProcShiftedRegister(const Register& rd, const Register& rn,
+ const Operand& operand, FlagsUpdate S, Instr op)
+{
+ VIXL_ASSERT(operand.IsShiftedRegister());
+ VIXL_ASSERT(rn.Is64Bits() || (rn.Is32Bits() && IsUint5(operand.shift_amount())));
+ return Emit(SF(rd) | op | Flags(S) |
+ ShiftDP(operand.shift()) | ImmDPShift(operand.shift_amount()) |
+ Rm(operand.reg()) | Rn(rn) | Rd(rd));
+}
+
+
+void MozBaseAssembler::InsertIndexIntoTag(uint8_t* load, uint32_t index) {
+ // Store the js::jit::PoolEntry index into the instruction.
+ // finishPool() will walk over all literal load instructions
+ // and use PatchConstantPoolLoad() to patch to the final relative offset.
+ *((uint32_t*)load) |= Assembler::ImmLLiteral(index);
+}
+
+
+bool MozBaseAssembler::PatchConstantPoolLoad(void* loadAddr, void* constPoolAddr) {
+ Instruction* load = reinterpret_cast<Instruction*>(loadAddr);
+
+ // The load currently contains the js::jit::PoolEntry's index,
+ // as written by InsertIndexIntoTag().
+ uint32_t index = load->ImmLLiteral();
+
+ // Each entry in the literal pool is uint32_t-sized,
+ // but literals may use multiple entries.
+ uint32_t* constPool = reinterpret_cast<uint32_t*>(constPoolAddr);
+ Instruction* source = reinterpret_cast<Instruction*>(&constPool[index]);
+
+ load->SetImmLLiteral(source);
+ return false; // Nothing uses the return value.
+}
+
+void
+MozBaseAssembler::PatchShortRangeBranchToVeneer(ARMBuffer* buffer, unsigned rangeIdx,
+ BufferOffset deadline, BufferOffset veneer)
+{
+ // Reconstruct the position of the branch from (rangeIdx, deadline).
+ vixl::ImmBranchRangeType branchRange = static_cast<vixl::ImmBranchRangeType>(rangeIdx);
+ BufferOffset branch(deadline.getOffset() - Instruction::ImmBranchMaxForwardOffset(branchRange));
+ Instruction *branchInst = buffer->getInst(branch);
+ Instruction *veneerInst = buffer->getInst(veneer);
+
+ // Verify that the branch range matches what's encoded.
+ MOZ_ASSERT(Instruction::ImmBranchTypeToRange(branchInst->BranchType()) == branchRange);
+
+  // We want to insert the veneer after the branch in the linked list of
+  // instructions that use the same unbound label. The veneer should be an
+  // unconditional branch.
+ ptrdiff_t nextElemOffset = branchInst->ImmPCRawOffset();
+
+ // If offset is 0, this is the end of the linked list.
+ if (nextElemOffset != kEndOfLabelUseList) {
+ // Make the offset relative to veneer so it targets the same instruction
+ // as branchInst.
+ nextElemOffset *= kInstructionSize;
+ nextElemOffset += branch.getOffset() - veneer.getOffset();
+ nextElemOffset /= kInstructionSize;
+ }
+ Assembler::b(veneerInst, nextElemOffset);
+
+ // Now point branchInst at veneer. See also SetNextLink() above.
+ branchInst->SetImmPCRawOffset(EncodeOffset(branch, veneer));
+}
+
+struct PoolHeader {
+ uint32_t data;
+
+ struct Header {
+    // The size includes the pool header itself.
+    // The size is in units of Instruction (4 bytes), not bytes.
+ union {
+ struct {
+ uint32_t size : 15;
+
+ // "Natural" guards are part of the normal instruction stream,
+ // while "non-natural" guards are inserted for the sole purpose
+ // of skipping around a pool.
+ uint32_t isNatural : 1;
+ uint32_t ONES : 16;
+ };
+ uint32_t data;
+ };
+
+ Header(int size_, bool isNatural_)
+ : size(size_),
+ isNatural(isNatural_),
+ ONES(0xffff)
+ { }
+
+ Header(uint32_t data)
+ : data(data)
+ {
+ VIXL_STATIC_ASSERT(sizeof(Header) == sizeof(uint32_t));
+ VIXL_ASSERT(ONES == 0xffff);
+ }
+
+ uint32_t raw() const {
+ VIXL_STATIC_ASSERT(sizeof(Header) == sizeof(uint32_t));
+ return data;
+ }
+ };
+
+ PoolHeader(int size_, bool isNatural_)
+ : data(Header(size_, isNatural_).raw())
+ { }
+
+ uint32_t size() const {
+ Header tmp(data);
+ return tmp.size;
+ }
+
+ uint32_t isNatural() const {
+ Header tmp(data);
+ return tmp.isNatural;
+ }
+};
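+
+// A concrete sketch of the encoding (assuming the LSB-first bit-field layout
+// used on the targets this code supports): a pool with 8 uint32_t entries
+// plus this header spans 9 Instruction-sized units, so a natural pool writes
+// PoolHeader(9, true), whose raw word is 0xffff0000 | (1 << 15) | 9 ==
+// 0xffff8009.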
+
+
+void MozBaseAssembler::WritePoolHeader(uint8_t* start, js::jit::Pool* p, bool isNatural) {
+ static_assert(sizeof(PoolHeader) == 4);
+
+ // Get the total size of the pool.
+ const uintptr_t totalPoolSize = sizeof(PoolHeader) + p->getPoolSize();
+ const uintptr_t totalPoolInstructions = totalPoolSize / kInstructionSize;
+
+ VIXL_ASSERT((totalPoolSize & 0x3) == 0);
+ VIXL_ASSERT(totalPoolInstructions < (1 << 15));
+
+ PoolHeader header(totalPoolInstructions, isNatural);
+ *(PoolHeader*)start = header;
+}
+
+
+void MozBaseAssembler::WritePoolFooter(uint8_t* start, js::jit::Pool* p, bool isNatural) {
+ return;
+}
+
+
+void MozBaseAssembler::WritePoolGuard(BufferOffset branch, Instruction* inst, BufferOffset dest) {
+ int byteOffset = dest.getOffset() - branch.getOffset();
+ VIXL_ASSERT(byteOffset % kInstructionSize == 0);
+
+ int instOffset = byteOffset >> kInstructionSizeLog2;
+ Assembler::b(inst, instOffset);
+}
+
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h b/js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h
new file mode 100644
index 0000000000..5d12f81bb1
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h
@@ -0,0 +1,356 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef jit_arm64_vixl_MozBaseAssembler_vixl_h
+#define jit_arm64_vixl_MozBaseAssembler_vixl_h
+
+
+#include "mozilla/Assertions.h" // MOZ_ASSERT
+#include "mozilla/Sprintf.h" // SprintfLiteral
+
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t, uint32_t
+#include <string.h> // strstr
+
+#include "jit/arm64/vixl/Constants-vixl.h" // vixl::{HINT, NOP, ImmHint_offset}
+#include "jit/arm64/vixl/Globals-vixl.h" // VIXL_ASSERT
+#include "jit/arm64/vixl/Instructions-vixl.h" // vixl::{Instruction, NumShortBranchRangeTypes, Instr, ImmBranchRangeType}
+
+#include "jit/Label.h" // jit::Label
+#include "jit/shared/Assembler-shared.h" // jit::AssemblerShared
+#include "jit/shared/Disassembler-shared.h" // jit::DisassemblerSpew
+#include "jit/shared/IonAssemblerBuffer.h" // jit::BufferOffset
+#include "jit/shared/IonAssemblerBufferWithConstantPools.h" // jit::AssemblerBufferWithConstantPools
+
+namespace vixl {
+
+
+using js::jit::BufferOffset;
+using js::jit::DisassemblerSpew;
+using js::jit::Label;
+
+using LabelDoc = DisassemblerSpew::LabelDoc;
+using LiteralDoc = DisassemblerSpew::LiteralDoc;
+
+#ifdef JS_DISASM_ARM64
+void DisassembleInstruction(char* buffer, size_t bufsize, const Instruction* instr);
+#endif
+
+class MozBaseAssembler;
+typedef js::jit::AssemblerBufferWithConstantPools<1024, 4, Instruction, MozBaseAssembler,
+ NumShortBranchRangeTypes> ARMBuffer;
+
+// Base class for vixl::Assembler, for isolating Moz-specific changes to VIXL.
+class MozBaseAssembler : public js::jit::AssemblerShared {
+ // Buffer initialization constants.
+ static const unsigned BufferGuardSize = 1;
+ static const unsigned BufferHeaderSize = 1;
+ static const size_t BufferCodeAlignment = 8;
+ static const size_t BufferMaxPoolOffset = 1024;
+ static const unsigned BufferPCBias = 0;
+ static const uint32_t BufferAlignmentFillInstruction = HINT | (NOP << ImmHint_offset);
+ static const uint32_t BufferNopFillInstruction = HINT | (NOP << ImmHint_offset);
+ static const unsigned BufferNumDebugNopsToInsert = 0;
+
+#ifdef JS_DISASM_ARM64
+ static constexpr const char* const InstrIndent = " ";
+ static constexpr const char* const LabelIndent = " ";
+ static constexpr const char* const TargetIndent = " ";
+#endif
+
+ public:
+ MozBaseAssembler()
+ : armbuffer_(BufferGuardSize,
+ BufferHeaderSize,
+ BufferCodeAlignment,
+ BufferMaxPoolOffset,
+ BufferPCBias,
+ BufferAlignmentFillInstruction,
+ BufferNopFillInstruction,
+ BufferNumDebugNopsToInsert)
+ {
+#ifdef JS_DISASM_ARM64
+ spew_.setLabelIndent(LabelIndent);
+ spew_.setTargetIndent(TargetIndent);
+#endif
+  }
+ ~MozBaseAssembler()
+ {
+#ifdef JS_DISASM_ARM64
+ spew_.spewOrphans();
+#endif
+ }
+
+ public:
+ // Return the Instruction at a given byte offset.
+ Instruction* getInstructionAt(BufferOffset offset) {
+ return armbuffer_.getInst(offset);
+ }
+
+ // Return the byte offset of a bound label.
+ template <typename T>
+ inline T GetLabelByteOffset(const js::jit::Label* label) {
+ VIXL_ASSERT(label->bound());
+ static_assert(sizeof(T) >= sizeof(uint32_t));
+ return reinterpret_cast<T>(label->offset());
+ }
+
+ protected:
+ // Get the buffer offset of the next inserted instruction. This may flush
+ // constant pools.
+ BufferOffset nextInstrOffset() {
+ return armbuffer_.nextInstrOffset();
+ }
+
+ // Get the next usable buffer offset. Note that a constant pool may be placed
+ // here before the next instruction is emitted.
+ BufferOffset nextOffset() const {
+ return armbuffer_.nextOffset();
+ }
+
+ // Allocate memory in the buffer by forwarding to armbuffer_.
+ // Propagate OOM errors.
+ BufferOffset allocLiteralLoadEntry(size_t numInst, unsigned numPoolEntries,
+ uint8_t* inst, uint8_t* data,
+ const LiteralDoc& doc = LiteralDoc(),
+ ARMBuffer::PoolEntry* pe = nullptr)
+ {
+ MOZ_ASSERT(inst);
+ MOZ_ASSERT(numInst == 1); /* If not, then fix disassembly */
+ BufferOffset offset = armbuffer_.allocEntry(numInst, numPoolEntries, inst,
+ data, pe);
+ propagateOOM(offset.assigned());
+#ifdef JS_DISASM_ARM64
+ Instruction* instruction = armbuffer_.getInstOrNull(offset);
+ if (instruction)
+ spewLiteralLoad(offset,
+ reinterpret_cast<vixl::Instruction*>(instruction), doc);
+#endif
+ return offset;
+ }
+
+#ifdef JS_DISASM_ARM64
+ DisassemblerSpew spew_;
+
+ void spew(BufferOffset offs, const vixl::Instruction* instr) {
+ if (spew_.isDisabled() || !instr)
+ return;
+
+ char buffer[2048];
+ DisassembleInstruction(buffer, sizeof(buffer), instr);
+ spew_.spew("%06" PRIx32 " %08" PRIx32 "%s%s",
+ (uint32_t)offs.getOffset(),
+ instr->InstructionBits(), InstrIndent, buffer);
+ }
+
+ void spewBranch(BufferOffset offs,
+ const vixl::Instruction* instr, const LabelDoc& target) {
+ if (spew_.isDisabled() || !instr)
+ return;
+
+ char buffer[2048];
+ DisassembleInstruction(buffer, sizeof(buffer), instr);
+
+ char labelBuf[128];
+ labelBuf[0] = 0;
+
+ bool hasTarget = target.valid;
+ if (!hasTarget)
+ SprintfLiteral(labelBuf, "-> (link-time target)");
+
+ if (instr->IsImmBranch() && hasTarget) {
+ // The target information in the instruction is likely garbage, so remove it.
+ // The target label will in any case be printed if we have it.
+ //
+      // The format of the instruction disassembly is /.*#.*/. Strip the '#' and everything after it.
+ size_t i;
+ const size_t BUFLEN = sizeof(buffer)-1;
+ for ( i=0 ; i < BUFLEN && buffer[i] && buffer[i] != '#' ; i++ )
+ ;
+ buffer[i] = 0;
+
+ SprintfLiteral(labelBuf, "-> %d%s", target.doc, !target.bound ? "f" : "");
+ hasTarget = false;
+ }
+
+ spew_.spew("%06" PRIx32 " %08" PRIx32 "%s%s%s",
+ (uint32_t)offs.getOffset(),
+ instr->InstructionBits(), InstrIndent, buffer, labelBuf);
+
+ if (hasTarget)
+ spew_.spewRef(target);
+ }
+
+ void spewLiteralLoad(BufferOffset offs,
+ const vixl::Instruction* instr, const LiteralDoc& doc) {
+ if (spew_.isDisabled() || !instr)
+ return;
+
+ char buffer[2048];
+ DisassembleInstruction(buffer, sizeof(buffer), instr);
+
+ char litbuf[2048];
+ spew_.formatLiteral(doc, litbuf, sizeof(litbuf));
+
+ // The instruction will have the form /^.*pc\+0/ followed by junk that we
+ // don't need; try to strip it.
+
+ char *probe = strstr(buffer, "pc+0");
+ if (probe)
+ *(probe + 4) = 0;
+ spew_.spew("%06" PRIx32 " %08" PRIx32 "%s%s ; .const %s",
+ (uint32_t)offs.getOffset(),
+ instr->InstructionBits(), InstrIndent, buffer, litbuf);
+ }
+
+ LabelDoc refLabel(Label* label) {
+ if (spew_.isDisabled())
+ return LabelDoc();
+
+ return spew_.refLabel(label);
+ }
+#else
+ LabelDoc refLabel(js::jit::Label*) {
+ return LabelDoc();
+ }
+#endif
+
+ // Emit the instruction, returning its offset.
+ BufferOffset Emit(Instr instruction, bool isBranch = false) {
+ static_assert(sizeof(instruction) == kInstructionSize);
+ // TODO: isBranch is obsolete and should be removed.
+ (void)isBranch;
+ MOZ_ASSERT(hasCreator());
+ BufferOffset offs = armbuffer_.putInt(*(uint32_t*)(&instruction));
+#ifdef JS_DISASM_ARM64
+ if (!isBranch)
+ spew(offs, armbuffer_.getInstOrNull(offs));
+#endif
+ return offs;
+ }
+
+ BufferOffset EmitBranch(Instr instruction, const LabelDoc& doc) {
+ BufferOffset offs = Emit(instruction, true);
+#ifdef JS_DISASM_ARM64
+ spewBranch(offs, armbuffer_.getInstOrNull(offs), doc);
+#endif
+ return offs;
+ }
+
+ public:
+ // Emit the instruction at |at|.
+ static void Emit(Instruction* at, Instr instruction) {
+ static_assert(sizeof(instruction) == kInstructionSize);
+ memcpy(at, &instruction, sizeof(instruction));
+ }
+
+ static void EmitBranch(Instruction* at, Instr instruction) {
+ // TODO: Assert that the buffer already has the instruction marked as a branch.
+ Emit(at, instruction);
+ }
+
+ // Emit data inline in the instruction stream.
+ BufferOffset EmitData(void const * data, unsigned size) {
+ VIXL_ASSERT(size % 4 == 0);
+ MOZ_ASSERT(hasCreator());
+ return armbuffer_.allocEntry(size / sizeof(uint32_t), 0, (uint8_t*)(data), nullptr);
+ }
+
+ public:
+ // Size of the code generated in bytes, including pools.
+ size_t SizeOfCodeGenerated() const {
+ return armbuffer_.size();
+ }
+
+ // Move the pool into the instruction stream.
+ void flushBuffer() {
+ armbuffer_.flushPool();
+ }
+
+ // Inhibit pool flushing for the given number of instructions.
+ // Generating more than |maxInst| instructions in a no-pool region
+ // triggers an assertion within the ARMBuffer.
+ // Does not nest.
+ void enterNoPool(size_t maxInst) {
+ armbuffer_.enterNoPool(maxInst);
+ }
+
+ // Marks the end of a no-pool region.
+ void leaveNoPool() {
+ armbuffer_.leaveNoPool();
+ }
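+
+  // A minimal usage sketch for the no-pool region API above (the instruction
+  // count and the Emit() payloads are illustrative only): a caller that must
+  // keep a short sequence contiguous brackets it like so:
+  //
+  //   enterNoPool(2);      // the next 2 instructions must not be split
+  //   Emit(firstInstr);    //   e.g. a load and the branch that guards it
+  //   Emit(secondInstr);
+  //   leaveNoPool();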
+
+ void enterNoNops() {
+ armbuffer_.enterNoNops();
+ }
+ void leaveNoNops() {
+ armbuffer_.leaveNoNops();
+ }
+
+ public:
+ // Static interface used by IonAssemblerBufferWithConstantPools.
+ static void InsertIndexIntoTag(uint8_t* load, uint32_t index);
+ static bool PatchConstantPoolLoad(void* loadAddr, void* constPoolAddr);
+ static void PatchShortRangeBranchToVeneer(ARMBuffer*, unsigned rangeIdx, BufferOffset deadline,
+ BufferOffset veneer);
+ static uint32_t PlaceConstantPoolBarrier(int offset);
+
+ static void WritePoolHeader(uint8_t* start, js::jit::Pool* p, bool isNatural);
+ static void WritePoolFooter(uint8_t* start, js::jit::Pool* p, bool isNatural);
+ static void WritePoolGuard(BufferOffset branch, Instruction* inst, BufferOffset dest);
+
+ protected:
+ // Functions for managing Labels and linked lists of Label uses.
+
+ // Get the next Label user in the linked list of Label uses.
+ // Return an unassigned BufferOffset when the end of the list is reached.
+ BufferOffset NextLink(BufferOffset cur);
+
+ // Patch the instruction at cur to link to the instruction at next.
+ void SetNextLink(BufferOffset cur, BufferOffset next);
+
+ // Link the current (not-yet-emitted) instruction to the specified label,
+ // then return a raw offset to be encoded in the instruction.
+ ptrdiff_t LinkAndGetByteOffsetTo(BufferOffset branch, js::jit::Label* label);
+ ptrdiff_t LinkAndGetInstructionOffsetTo(BufferOffset branch, ImmBranchRangeType branchRange,
+ js::jit::Label* label);
+ ptrdiff_t LinkAndGetPageOffsetTo(BufferOffset branch, js::jit::Label* label);
+
+ // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
+ ptrdiff_t LinkAndGetOffsetTo(BufferOffset branch, ImmBranchRangeType branchRange,
+ unsigned elementSizeBits, js::jit::Label* label);
+
+ protected:
+ // The buffer into which code and relocation info are generated.
+ ARMBuffer armbuffer_;
+};
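+
+// A sketch of how a derived assembler is expected to combine the pieces above
+// when emitting a branch to a Label. EncodeB() is a hypothetical encoding
+// helper and |range| stands for the appropriate ImmBranchRangeType; only the
+// MozBaseAssembler methods declared above are assumed to exist:
+//
+//   void ExampleAssembler::b(js::jit::Label* label) {
+//     LabelDoc doc = refLabel(label);          // disassembly target, if enabled
+//     ptrdiff_t off = LinkAndGetInstructionOffsetTo(nextInstrOffset(),
+//                                                   range, label);
+//     EmitBranch(EncodeB(off), doc);           // records the branch and spews it
+//   }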
+
+
+} // namespace vixl
+
+
+#endif // jit_arm64_vixl_MozBaseAssembler_vixl_h
+
diff --git a/js/src/jit/arm64/vixl/MozCachingDecoder.h b/js/src/jit/arm64/vixl/MozCachingDecoder.h
new file mode 100644
index 0000000000..5b4cfc17d5
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MozCachingDecoder.h
@@ -0,0 +1,179 @@
+#ifndef VIXL_A64_MOZ_CACHING_DECODER_A64_H_
+#define VIXL_A64_MOZ_CACHING_DECODER_A64_H_
+
+#include "mozilla/HashTable.h"
+
+#include "jit/arm64/vixl/Decoder-vixl.h"
+#include "js/AllocPolicy.h"
+
+#ifdef DEBUG
+#define JS_CACHE_SIMULATOR_ARM64 1
+#endif
+
+#ifdef JS_CACHE_SIMULATOR_ARM64
+namespace vixl {
+
+// This enumeration lists the different kinds of instructions which can be
+// decoded. These kinds correspond to the set of visitors defined by the
+// default Decoder.
+enum class InstDecodedKind : uint8_t {
+ NotDecodedYet,
+#define DECLARE(E) E,
+ VISITOR_LIST(DECLARE)
+#undef DECLARE
+};
+
+// A SinglePageDecodeCache is used to store the decoded kind of all instructions
+// in an executable page of code. Each time an instruction is decoded, its
+// decoded kind is recorded in this structure. The previous instruction value is
+// also recorded in this structure when using a debug build.
+//
+// The next time the same offset is visited, the instruction will be decoded
+// using the previously recorded decode kind. It is also compared against the
+// previously recorded bits of the instruction to check for potential missing
+// cache invalidations, in debug builds.
+//
+// This structure stores the equivalent of a single page of code, rather than
+// using a hash-table for all instructions, in order to have better memory
+// locality when using the simulator. However, a hash-table is used by the
+// CachingDecoder to map page-start addresses to these SinglePageDecodeCaches.
+class SinglePageDecodeCache {
+ public:
+ static const uintptr_t PageSize = 1 << 12;
+ static const uintptr_t PageMask = PageSize - 1;
+ static const uintptr_t InstSize = vixl::kInstructionSize;
+ static const uintptr_t InstMask = InstSize - 1;
+ static const uintptr_t InstPerPage = PageSize / InstSize;
+
+ SinglePageDecodeCache(const Instruction* inst)
+ : pageStart_(PageStart(inst))
+ {
+ memset(&decodeCache_, int(InstDecodedKind::NotDecodedYet), sizeof(decodeCache_));
+ }
+ // Compute the start address of the page which contains this instruction.
+ static uintptr_t PageStart(const Instruction* inst) {
+ return uintptr_t(inst) & ~PageMask;
+ }
+  // Returns whether the instruction's decoded kind is stored in this
+  // SinglePageDecodeCache.
+ bool contains(const Instruction* inst) {
+ return pageStart_ == PageStart(inst);
+ }
+ void clearDecode(const Instruction* inst) {
+ uintptr_t offset = (uintptr_t(inst) & PageMask) / InstSize;
+ decodeCache_[offset] = InstDecodedKind::NotDecodedYet;
+ }
+ InstDecodedKind* decodePtr(const Instruction* inst) {
+ uintptr_t offset = (uintptr_t(inst) & PageMask) / InstSize;
+ uint32_t instValue = *reinterpret_cast<const uint32_t*>(inst);
+ instCache_[offset] = instValue;
+ return &decodeCache_[offset];
+ }
+ InstDecodedKind decode(const Instruction* inst) const {
+ uintptr_t offset = (uintptr_t(inst) & PageMask) / InstSize;
+ InstDecodedKind val = decodeCache_[offset];
+ uint32_t instValue = *reinterpret_cast<const uint32_t*>(inst);
+ MOZ_ASSERT_IF(val != InstDecodedKind::NotDecodedYet,
+ instCache_[offset] == instValue);
+ return val;
+ }
+
+ private:
+ // Record the address at which the corresponding code page starts.
+ const uintptr_t pageStart_;
+
+  // Cache the raw bits of the instruction that was decoded previously, so that
+  // we can assert if a stale instruction is seen later.
+ uint32_t instCache_[InstPerPage];
+
+ // Cache the decoding of the instruction such that we can skip the decoding
+ // part.
+ InstDecodedKind decodeCache_[InstPerPage];
+};
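+
+// Worked example of the mapping above: with a 4 KiB page and 4-byte
+// instructions, an instruction located 0x40 bytes into its page occupies slot
+// (0x40 & PageMask) / InstSize == 16 of both instCache_ and decodeCache_. A
+// sketch of the lookup path a caller follows (addresses illustrative only):
+//
+//   SinglePageDecodeCache* page = ...;        // page covering |inst|
+//   if (page->contains(inst)) {
+//     InstDecodedKind kind = page->decode(inst);
+//     if (kind == InstDecodedKind::NotDecodedYet) {
+//       // Full decode, then record the result via page->decodePtr(inst).
+//     }
+//   }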
+
+// A DecoderVisitor which will record which visitor function should be called
+// the next time we want to decode the same instruction.
+class CachingDecoderVisitor : public DecoderVisitor {
+ public:
+ CachingDecoderVisitor() = default;
+ virtual ~CachingDecoderVisitor() {}
+
+#define DECLARE(A) virtual void Visit##A(const Instruction* instr) { \
+ if (last_) { \
+ MOZ_ASSERT(*last_ == InstDecodedKind::NotDecodedYet); \
+ *last_ = InstDecodedKind::A; \
+ last_ = nullptr; \
+ } \
+ };
+
+ VISITOR_LIST(DECLARE)
+#undef DECLARE
+
+ void setDecodePtr(InstDecodedKind* ptr) {
+ last_ = ptr;
+ }
+
+ private:
+ InstDecodedKind* last_;
+};
+
+// The CachingDecoder works by extending the default vixl Decoder class,
+// providing its own Decode function.
+//
+// This Decode function checks whether the instruction given as argument has
+// been decoded since it was last invalidated. If it was not previously
+// decoded, the value of the instruction is recorded as well as the kind of
+// instruction. Otherwise, the value of the instruction is checked against the
+// previously recorded value, and the recorded instruction kind is used to
+// skip the decoding visitor and resume the execution of the instruction.
+//
+// The caching decoder stores the equivalent of a page of executable code in a
+// hash-table. Each SinglePageDecodeCache stores an array of decoded kinds as
+// well as the value of the previously decoded instruction.
+//
+// When testing if an instruction was decoded before, we check if the address of
+// the instruction is contained in the last SinglePageDecodeCache. If it is not,
+// then the hash-table entry is queried and created if necessary, and the last
+// SinglePageDecodeCache is updated. At that point, the last
+// SinglePageDecodeCache necessarily contains the decoded kind of the
+// instruction given as argument.
+//
+// The caching decoder adds an extra function for flushing the cache, which is
+// in charge of clearing the decoded kinds of the instructions in the range of
+// addresses given as argument. This is indirectly called by
+// CPU::EnsureIAndDCacheCoherency.
+class CachingDecoder : public Decoder {
+ using ICacheMap = mozilla::HashMap<uintptr_t, SinglePageDecodeCache*>;
+ public:
+ CachingDecoder()
+ : lastPage_(nullptr)
+ {
+ PrependVisitor(&cachingDecoder_);
+ }
+ ~CachingDecoder() {
+ RemoveVisitor(&cachingDecoder_);
+ }
+
+ void Decode(const Instruction* instr);
+ void Decode(Instruction* instr) {
+ Decode(const_cast<const Instruction*>(instr));
+ }
+
+ void FlushICache(void* start, size_t size);
+
+ private:
+ // Record the type of the decoded instruction, to avoid decoding it a second
+ // time the next time we execute it.
+ CachingDecoderVisitor cachingDecoder_;
+
+ // Store the mapping of Instruction pointer to the corresponding
+ // SinglePageDecodeCache.
+ ICacheMap iCache_;
+
+ // Record the last SinglePageDecodeCache seen, such that we can quickly access
+ // it for the next instruction.
+ SinglePageDecodeCache* lastPage_;
+};
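+
+// A minimal usage sketch (the surrounding simulator wiring is omitted); the
+// visitor registration itself happens in the constructor/destructor above:
+//
+//   CachingDecoder decoder;
+//   decoder.Decode(instr);            // first visit: full decode, kind recorded
+//   decoder.Decode(instr);            // later visits: cached kind, decode skipped
+//   decoder.FlushICache(code, size);  // drop cached kinds after patching |code|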
+
+}
+#endif // !JS_CACHE_SIMULATOR_ARM64
+#endif // !VIXL_A64_MOZ_CACHING_DECODER_A64_H_
diff --git a/js/src/jit/arm64/vixl/MozCpu-vixl.cpp b/js/src/jit/arm64/vixl/MozCpu-vixl.cpp
new file mode 100644
index 0000000000..909cc590ae
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MozCpu-vixl.cpp
@@ -0,0 +1,226 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Cpu-vixl.h"
+#include "jit/arm64/vixl/Simulator-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+#include "util/WindowsWrapper.h"
+
+#if defined(XP_DARWIN)
+# include <libkern/OSCacheControl.h>
+#endif
+
+namespace vixl {
+
+// Currently computes I and D cache line size.
+void CPU::SetUp() {
+ uint32_t cache_type_register = GetCacheType();
+
+  // The cache type register holds information about the caches, including the
+  // I and D cache line sizes.
+ static const int kDCacheLineSizeShift = 16;
+ static const int kICacheLineSizeShift = 0;
+ static const uint32_t kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift;
+ static const uint32_t kICacheLineSizeMask = 0xf << kICacheLineSizeShift;
+
+  // The cache type register encodes the line size of the I and D caches in
+  // words, as a power of two.
+ uint32_t dcache_line_size_power_of_two =
+ (cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift;
+ uint32_t icache_line_size_power_of_two =
+ (cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift;
+
+ dcache_line_size_ = 4 << dcache_line_size_power_of_two;
+ icache_line_size_ = 4 << icache_line_size_power_of_two;
+
+  // Bug 1521158 suggests that CPUs with heterogeneous cache line sizes could
+  // cause issues: if we invalidate with a 128-byte stride, we would only
+  // invalidate half of each cache line on little cores whose stride is
+  // smaller, such as 64 bytes. To be conservative, we reduce the stride to 32
+  // bytes, which should be smaller than any known cache line.
+ const uint32_t conservative_line_size = 32;
+ dcache_line_size_ = std::min(dcache_line_size_, conservative_line_size);
+ icache_line_size_ = std::min(icache_line_size_, conservative_line_size);
+}
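+
+// Worked example: a DminLine/IminLine field of 4 in the cache type register
+// encodes a line of 4 << 4 == 64 bytes (16 words); with the conservative clamp
+// above, the flush loops in EnsureIAndDCacheCoherency then step by
+// min(64, 32) == 32 bytes.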
+
+
+uint32_t CPU::GetCacheType() {
+#if defined(__aarch64__) && (defined(__linux__) || defined(__android__))
+ uint64_t cache_type_register;
+ // Copy the content of the cache type register to a core register.
+ __asm__ __volatile__ ("mrs %[ctr], ctr_el0" // NOLINT
+ : [ctr] "=r" (cache_type_register));
+ VIXL_ASSERT(IsUint32(cache_type_register));
+ return static_cast<uint32_t>(cache_type_register);
+#else
+  // This will lead to the minimum cache line size (4 << 0 == 4 bytes), which
+  // is fine since neither EnsureIAndDCacheCoherency nor the simulator will
+  // need this information.
+ return 0;
+#endif
+}
+
+void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) {
+#if defined(JS_SIMULATOR_ARM64) && defined(JS_CACHE_SIMULATOR_ARM64)
+  // This code attempts to emulate what the following assembly sequence is
+  // doing: telling all cores that some cache lines have to be invalidated,
+  // while invalidating them only on the current core.
+ //
+ // This is done by recording the current range to be flushed to all
+ // simulators, then if there is a simulator associated with the current
+ // thread, applying all flushed ranges as the "isb" instruction would do.
+ //
+ // As we have no control over the CPU cores used by the code generator and the
+ // execution threads, this code assumes that each thread runs on its own core.
+ //
+ // See Bug 1529933 for more detailed explanation of this issue.
+ using js::jit::SimulatorProcess;
+ js::jit::AutoLockSimulatorCache alsc;
+ if (length > 0) {
+ SimulatorProcess::recordICacheFlush(address, length);
+ }
+ Simulator* sim = vixl::Simulator::Current();
+ if (sim) {
+ sim->FlushICache();
+ }
+#elif defined(_MSC_VER) && defined(_M_ARM64)
+ FlushInstructionCache(GetCurrentProcess(), address, length);
+#elif defined(XP_DARWIN)
+ sys_icache_invalidate(address, length);
+#elif defined(__aarch64__) && (defined(__linux__) || defined(__android__))
+ // Implement the cache synchronisation for all targets where AArch64 is the
+  // host, even if we're building the simulator for an AArch64 host. This
+ // allows for cases where the user wants to simulate code as well as run it
+ // natively.
+
+ if (length == 0) {
+ return;
+ }
+
+ // The code below assumes user space cache operations are allowed.
+
+ // Work out the line sizes for each cache, and use them to determine the
+ // start addresses.
+ uintptr_t start = reinterpret_cast<uintptr_t>(address);
+ uintptr_t dsize = static_cast<uintptr_t>(dcache_line_size_);
+ uintptr_t isize = static_cast<uintptr_t>(icache_line_size_);
+ uintptr_t dline = start & ~(dsize - 1);
+ uintptr_t iline = start & ~(isize - 1);
+
+ // Cache line sizes are always a power of 2.
+ VIXL_ASSERT(IsPowerOf2(dsize));
+ VIXL_ASSERT(IsPowerOf2(isize));
+ uintptr_t end = start + length;
+
+ do {
+ __asm__ __volatile__ (
+ // Clean each line of the D cache containing the target data.
+ //
+ // dc : Data Cache maintenance
+ // c : Clean
+ // i : Invalidate
+ // va : by (Virtual) Address
+ // c : to the point of Coherency
+ // Original implementation used cvau, but changed to civac due to
+ // errata on Cortex-A53 819472, 826319, 827319 and 824069.
+ // See ARM DDI 0406B page B2-12 for more information.
+ //
+ " dc civac, %[dline]\n"
+ :
+ : [dline] "r" (dline)
+ // This code does not write to memory, but the "memory" dependency
+ // prevents GCC from reordering the code.
+ : "memory");
+ dline += dsize;
+ } while (dline < end);
+
+ __asm__ __volatile__ (
+ // Make sure that the data cache operations (above) complete before the
+ // instruction cache operations (below).
+ //
+ // dsb : Data Synchronisation Barrier
+ // ish : Inner SHareable domain
+ //
+ // The point of unification for an Inner Shareable shareability domain is
+ // the point by which the instruction and data caches of all the processors
+ // in that Inner Shareable shareability domain are guaranteed to see the
+ // same copy of a memory location. See ARM DDI 0406B page B2-12 for more
+ // information.
+ " dsb ish\n"
+ : : : "memory");
+
+ do {
+ __asm__ __volatile__ (
+ // Invalidate each line of the I cache containing the target data.
+ //
+ // ic : Instruction Cache maintenance
+ // i : Invalidate
+ // va : by Address
+ // u : to the point of Unification
+ " ic ivau, %[iline]\n"
+ :
+ : [iline] "r" (iline)
+ : "memory");
+ iline += isize;
+ } while (iline < end);
+
+ __asm__ __volatile__(
+ // Make sure that the instruction cache operations (above) take effect
+ // before the isb (below).
+ " dsb ish\n"
+
+ // Ensure that any instructions already in the pipeline are discarded and
+ // reloaded from the new data.
+ // isb : Instruction Synchronisation Barrier
+ " isb\n"
+ :
+ :
+ : "memory");
+#else
+ // If the host isn't AArch64, we must be using the simulator, so this function
+ // doesn't have to do anything.
+ USE(address, length);
+#endif
+}
+
+void CPU::FlushExecutionContext() {
+#if defined(JS_SIMULATOR_ARM64) && defined(JS_CACHE_SIMULATOR_ARM64)
+  // Performing an 'isb' will ensure that the current core's instruction
+  // pipeline is synchronized with an icache flush executed by another core.
+ using js::jit::SimulatorProcess;
+ js::jit::AutoLockSimulatorCache alsc;
+ Simulator* sim = vixl::Simulator::Current();
+ if (sim) {
+ sim->FlushICache();
+ }
+#elif defined(__aarch64__)
+ // Ensure that any instructions already in the pipeline are discarded and
+ // reloaded from the icache.
+ __asm__ __volatile__("isb\n" : : : "memory");
+#endif
+}
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/MozInstructions-vixl.cpp b/js/src/jit/arm64/vixl/MozInstructions-vixl.cpp
new file mode 100644
index 0000000000..398f864493
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MozInstructions-vixl.cpp
@@ -0,0 +1,211 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/Architecture-arm64.h"
+#include "jit/arm64/vixl/Assembler-vixl.h"
+#include "jit/arm64/vixl/Instructions-vixl.h"
+
+namespace vixl {
+
+bool Instruction::IsUncondB() const {
+ return Mask(UnconditionalBranchMask) == (UnconditionalBranchFixed | B);
+}
+
+
+bool Instruction::IsCondB() const {
+ return Mask(ConditionalBranchMask) == (ConditionalBranchFixed | B_cond);
+}
+
+
+bool Instruction::IsBL() const {
+ return Mask(UnconditionalBranchMask) == (UnconditionalBranchFixed | BL);
+}
+
+
+bool Instruction::IsBR() const {
+ return Mask(UnconditionalBranchToRegisterMask) == (UnconditionalBranchToRegisterFixed | BR);
+}
+
+
+bool Instruction::IsBLR() const {
+ return Mask(UnconditionalBranchToRegisterMask) == (UnconditionalBranchToRegisterFixed | BLR);
+}
+
+
+bool Instruction::IsTBZ() const {
+ return Mask(TestBranchMask) == TBZ;
+}
+
+
+bool Instruction::IsTBNZ() const {
+ return Mask(TestBranchMask) == TBNZ;
+}
+
+
+bool Instruction::IsCBZ() const {
+ return Mask(CompareBranchMask) == CBZ_w || Mask(CompareBranchMask) == CBZ_x;
+}
+
+
+bool Instruction::IsCBNZ() const {
+ return Mask(CompareBranchMask) == CBNZ_w || Mask(CompareBranchMask) == CBNZ_x;
+}
+
+
+bool Instruction::IsLDR() const {
+ return Mask(LoadLiteralMask) == LDR_x_lit;
+}
+
+
+bool Instruction::IsNOP() const {
+ return Mask(SystemHintMask) == HINT && ImmHint() == NOP;
+}
+
+
+bool Instruction::IsCSDB() const {
+ return Mask(SystemHintMask) == HINT && ImmHint() == CSDB;
+}
+
+
+bool Instruction::IsADR() const {
+ return Mask(PCRelAddressingMask) == ADR;
+}
+
+
+bool Instruction::IsADRP() const {
+ return Mask(PCRelAddressingMask) == ADRP;
+}
+
+
+bool Instruction::IsMovz() const {
+ return (Mask(MoveWideImmediateMask) == MOVZ_x) ||
+ (Mask(MoveWideImmediateMask) == MOVZ_w);
+}
+
+
+bool Instruction::IsMovk() const {
+ return (Mask(MoveWideImmediateMask) == MOVK_x) ||
+ (Mask(MoveWideImmediateMask) == MOVK_w);
+}
+
+bool Instruction::IsBranchLinkImm() const {
+ return Mask(UnconditionalBranchFMask) == (UnconditionalBranchFixed | BL);
+}
+
+
+bool Instruction::IsTargetReachable(const Instruction* target) const {
+ VIXL_ASSERT(((target - this) & 3) == 0);
+ int offset = (target - this) >> kInstructionSizeLog2;
+ switch (BranchType()) {
+ case CondBranchType:
+ return IsInt19(offset);
+ case UncondBranchType:
+ return IsInt26(offset);
+ case CompareBranchType:
+ return IsInt19(offset);
+ case TestBranchType:
+ return IsInt14(offset);
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
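+
+// For reference, with offsets counted in 4-byte instructions the ranges checked
+// above are: int19 == +/-1 MiB (conditional and compare-and-branch forms),
+// int26 == +/-128 MiB (unconditional branches), and int14 == +/-32 KiB
+// (test-and-branch forms).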
+
+
+ptrdiff_t Instruction::ImmPCRawOffset() const {
+ ptrdiff_t offset;
+ if (IsPCRelAddressing()) {
+ // ADR and ADRP.
+ offset = ImmPCRel();
+ } else if (BranchType() == UnknownBranchType) {
+ offset = ImmLLiteral();
+ } else {
+ offset = ImmBranch();
+ }
+ return offset;
+}
+
+void
+Instruction::SetImmPCRawOffset(ptrdiff_t offset)
+{
+ if (IsPCRelAddressing()) {
+ // ADR and ADRP. We're encoding a raw offset here.
+ // See also SetPCRelImmTarget().
+ Instr imm = vixl::Assembler::ImmPCRelAddress(offset);
+ SetInstructionBits(Mask(~ImmPCRel_mask) | imm);
+ } else {
+ SetBranchImmTarget(this + (offset << kInstructionSizeLog2));
+ }
+}
+
+// Is this a stack pointer synchronization instruction as inserted by
+// MacroAssembler::syncStackPtr()?
+bool
+Instruction::IsStackPtrSync() const
+{
+ // The stack pointer sync is a move to the stack pointer.
+ // This is encoded as 'add sp, Rs, #0'.
+ return IsAddSubImmediate() && Rd() == js::jit::Registers::sp && ImmAddSub() == 0;
+}
+
+// Skip over a constant pool at |this| if there is one.
+//
+// If |this| is pointing to the artificial guard branch around a constant pool,
+// return the instruction after the pool. Otherwise return |this| itself.
+//
+// This function does not skip constant pools with a natural guard branch. It
+// is assumed that anyone inspecting the instruction stream understands the
+// branches that were inserted naturally.
+const Instruction*
+Instruction::skipPool() const
+{
+ // Artificial pool guards can only be B (rather than BR), and they must be
+ // forward branches.
+ if (!IsUncondB() || ImmUncondBranch() <= 0)
+ return this;
+
+ // Check for a constant pool header which has the high 16 bits set. See
+  // struct PoolHeader. Bit 15 indicates a natural pool guard when set; it
+  // must be clear here, which indicates an artificial pool guard.
+ const Instruction *header = InstructionAtOffset(kInstructionSize);
+ if (header->Mask(0xffff8000) != 0xffff0000)
+ return this;
+
+ // OK, this is an artificial jump around a constant pool.
+ return ImmPCOffsetTarget();
+}
+
+
+void Instruction::SetBits32(int msb, int lsb, unsigned value) {
+ uint32_t me;
+ memcpy(&me, this, sizeof(me));
+ uint32_t new_mask = (1 << (msb+1)) - (1 << lsb);
+ uint32_t keep_mask = ~new_mask;
+ me = (me & keep_mask) | ((value << lsb) & new_mask);
+ memcpy(this, &me, sizeof(me));
+}
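+
+// Worked example: SetBits32(7, 4, 0xA) computes new_mask == (1 << 8) - (1 << 4)
+// == 0xF0, clears bits [7:4] of the instruction word, and ORs in (0xA << 4),
+// leaving every other bit untouched.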
+
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/MozSimulator-vixl.cpp b/js/src/jit/arm64/vixl/MozSimulator-vixl.cpp
new file mode 100644
index 0000000000..9f817cf0a3
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MozSimulator-vixl.cpp
@@ -0,0 +1,1258 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "mozilla/DebugOnly.h"
+
+#include "jit/arm64/vixl/Debugger-vixl.h"
+#include "jit/arm64/vixl/MozCachingDecoder.h"
+#include "jit/arm64/vixl/Simulator-vixl.h"
+#include "jit/IonTypes.h"
+#include "js/UniquePtr.h"
+#include "js/Utility.h"
+#include "threading/LockGuard.h"
+#include "vm/JSContext.h"
+#include "vm/Runtime.h"
+
+js::jit::SimulatorProcess* js::jit::SimulatorProcess::singleton_ = nullptr;
+
+namespace vixl {
+
+using mozilla::DebugOnly;
+using js::jit::ABIFunctionType;
+using js::jit::JitActivation;
+using js::jit::SimulatorProcess;
+
+Simulator::Simulator(Decoder* decoder, FILE* stream)
+ : stream_(nullptr)
+ , print_disasm_(nullptr)
+ , instrumentation_(nullptr)
+ , stack_(nullptr)
+ , stack_limit_(nullptr)
+ , decoder_(nullptr)
+ , oom_(false)
+{
+ this->init(decoder, stream);
+
+ // If this environment variable is present, trace the executed instructions.
+ // (Very helpful for debugging code generation crashes.)
+ if (getenv("VIXL_TRACE")) {
+ set_trace_parameters(LOG_DISASM);
+ }
+}
+
+
+Simulator::~Simulator() {
+ js_free(stack_);
+ stack_ = nullptr;
+
+ // The decoder may outlive the simulator.
+ if (print_disasm_) {
+ decoder_->RemoveVisitor(print_disasm_);
+ js_delete(print_disasm_);
+ print_disasm_ = nullptr;
+ }
+
+ if (instrumentation_) {
+ decoder_->RemoveVisitor(instrumentation_);
+ js_delete(instrumentation_);
+ instrumentation_ = nullptr;
+ }
+}
+
+
+void Simulator::ResetState() {
+ // Reset the system registers.
+ nzcv_ = SimSystemRegister::DefaultValueFor(NZCV);
+ fpcr_ = SimSystemRegister::DefaultValueFor(FPCR);
+
+  // Reset the registers.
+ pc_ = nullptr;
+ pc_modified_ = false;
+ for (unsigned i = 0; i < kNumberOfRegisters; i++) {
+ set_xreg(i, 0xbadbeef);
+ }
+ // Set FP registers to a value that is a NaN in both 32-bit and 64-bit FP.
+ uint64_t nan_bits = UINT64_C(0x7ff0dead7f8beef1);
+ VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits & kDRegMask)));
+ VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits & kSRegMask)));
+ for (unsigned i = 0; i < kNumberOfFPRegisters; i++) {
+ set_dreg_bits(i, nan_bits);
+ }
+ // Returning to address 0 exits the Simulator.
+ set_lr(kEndOfSimAddress);
+}
+
+
+void Simulator::init(Decoder* decoder, FILE* stream) {
+ // Ensure that shift operations act as the simulator expects.
+ VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1);
+ VIXL_ASSERT((static_cast<uint32_t>(-1) >> 1) == 0x7FFFFFFF);
+
+ instruction_stats_ = false;
+
+ // Set up the decoder.
+ decoder_ = decoder;
+ decoder_->AppendVisitor(this);
+
+ stream_ = stream;
+ print_disasm_ = js_new<PrintDisassembler>(stream_);
+ if (!print_disasm_) {
+ oom_ = true;
+ return;
+ }
+ set_coloured_trace(false);
+ trace_parameters_ = LOG_NONE;
+
+ ResetState();
+
+ // Allocate and set up the simulator stack.
+ stack_ = js_pod_malloc<byte>(stack_size_);
+ if (!stack_) {
+ oom_ = true;
+ return;
+ }
+ stack_limit_ = stack_ + stack_protection_size_;
+ // Configure the starting stack pointer.
+ // - Find the top of the stack.
+ byte * tos = stack_ + stack_size_;
+ // - There's a protection region at both ends of the stack.
+ tos -= stack_protection_size_;
+ // - The stack pointer must be 16-byte aligned.
+ tos = AlignDown(tos, 16);
+ set_sp(tos);
+
+ // Set the sample period to 10, as the VIXL examples and tests are short.
+ if (getenv("VIXL_STATS")) {
+ instrumentation_ = js_new<Instrument>("vixl_stats.csv", 10);
+ if (!instrumentation_) {
+ oom_ = true;
+ return;
+ }
+ }
+
+ // Print a warning about exclusive-access instructions, but only the first
+ // time they are encountered. This warning can be silenced using
+ // SilenceExclusiveAccessWarning().
+ print_exclusive_access_warning_ = true;
+}
+
+
+Simulator* Simulator::Current() {
+ JSContext* cx = js::TlsContext.get();
+ if (!cx) {
+ return nullptr;
+ }
+ JSRuntime* rt = cx->runtime();
+ if (!rt) {
+ return nullptr;
+ }
+ if (!js::CurrentThreadCanAccessRuntime(rt)) {
+ return nullptr;
+ }
+ return cx->simulator();
+}
+
+
+Simulator* Simulator::Create() {
+ Decoder *decoder = js_new<Decoder>();
+ if (!decoder)
+ return nullptr;
+
+ // FIXME: This just leaks the Decoder object for now, which is probably OK.
+ // FIXME: We should free it at some point.
+ // FIXME: Note that it can't be stored in the SimulatorRuntime due to lifetime conflicts.
+ js::UniquePtr<Simulator> sim;
+ if (getenv("USE_DEBUGGER") != nullptr) {
+ sim.reset(js_new<Debugger>(decoder, stdout));
+ } else {
+ sim.reset(js_new<Simulator>(decoder, stdout));
+ }
+
+ // Check if Simulator:init ran out of memory.
+ if (sim && sim->oom()) {
+ return nullptr;
+ }
+
+#ifdef JS_CACHE_SIMULATOR_ARM64
+ // Register the simulator in the Simulator process to handle cache flushes
+ // across threads.
+ js::jit::AutoLockSimulatorCache alsc;
+ if (!SimulatorProcess::registerSimulator(sim.get())) {
+ return nullptr;
+ }
+#endif
+
+ return sim.release();
+}
+
+
+void Simulator::Destroy(Simulator* sim) {
+#ifdef JS_CACHE_SIMULATOR_ARM64
+ if (sim) {
+ js::jit::AutoLockSimulatorCache alsc;
+ SimulatorProcess::unregisterSimulator(sim);
+ }
+#endif
+
+ js_delete(sim);
+}
+
+
+void Simulator::ExecuteInstruction() {
+ // The program counter should always be aligned.
+ VIXL_ASSERT(IsWordAligned(pc_));
+#ifdef JS_CACHE_SIMULATOR_ARM64
+ if (pendingCacheRequests) {
+    // Here we emulate the behavior that the membarrier syscall provides on
+    // real hardware; see the syscalls to membarrier in MozCpu-vixl.cpp.
+    // One slight difference is that the simulator is not being interrupted:
+    // instead, we effectively run the icache flush request before executing
+    // the next instruction, which is close enough and sufficient for our use
+    // case.
+ js::jit::AutoLockSimulatorCache alsc;
+ FlushICache();
+ }
+#endif
+ decoder_->Decode(pc_);
+ increment_pc();
+}
+
+
+uintptr_t Simulator::stackLimit() const {
+ return reinterpret_cast<uintptr_t>(stack_limit_);
+}
+
+
+uintptr_t* Simulator::addressOfStackLimit() {
+ return (uintptr_t*)&stack_limit_;
+}
+
+
+bool Simulator::overRecursed(uintptr_t newsp) const {
+ if (newsp == 0) {
+ newsp = get_sp();
+ }
+ return newsp <= stackLimit();
+}
+
+
+bool Simulator::overRecursedWithExtra(uint32_t extra) const {
+ uintptr_t newsp = get_sp() - extra;
+ return newsp <= stackLimit();
+}
+
+
+JS::ProfilingFrameIterator::RegisterState
+Simulator::registerState()
+{
+ JS::ProfilingFrameIterator::RegisterState state;
+ state.pc = (uint8_t*) get_pc();
+ state.fp = (uint8_t*) get_fp();
+ state.lr = (uint8_t*) get_lr();
+ state.sp = (uint8_t*) get_sp();
+ return state;
+}
+
+int64_t Simulator::call(uint8_t* entry, int argument_count, ...) {
+ va_list parameters;
+ va_start(parameters, argument_count);
+
+ // First eight arguments passed in registers.
+ VIXL_ASSERT(argument_count <= 8);
+ // This code should use the type of the called function
+ // (with templates, like the callVM machinery), but since the
+  // number of called functions is minuscule, their types have been
+ // divined from the number of arguments.
+ if (argument_count == 8) {
+ // EnterJitData::jitcode.
+ set_xreg(0, va_arg(parameters, int64_t));
+ // EnterJitData::maxArgc.
+ set_xreg(1, va_arg(parameters, unsigned));
+ // EnterJitData::maxArgv.
+ set_xreg(2, va_arg(parameters, int64_t));
+ // EnterJitData::osrFrame.
+ set_xreg(3, va_arg(parameters, int64_t));
+ // EnterJitData::calleeToken.
+ set_xreg(4, va_arg(parameters, int64_t));
+ // EnterJitData::scopeChain.
+ set_xreg(5, va_arg(parameters, int64_t));
+ // EnterJitData::osrNumStackValues.
+ set_xreg(6, va_arg(parameters, unsigned));
+ // Address of EnterJitData::result.
+ set_xreg(7, va_arg(parameters, int64_t));
+ } else if (argument_count == 2) {
+ // EntryArg* args
+ set_xreg(0, va_arg(parameters, int64_t));
+ // uint8_t* GlobalData
+ set_xreg(1, va_arg(parameters, int64_t));
+ } else if (argument_count == 1) { // irregexp
+ // InputOutputData& data
+ set_xreg(0, va_arg(parameters, int64_t));
+ } else if (argument_count == 0) { // testsJit.cpp
+ // accept.
+ } else {
+ MOZ_CRASH("Unknown number of arguments");
+ }
+
+ va_end(parameters);
+
+ // Call must transition back to native code on exit.
+ VIXL_ASSERT(get_lr() == int64_t(kEndOfSimAddress));
+
+ // Execute the simulation.
+ DebugOnly<int64_t> entryStack = get_sp();
+ RunFrom((Instruction*)entry);
+ DebugOnly<int64_t> exitStack = get_sp();
+ VIXL_ASSERT(entryStack == exitStack);
+
+ int64_t result = xreg(0);
+ if (getenv("USE_DEBUGGER")) {
+ printf("LEAVE\n");
+ }
+ return result;
+}
+
+
+// When the generated code calls a VM function (masm.callWithABI) we need to
+// call that function instead of trying to execute it with the simulator
+// (because it's x64 code instead of AArch64 code). We do that by redirecting the VM
+// call to a svc (Supervisor Call) instruction that is handled by the
+// simulator. We write the original destination of the jump just at a known
+// offset from the svc instruction so the simulator knows what to call.
+class Redirection
+{
+ friend class Simulator;
+
+ Redirection(void* nativeFunction, ABIFunctionType type)
+ : nativeFunction_(nativeFunction),
+ type_(type),
+ next_(nullptr)
+ {
+ next_ = SimulatorProcess::redirection();
+ SimulatorProcess::setRedirection(this);
+
+ Instruction* instr = (Instruction*)(&svcInstruction_);
+ vixl::Assembler::svc(instr, kCallRtRedirected);
+ }
+
+ public:
+ void* addressOfSvcInstruction() { return &svcInstruction_; }
+ void* nativeFunction() const { return nativeFunction_; }
+ ABIFunctionType type() const { return type_; }
+
+ static Redirection* Get(void* nativeFunction, ABIFunctionType type) {
+ js::jit::AutoLockSimulatorCache alsr;
+
+ // TODO: Store srt_ in the simulator for this assertion.
+ // VIXL_ASSERT_IF(pt->simulator(), pt->simulator()->srt_ == srt);
+
+ Redirection* current = SimulatorProcess::redirection();
+ for (; current != nullptr; current = current->next_) {
+ if (current->nativeFunction_ == nativeFunction) {
+ VIXL_ASSERT(current->type() == type);
+ return current;
+ }
+ }
+
+ // Note: we can't use js_new here because the constructor is private.
+ js::AutoEnterOOMUnsafeRegion oomUnsafe;
+ Redirection* redir = js_pod_malloc<Redirection>(1);
+ if (!redir)
+ oomUnsafe.crash("Simulator redirection");
+ new(redir) Redirection(nativeFunction, type);
+ return redir;
+ }
+
+ static const Redirection* FromSvcInstruction(const Instruction* svcInstruction) {
+ const uint8_t* addrOfSvc = reinterpret_cast<const uint8_t*>(svcInstruction);
+ const uint8_t* addrOfRedirection = addrOfSvc - offsetof(Redirection, svcInstruction_);
+ return reinterpret_cast<const Redirection*>(addrOfRedirection);
+ }
+
+ private:
+ void* nativeFunction_;
+ uint32_t svcInstruction_;
+ ABIFunctionType type_;
+ Redirection* next_;
+};
+
+
+
+
+void* Simulator::RedirectNativeFunction(void* nativeFunction, ABIFunctionType type) {
+ Redirection* redirection = Redirection::Get(nativeFunction, type);
+ return redirection->addressOfSvcInstruction();
+}
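+
+// A sketch of how the redirection is consumed by callWithABI-style code
+// (SomeVMFunction and the masm call are illustrative, not actual call sites):
+// the JIT asks for a trampoline address once, and every simulated call through
+// it then traps into VisitCallRedirection below.
+//
+//   void* fn = Simulator::RedirectNativeFunction((void*)&SomeVMFunction,
+//                                                js::jit::Args_General2);
+//   masm.call(js::jit::ImmPtr(fn));  // the emitted call now targets the svc stub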
+
+void Simulator::VisitException(const Instruction* instr) {
+ if (instr->InstructionBits() == UNDEFINED_INST_PATTERN) {
+ uint8_t* newPC;
+ if (js::wasm::HandleIllegalInstruction(registerState(), &newPC)) {
+ set_pc((Instruction*)newPC);
+ return;
+ }
+ DoUnreachable(instr);
+ }
+
+ switch (instr->Mask(ExceptionMask)) {
+ case BRK: {
+ int lowbit = ImmException_offset;
+ int highbit = ImmException_offset + ImmException_width - 1;
+ HostBreakpoint(instr->Bits(highbit, lowbit));
+ break;
+ }
+ case HLT:
+ switch (instr->ImmException()) {
+ case kTraceOpcode:
+ DoTrace(instr);
+ return;
+ case kLogOpcode:
+ DoLog(instr);
+ return;
+ case kPrintfOpcode:
+ DoPrintf(instr);
+ return;
+ default:
+ HostBreakpoint();
+ return;
+ }
+ case SVC:
+ // The SVC instruction is hijacked by the JIT as a pseudo-instruction
+ // causing the Simulator to execute host-native code for callWithABI.
+ switch (instr->ImmException()) {
+ case kCallRtRedirected:
+ VisitCallRedirection(instr);
+ return;
+ case kMarkStackPointer: {
+ js::AutoEnterOOMUnsafeRegion oomUnsafe;
+ if (!spStack_.append(get_sp()))
+ oomUnsafe.crash("tracking stack for ARM64 simulator");
+ return;
+ }
+ case kCheckStackPointer: {
+ DebugOnly<int64_t> current = get_sp();
+ DebugOnly<int64_t> expected = spStack_.popCopy();
+ VIXL_ASSERT(current == expected);
+ return;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::setGPR32Result(int32_t result) {
+ set_wreg(0, result);
+}
+
+
+void Simulator::setGPR64Result(int64_t result) {
+ set_xreg(0, result);
+}
+
+
+void Simulator::setFP32Result(float result) {
+ set_sreg(0, result);
+}
+
+
+void Simulator::setFP64Result(double result) {
+ set_dreg(0, result);
+}
+
+
+typedef int64_t (*Prototype_General0)();
+typedef int64_t (*Prototype_General1)(int64_t arg0);
+typedef int64_t (*Prototype_General2)(int64_t arg0, int64_t arg1);
+typedef int64_t (*Prototype_General3)(int64_t arg0, int64_t arg1, int64_t arg2);
+typedef int64_t (*Prototype_General4)(int64_t arg0, int64_t arg1, int64_t arg2, int64_t arg3);
+typedef int64_t (*Prototype_General5)(int64_t arg0, int64_t arg1, int64_t arg2, int64_t arg3,
+ int64_t arg4);
+typedef int64_t (*Prototype_General6)(int64_t arg0, int64_t arg1, int64_t arg2, int64_t arg3,
+ int64_t arg4, int64_t arg5);
+typedef int64_t (*Prototype_General7)(int64_t arg0, int64_t arg1, int64_t arg2, int64_t arg3,
+ int64_t arg4, int64_t arg5, int64_t arg6);
+typedef int64_t (*Prototype_General8)(int64_t arg0, int64_t arg1, int64_t arg2, int64_t arg3,
+ int64_t arg4, int64_t arg5, int64_t arg6, int64_t arg7);
+typedef int64_t (*Prototype_GeneralGeneralGeneralInt64)(int64_t arg0, int64_t arg1, int64_t arg2,
+ int64_t arg3);
+typedef int64_t (*Prototype_GeneralGeneralInt64Int64)(int64_t arg0, int64_t arg1, int64_t arg2,
+ int64_t arg3);
+
+typedef int64_t (*Prototype_Int_Double)(double arg0);
+typedef int64_t (*Prototype_Int_IntDouble)(int64_t arg0, double arg1);
+typedef int64_t (*Prototype_Int_DoubleInt)(double arg0, int64_t arg1);
+typedef int64_t (*Prototype_Int_DoubleIntInt)(double arg0, uint64_t arg1, uint64_t arg2);
+typedef int64_t (*Prototype_Int_IntDoubleIntInt)(uint64_t arg0, double arg1,
+ uint64_t arg2, uint64_t arg3);
+
+typedef float (*Prototype_Float32_Float32)(float arg0);
+typedef int64_t (*Prototype_Int_Float32)(float arg0);
+typedef float (*Prototype_Float32_Float32Float32)(float arg0, float arg1);
+
+typedef double (*Prototype_Double_None)();
+typedef double (*Prototype_Double_Double)(double arg0);
+typedef double (*Prototype_Double_Int)(int64_t arg0);
+typedef double (*Prototype_Double_DoubleInt)(double arg0, int64_t arg1);
+typedef double (*Prototype_Double_IntDouble)(int64_t arg0, double arg1);
+typedef double (*Prototype_Double_DoubleDouble)(double arg0, double arg1);
+typedef double (*Prototype_Double_DoubleDoubleDouble)(double arg0, double arg1, double arg2);
+typedef double (*Prototype_Double_DoubleDoubleDoubleDouble)(double arg0, double arg1,
+ double arg2, double arg3);
+
+typedef int32_t (*Prototype_Int32_General)(int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32)(int64_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32)(int64_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32)(int64_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32Int32)(int64_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32General)(int64_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32Int32Int32General)(int64_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Float32Float32Int32Int32Int32General)(int64_t,
+ int32_t,
+ float,
+ float,
+ int32_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General)(int64_t,
+ int32_t,
+ float,
+ float,
+ float,
+ float,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General)(int64_t,
+ int32_t,
+ float,
+ float,
+ int32_t,
+ float,
+ float,
+ int32_t,
+ float,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32General)(int64_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int64)(int64_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32General)(int64_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int64Int64)(int64_t,
+ int32_t,
+ int64_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32GeneralInt32)(int64_t,
+ int32_t,
+ int64_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32GeneralInt32Int32)(int64_t,
+ int32_t,
+ int64_t,
+ int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneral)(int64_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralGeneral)(int64_t,
+ int64_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralInt32Int32)(int64_t,
+ int64_t,
+ int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32Int32Int32)(int64_t, int64_t,
+ int32_t, int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32)(int64_t, int64_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32Int64)(int64_t, int64_t,
+ int32_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32Int64General)(int64_t, int64_t,
+ int32_t, int64_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int64Int64)(int64_t, int64_t,
+ int64_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int64General)(int64_t, int64_t,
+ int64_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int64Int64General)(int64_t, int64_t,
+ int64_t, int64_t,
+ int64_t);
+typedef int64_t (*Prototype_General_GeneralInt32)(int64_t, int32_t);
+typedef int64_t (*Prototype_General_GeneralInt32Int32)(int64_t,
+ int32_t,
+ int32_t);
+typedef int64_t (*Prototype_General_GeneralInt32General)(int64_t,
+ int32_t,
+ int64_t);
+typedef int64_t (*Prototype_General_GeneralInt32Int32GeneralInt32)(int64_t,
+ int32_t,
+ int32_t,
+ int64_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralInt32GeneralInt32Int32Int32)(
+ int64_t,
+ int64_t,
+ int32_t,
+ int64_t,
+ int32_t,
+ int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralInt32General)(int64_t,
+ int64_t,
+ int32_t,
+ int64_t);
+typedef int64_t (*Prototype_Int64_General)(int64_t);
+typedef int64_t (*Prototype_Int64_GeneralInt64)(int64_t, int64_t);
+
+// Simulator support for callWithABI().
+void
+Simulator::VisitCallRedirection(const Instruction* instr)
+{
+ VIXL_ASSERT(instr->Mask(ExceptionMask) == SVC);
+ VIXL_ASSERT(instr->ImmException() == kCallRtRedirected);
+
+ const Redirection* redir = Redirection::FromSvcInstruction(instr);
+ uintptr_t nativeFn = reinterpret_cast<uintptr_t>(redir->nativeFunction());
+
+ // Stack must be aligned prior to the call.
+ // FIXME: It's actually our job to perform the alignment...
+ //VIXL_ASSERT((xreg(31, Reg31IsStackPointer) & (StackAlignment - 1)) == 0);
+
+ // Used to assert that callee-saved registers are preserved.
+ DebugOnly<int64_t> x19 = xreg(19);
+ DebugOnly<int64_t> x20 = xreg(20);
+ DebugOnly<int64_t> x21 = xreg(21);
+ DebugOnly<int64_t> x22 = xreg(22);
+ DebugOnly<int64_t> x23 = xreg(23);
+ DebugOnly<int64_t> x24 = xreg(24);
+ DebugOnly<int64_t> x25 = xreg(25);
+ DebugOnly<int64_t> x26 = xreg(26);
+ DebugOnly<int64_t> x27 = xreg(27);
+ DebugOnly<int64_t> x28 = xreg(28);
+ DebugOnly<int64_t> x29 = xreg(29);
+ DebugOnly<int64_t> savedSP = get_sp();
+
+ // Remember LR for returning from the "call".
+ int64_t savedLR = xreg(30);
+
+ // Allow recursive Simulator calls: returning from the call must stop
+ // the simulation and transition back to native Simulator code.
+ set_xreg(30, int64_t(kEndOfSimAddress));
+
+ // Store argument register values in local variables for ease of use below.
+ int64_t x0 = xreg(0);
+ int64_t x1 = xreg(1);
+ int64_t x2 = xreg(2);
+ int64_t x3 = xreg(3);
+ int64_t x4 = xreg(4);
+ int64_t x5 = xreg(5);
+ int64_t x6 = xreg(6);
+ int64_t x7 = xreg(7);
+ int64_t x8 = xreg(8);
+ double d0 = dreg(0);
+ double d1 = dreg(1);
+ double d2 = dreg(2);
+ double d3 = dreg(3);
+ float s0 = sreg(0);
+ float s1 = sreg(1);
+ float s2 = sreg(2);
+ float s3 = sreg(3);
+ float s4 = sreg(4);
+
+ // Dispatch the call and set the return value.
+ switch (redir->type()) {
+ // Cases with int64_t return type.
+ case js::jit::Args_General0: {
+ int64_t ret = reinterpret_cast<Prototype_General0>(nativeFn)();
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General1: {
+ int64_t ret = reinterpret_cast<Prototype_General1>(nativeFn)(x0);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General2: {
+ int64_t ret = reinterpret_cast<Prototype_General2>(nativeFn)(x0, x1);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General3: {
+ int64_t ret = reinterpret_cast<Prototype_General3>(nativeFn)(x0, x1, x2);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General4: {
+ int64_t ret = reinterpret_cast<Prototype_General4>(nativeFn)(x0, x1, x2, x3);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General5: {
+ int64_t ret = reinterpret_cast<Prototype_General5>(nativeFn)(x0, x1, x2, x3, x4);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General6: {
+ int64_t ret = reinterpret_cast<Prototype_General6>(nativeFn)(x0, x1, x2, x3, x4, x5);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General7: {
+ int64_t ret = reinterpret_cast<Prototype_General7>(nativeFn)(x0, x1, x2, x3, x4, x5, x6);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General8: {
+ int64_t ret = reinterpret_cast<Prototype_General8>(nativeFn)(x0, x1, x2, x3, x4, x5, x6, x7);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_Int_GeneralGeneralGeneralInt64: {
+ int64_t ret = reinterpret_cast<Prototype_GeneralGeneralGeneralInt64>(nativeFn)(x0, x1, x2, x3);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_Int_GeneralGeneralInt64Int64: {
+ int64_t ret = reinterpret_cast<Prototype_GeneralGeneralInt64Int64>(nativeFn)(x0, x1, x2, x3);
+ setGPR64Result(ret);
+ break;
+ }
+
+ // Cases with GPR return type. This can be int32 or int64, but int64 is a safer assumption.
+ case js::jit::Args_Int_Double: {
+ int64_t ret = reinterpret_cast<Prototype_Int_Double>(nativeFn)(d0);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_Int_IntDouble: {
+ int64_t ret = reinterpret_cast<Prototype_Int_IntDouble>(nativeFn)(x0, d0);
+ setGPR64Result(ret);
+ break;
+ }
+
+ case js::jit::Args_Int_DoubleInt: {
+ int64_t ret = reinterpret_cast<Prototype_Int_DoubleInt>(nativeFn)(d0, x0);
+ setGPR64Result(ret);
+ break;
+ }
+
+ case js::jit::Args_Int_IntDoubleIntInt: {
+ int64_t ret = reinterpret_cast<Prototype_Int_IntDoubleIntInt>(nativeFn)(x0, d0, x1, x2);
+ setGPR64Result(ret);
+ break;
+ }
+
+ case js::jit::Args_Int_DoubleIntInt: {
+ int64_t ret = reinterpret_cast<Prototype_Int_DoubleIntInt>(nativeFn)(d0, x0, x1);
+ setGPR64Result(ret);
+ break;
+ }
+
+ // Cases with float return type.
+ case js::jit::Args_Float32_Float32: {
+ float ret = reinterpret_cast<Prototype_Float32_Float32>(nativeFn)(s0);
+ setFP32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int_Float32: {
+ int64_t ret = reinterpret_cast<Prototype_Int_Float32>(nativeFn)(s0);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_Float32_Float32Float32: {
+ float ret = reinterpret_cast<Prototype_Float32_Float32Float32>(nativeFn)(s0, s1);
+ setFP32Result(ret);
+ break;
+ }
+
+ // Cases with double return type.
+ case js::jit::Args_Double_None: {
+ double ret = reinterpret_cast<Prototype_Double_None>(nativeFn)();
+ setFP64Result(ret);
+ break;
+ }
+ case js::jit::Args_Double_Double: {
+ double ret = reinterpret_cast<Prototype_Double_Double>(nativeFn)(d0);
+ setFP64Result(ret);
+ break;
+ }
+ case js::jit::Args_Double_Int: {
+ double ret = reinterpret_cast<Prototype_Double_Int>(nativeFn)(x0);
+ setFP64Result(ret);
+ break;
+ }
+ case js::jit::Args_Double_DoubleInt: {
+ double ret = reinterpret_cast<Prototype_Double_DoubleInt>(nativeFn)(d0, x0);
+ setFP64Result(ret);
+ break;
+ }
+ case js::jit::Args_Double_DoubleDouble: {
+ double ret = reinterpret_cast<Prototype_Double_DoubleDouble>(nativeFn)(d0, d1);
+ setFP64Result(ret);
+ break;
+ }
+ case js::jit::Args_Double_DoubleDoubleDouble: {
+ double ret = reinterpret_cast<Prototype_Double_DoubleDoubleDouble>(nativeFn)(d0, d1, d2);
+ setFP64Result(ret);
+ break;
+ }
+ case js::jit::Args_Double_DoubleDoubleDoubleDouble: {
+ double ret = reinterpret_cast<Prototype_Double_DoubleDoubleDoubleDouble>(nativeFn)(d0, d1, d2, d3);
+ setFP64Result(ret);
+ break;
+ }
+
+ case js::jit::Args_Double_IntDouble: {
+ double ret = reinterpret_cast<Prototype_Double_IntDouble>(nativeFn)(x0, d0);
+ setFP64Result(ret);
+ break;
+ }
+
+ case js::jit::Args_Int32_General: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_General>(nativeFn)(x0);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32>(nativeFn)(x0, x1);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt32Int32>(
+ nativeFn)(x0, x1, x2);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32Int32Int32: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32Int32>(
+ nativeFn)(x0, x1, x2, x3, x4);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32Int32Int32Int32: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32Int32Int32>(
+ nativeFn)(x0, x1, x2, x3, x4, x5);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32Int32Int32General: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32Int32General>(
+ nativeFn)(x0, x1, x2, x3, x4, x5);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32Int32Int32Int32Int32General: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32Int32Int32Int32General>(
+ nativeFn)(x0, x1, x2, x3, x4, x5, x6, x7);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Float32Float32Int32Int32Int32General: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Float32Float32Int32Int32Int32General>(
+ nativeFn)(x0, x1, s0, s1, x2, x3, x4, x5);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General>(
+ nativeFn)(x0, x1, s0, s1, s2, s3, x2, x3, x4, x5, x6);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General>(
+ nativeFn)(x0, x1, s0, s1, x2, s2, s3, x3, s4, x4, x5, x6, x7, x8);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32Int32General: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32General>(
+ nativeFn)(x0, x1, x2, x3, x4);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32Int64: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int64>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32General: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt32Int32General>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int64Int64: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt32Int64Int64>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32GeneralInt32: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt32GeneralInt32>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32GeneralInt32Int32: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32GeneralInt32Int32>(
+ nativeFn)(x0, x1, x2, x3, x4);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralGeneral: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralGeneral>(nativeFn)(x0, x1);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralGeneralGeneral: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralGeneralGeneral>(
+ nativeFn)(x0, x1, x2);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralGeneralInt32Int32: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralGeneralInt32Int32>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt64Int32Int32Int32: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int32Int32Int32>(
+ nativeFn)(x0, x1, x2, x3, x4);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt64Int32: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int32>(
+ nativeFn)(x0, x1, x2);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt64Int32Int64: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int32Int64>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt64Int32Int64General: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int32Int64General>(
+ nativeFn)(x0, x1, x2, x3, x4);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt64Int64Int64: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int64Int64>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt64Int64General: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int64General>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt64Int64Int64General: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int64Int64General>(
+ nativeFn)(x0, x1, x2, x3, x4);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_General_GeneralInt32: {
+ int64_t ret =
+ reinterpret_cast<Prototype_General_GeneralInt32>(nativeFn)(x0, x1);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General_GeneralInt32Int32: {
+ int64_t ret = reinterpret_cast<Prototype_General_GeneralInt32Int32>(
+ nativeFn)(x0, x1, x2);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General_GeneralInt32General: {
+ int64_t ret =
+ reinterpret_cast<Prototype_General_GeneralInt32General>(
+ nativeFn)(x0, x1, x2);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General_GeneralInt32Int32GeneralInt32: {
+ int64_t ret =
+ reinterpret_cast<Prototype_General_GeneralInt32Int32GeneralInt32>(
+ nativeFn)(x0, x1, x2, x3, x4);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralGeneralInt32GeneralInt32Int32Int32: {
+ int32_t ret = reinterpret_cast<
+ Prototype_Int32_GeneralGeneralInt32GeneralInt32Int32Int32>(nativeFn)(
+ x0, x1, x2, x3, x4, x5, x6);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralGeneralInt32General: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralGeneralInt32General>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int64_General: {
+ int64_t ret =
+ reinterpret_cast<Prototype_Int64_General>(
+ nativeFn)(x0);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_Int64_GeneralInt64: {
+ int64_t ret =
+ reinterpret_cast<Prototype_Int64_GeneralInt64>(
+ nativeFn)(x0, x1);
+ setGPR64Result(ret);
+ break;
+ }
+
+ default:
+ MOZ_CRASH("Unknown function type.");
+ }
+
+ // Nuke the volatile registers. x0-x7 are used as result registers, but except
+ // for x0, none are used in the above signatures.
+ for (int i = 1; i <= 18; i++) {
+    // Poison value: 0xc0defeed1badda7a reads as "code feed 1 bad data".
+ set_xreg(i, int64_t(0xc0defeed1badda7a));
+ }
+
+ // Assert that callee-saved registers are unchanged.
+ VIXL_ASSERT(xreg(19) == x19);
+ VIXL_ASSERT(xreg(20) == x20);
+ VIXL_ASSERT(xreg(21) == x21);
+ VIXL_ASSERT(xreg(22) == x22);
+ VIXL_ASSERT(xreg(23) == x23);
+ VIXL_ASSERT(xreg(24) == x24);
+ VIXL_ASSERT(xreg(25) == x25);
+ VIXL_ASSERT(xreg(26) == x26);
+ VIXL_ASSERT(xreg(27) == x27);
+ VIXL_ASSERT(xreg(28) == x28);
+ VIXL_ASSERT(xreg(29) == x29);
+
+ // Assert that the stack is unchanged.
+ VIXL_ASSERT(savedSP == get_sp());
+
+ // Simulate a return.
+ set_lr(savedLR);
+ set_pc((Instruction*)savedLR);
+  if (getenv("USE_DEBUGGER")) {
+    printf("SVCRET\n");
+  }
+}
+
+#ifdef JS_CACHE_SIMULATOR_ARM64
+void
+Simulator::FlushICache()
+{
+  // Flush the cache ranges recorded by the current thread, as well as those
+  // recorded by other threads before this call.
+ auto& vec = SimulatorProcess::getICacheFlushes(this);
+ for (auto& flush : vec) {
+ decoder_->FlushICache(flush.start, flush.length);
+ }
+ vec.clear();
+ pendingCacheRequests = false;
+}
+
+void CachingDecoder::Decode(const Instruction* instr) {
+ InstDecodedKind state;
+ if (lastPage_ && lastPage_->contains(instr)) {
+ state = lastPage_->decode(instr);
+ } else {
+ uintptr_t key = SinglePageDecodeCache::PageStart(instr);
+ ICacheMap::AddPtr p = iCache_.lookupForAdd(key);
+ if (p) {
+ lastPage_ = p->value();
+ state = lastPage_->decode(instr);
+ } else {
+ js::AutoEnterOOMUnsafeRegion oomUnsafe;
+ SinglePageDecodeCache* newPage = js_new<SinglePageDecodeCache>(instr);
+ if (!newPage || !iCache_.add(p, key, newPage)) {
+ oomUnsafe.crash("Simulator SinglePageDecodeCache");
+ }
+ lastPage_ = newPage;
+ state = InstDecodedKind::NotDecodedYet;
+ }
+ }
+
+ switch (state) {
+ case InstDecodedKind::NotDecodedYet: {
+ cachingDecoder_.setDecodePtr(lastPage_->decodePtr(instr));
+ this->Decoder::Decode(instr);
+ break;
+ }
+#define CASE(A) \
+ case InstDecodedKind::A: { \
+ Visit##A(instr); \
+ break; \
+ }
+
+ VISITOR_LIST(CASE)
+#undef CASE
+ }
+}
+
+void CachingDecoder::FlushICache(void* start, size_t size) {
+ MOZ_ASSERT(uintptr_t(start) % vixl::kInstructionSize == 0);
+ MOZ_ASSERT(size % vixl::kInstructionSize == 0);
+ const uint8_t* it = reinterpret_cast<const uint8_t*>(start);
+ const uint8_t* end = it + size;
+ SinglePageDecodeCache* last = nullptr;
+ for (; it < end; it += vixl::kInstructionSize) {
+ auto instr = reinterpret_cast<const Instruction*>(it);
+ if (last && last->contains(instr)) {
+ last->clearDecode(instr);
+ } else {
+ uintptr_t key = SinglePageDecodeCache::PageStart(instr);
+ ICacheMap::Ptr p = iCache_.lookup(key);
+ if (p) {
+ last = p->value();
+ last->clearDecode(instr);
+ }
+ }
+ }
+}
+#endif
+
+} // namespace vixl
+
+namespace js {
+namespace jit {
+
+#ifdef JS_CACHE_SIMULATOR_ARM64
+void SimulatorProcess::recordICacheFlush(void* start, size_t length) {
+ singleton_->lock_.assertOwnedByCurrentThread();
+ AutoEnterOOMUnsafeRegion oomUnsafe;
+ ICacheFlush range{start, length};
+ for (auto& s : singleton_->pendingFlushes_) {
+ if (!s.records.append(range)) {
+ oomUnsafe.crash("Simulator recordFlushICache");
+ }
+ }
+}
+
+void SimulatorProcess::membarrier() {
+ singleton_->lock_.assertOwnedByCurrentThread();
+ for (auto& s : singleton_->pendingFlushes_) {
+ s.thread->pendingCacheRequests = true;
+ }
+}
+
+SimulatorProcess::ICacheFlushes& SimulatorProcess::getICacheFlushes(Simulator* sim) {
+ singleton_->lock_.assertOwnedByCurrentThread();
+ for (auto& s : singleton_->pendingFlushes_) {
+ if (s.thread == sim) {
+ return s.records;
+ }
+ }
+ MOZ_CRASH("Simulator is not registered in the SimulatorProcess");
+}
+
+bool SimulatorProcess::registerSimulator(Simulator* sim) {
+ singleton_->lock_.assertOwnedByCurrentThread();
+ ICacheFlushes empty;
+ SimFlushes simFlushes{sim, std::move(empty)};
+ return singleton_->pendingFlushes_.append(std::move(simFlushes));
+}
+
+void SimulatorProcess::unregisterSimulator(Simulator* sim) {
+ singleton_->lock_.assertOwnedByCurrentThread();
+ for (auto& s : singleton_->pendingFlushes_) {
+ if (s.thread == sim) {
+ singleton_->pendingFlushes_.erase(&s);
+ return;
+ }
+ }
+ MOZ_CRASH("Simulator is not registered in the SimulatorProcess");
+}
+#endif // JS_CACHE_SIMULATOR_ARM64
+
+} // namespace jit
+} // namespace js
+
+vixl::Simulator* JSContext::simulator() const {
+ return simulator_;
+}
diff --git a/js/src/jit/arm64/vixl/Platform-vixl.h b/js/src/jit/arm64/vixl/Platform-vixl.h
new file mode 100644
index 0000000000..a4de54c785
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Platform-vixl.h
@@ -0,0 +1,39 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_PLATFORM_H
+#define VIXL_PLATFORM_H
+
+// Define platform-specific functionality.
+#include <signal.h>
+
+#include "jstypes.h"
+
+namespace vixl {
+inline void HostBreakpoint(int64_t code = 0) { raise(SIGINT); }
+} // namespace vixl
+
+#endif
diff --git a/js/src/jit/arm64/vixl/README.md b/js/src/jit/arm64/vixl/README.md
new file mode 100644
index 0000000000..7111753279
--- /dev/null
+++ b/js/src/jit/arm64/vixl/README.md
@@ -0,0 +1,7 @@
+This directory is a mix of VIXL files for ARM64 and files added to integrate
+VIXL within the SpiderMonkey MacroAssembler. Many of the SpiderMonkey
+extensions live in files prefixed with Moz*, but some may be spread across
+imported files when convenient.
+
+VIXL upstream sources can be found at:
+https://git.linaro.org/arm/vixl.git/about/
diff --git a/js/src/jit/arm64/vixl/Simulator-Constants-vixl.h b/js/src/jit/arm64/vixl/Simulator-Constants-vixl.h
new file mode 100644
index 0000000000..4b9064a89b
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Simulator-Constants-vixl.h
@@ -0,0 +1,140 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_SIMULATOR_CONSTANTS_A64_H_
+#define VIXL_A64_SIMULATOR_CONSTANTS_A64_H_
+
+namespace vixl {
+
+// Debug instructions.
+//
+// VIXL's macro-assembler and simulator support a few pseudo instructions to
+// make debugging easier. These pseudo instructions do not exist on real
+// hardware.
+//
+// TODO: Also consider allowing these pseudo-instructions to be disabled in the
+// simulator, so that users can check that the input is valid native code.
+// (This isn't possible in all cases. Printf won't work, for example.)
+//
+// Each debug pseudo instruction is represented by a HLT instruction. The HLT
+// immediate field is used to identify the type of debug pseudo instruction.
+
+enum DebugHltOpcodes {
+ kPrintfOpcode,
+ kTraceOpcode,
+ kLogOpcode,
+ // Aliases.
+ kDebugHltFirstOpcode = kPrintfOpcode,
+ kDebugHltLastOpcode = kLogOpcode
+};
+
+// Each pseudo instruction uses a custom encoding for additional arguments, as
+// described below.
+
+// Unreachable - kUnreachableOpcode
+//
+// Instruction which should never be executed. This is used as a guard in parts
+// of the code that should not be reachable, such as in data encoded inline in
+// the instructions.
+
+// Printf - kPrintfOpcode
+// - arg_count: The number of arguments.
+// - arg_pattern: A set of PrintfArgPattern values, packed into two-bit fields.
+//
+// Simulate a call to printf.
+//
+// Floating-point and integer arguments are passed in separate sets of registers
+// in AAPCS64 (even for varargs functions), so it is not possible to determine
+// the type of each argument without some information about the values that were
+// passed in. This information could be retrieved from the printf format string,
+// but the format string is not trivial to parse so we encode the relevant
+// information with the HLT instruction.
+//
+// Also, the following registers are populated (as if for a native A64 call):
+// x0: The format string
+// x1-x7: Optional arguments, if type == CPURegister::kRegister
+// d0-d7: Optional arguments, if type == CPURegister::kFPRegister
+const unsigned kPrintfArgCountOffset = 1 * kInstructionSize;
+const unsigned kPrintfArgPatternListOffset = 2 * kInstructionSize;
+const unsigned kPrintfLength = 3 * kInstructionSize;
+
+const unsigned kPrintfMaxArgCount = 4;
+
+// The argument pattern is a set of two-bit-fields, each with one of the
+// following values:
+enum PrintfArgPattern {
+ kPrintfArgW = 1,
+ kPrintfArgX = 2,
+ // There is no kPrintfArgS because floats are always converted to doubles in C
+ // varargs calls.
+ kPrintfArgD = 3
+};
+static const unsigned kPrintfArgPatternBits = 2;
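+// Illustrative sketch (not part of the original sources): for a simulated
+// printf call with two arguments, the first passed in an X register and the
+// second in a D register, the generated sequence would look roughly like
+//   HLT #kPrintfOpcode
+//   .word 2                                                    // arg_count
+//   .word kPrintfArgX | (kPrintfArgD << kPrintfArgPatternBits) // == 0xe
+// assuming the first argument occupies the low two-bit field, with the format
+// string pointer placed in x0 as described above.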
+
+// Trace - kTraceOpcode
+// - parameter: TraceParameter stored as a uint32_t
+// - command: TraceCommand stored as a uint32_t
+//
+// Allow for trace management in the generated code. This enables or disables
+// automatic tracing of the specified information for every simulated
+// instruction.
+const unsigned kTraceParamsOffset = 1 * kInstructionSize;
+const unsigned kTraceCommandOffset = 2 * kInstructionSize;
+const unsigned kTraceLength = 3 * kInstructionSize;
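+// Illustrative layout (a sketch inferred from the offsets above, not original
+// commentary): a Trace pseudo-instruction occupies kTraceLength bytes as
+//   pc + 0:                   HLT #kTraceOpcode
+//   pc + kTraceParamsOffset:  uint32_t parameters (e.g. LOG_STATE)
+//   pc + kTraceCommandOffset: uint32_t command (TRACE_ENABLE or TRACE_DISABLE)
+// after which execution resumes at pc + kTraceLength.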
+
+// Trace parameters.
+enum TraceParameters {
+ LOG_DISASM = 1 << 0, // Log disassembly.
+ LOG_REGS = 1 << 1, // Log general purpose registers.
+ LOG_VREGS = 1 << 2, // Log NEON and floating-point registers.
+ LOG_SYSREGS = 1 << 3, // Log the flags and system registers.
+ LOG_WRITE = 1 << 4, // Log writes to memory.
+
+ LOG_NONE = 0,
+ LOG_STATE = LOG_REGS | LOG_VREGS | LOG_SYSREGS,
+ LOG_ALL = LOG_DISASM | LOG_STATE | LOG_WRITE
+};
+
+// Trace commands.
+enum TraceCommand {
+ TRACE_ENABLE = 1,
+ TRACE_DISABLE = 2
+};
+
+// Log - kLogOpcode
+// - parameter: TraceParameter stored as a uint32_t
+//
+// Print the specified information once. This mechanism is separate from Trace.
+// In particular, _all_ of the specified registers are printed, rather than just
+// the registers that the instruction writes.
+//
+// Any combination of the TraceParameters values can be used, except that
+// LOG_DISASM is not supported for Log.
+const unsigned kLogParamsOffset = 1 * kInstructionSize;
+const unsigned kLogLength = 2 * kInstructionSize;
+} // namespace vixl
+
+#endif // VIXL_A64_SIMULATOR_CONSTANTS_A64_H_
diff --git a/js/src/jit/arm64/vixl/Simulator-vixl.cpp b/js/src/jit/arm64/vixl/Simulator-vixl.cpp
new file mode 100644
index 0000000000..71e1a31d46
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Simulator-vixl.cpp
@@ -0,0 +1,4371 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jstypes.h"
+
+#ifdef JS_SIMULATOR_ARM64
+
+#include "jit/arm64/vixl/Simulator-vixl.h"
+
+#include <cmath>
+#include <string.h>
+
+#include "jit/AtomicOperations.h"
+
+namespace vixl {
+
+const Instruction* Simulator::kEndOfSimAddress = NULL;
+
+void SimSystemRegister::SetBits(int msb, int lsb, uint32_t bits) {
+ int width = msb - lsb + 1;
+ VIXL_ASSERT(IsUintN(width, bits) || IsIntN(width, bits));
+
+ bits <<= lsb;
+ uint32_t mask = ((1 << width) - 1) << lsb;
+ VIXL_ASSERT((mask & write_ignore_mask_) == 0);
+
+ value_ = (value_ & ~mask) | (bits & mask);
+}
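+// Illustrative example of SetBits above (a sketch, not original commentary):
+// for the NZCV register the flag bits occupy [31:28], so SetBits(31, 28, 0x6)
+// computes width = 4 and mask = 0xf0000000, leaving N=0, Z=1, C=1, V=0 while
+// preserving the other bits of value_.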
+
+
+SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) {
+ switch (id) {
+ case NZCV:
+ return SimSystemRegister(0x00000000, NZCVWriteIgnoreMask);
+ case FPCR:
+ return SimSystemRegister(0x00000000, FPCRWriteIgnoreMask);
+ default:
+ VIXL_UNREACHABLE();
+ return SimSystemRegister();
+ }
+}
+
+
+void Simulator::Run() {
+ pc_modified_ = false;
+ while (pc_ != kEndOfSimAddress) {
+ ExecuteInstruction();
+ LogAllWrittenRegisters();
+ }
+}
+
+
+void Simulator::RunFrom(const Instruction* first) {
+ set_pc(first);
+ Run();
+}
+
+
+const char* Simulator::xreg_names[] = {
+"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+"x24", "x25", "x26", "x27", "x28", "x29", "lr", "xzr", "sp"};
+
+const char* Simulator::wreg_names[] = {
+"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
+"w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
+"w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
+"w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr", "wsp"};
+
+const char* Simulator::sreg_names[] = {
+"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+"s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
+"s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
+"s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31"};
+
+const char* Simulator::dreg_names[] = {
+"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+"d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
+"d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+"d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"};
+
+const char* Simulator::vreg_names[] = {
+"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
+"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"};
+
+
+
+const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) {
+ VIXL_ASSERT(code < kNumberOfRegisters);
+ // If the code represents the stack pointer, index the name after zr.
+ if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) {
+ code = kZeroRegCode + 1;
+ }
+ return wreg_names[code];
+}
+
+
+const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) {
+ VIXL_ASSERT(code < kNumberOfRegisters);
+ // If the code represents the stack pointer, index the name after zr.
+ if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) {
+ code = kZeroRegCode + 1;
+ }
+ return xreg_names[code];
+}
+
+
+const char* Simulator::SRegNameForCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfFPRegisters);
+ return sreg_names[code];
+}
+
+
+const char* Simulator::DRegNameForCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfFPRegisters);
+ return dreg_names[code];
+}
+
+
+const char* Simulator::VRegNameForCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ return vreg_names[code];
+}
+
+
+#define COLOUR(colour_code) "\033[0;" colour_code "m"
+#define COLOUR_BOLD(colour_code) "\033[1;" colour_code "m"
+#define NORMAL ""
+#define GREY "30"
+#define RED "31"
+#define GREEN "32"
+#define YELLOW "33"
+#define BLUE "34"
+#define MAGENTA "35"
+#define CYAN "36"
+#define WHITE "37"
+void Simulator::set_coloured_trace(bool value) {
+ coloured_trace_ = value;
+
+ clr_normal = value ? COLOUR(NORMAL) : "";
+ clr_flag_name = value ? COLOUR_BOLD(WHITE) : "";
+ clr_flag_value = value ? COLOUR(NORMAL) : "";
+ clr_reg_name = value ? COLOUR_BOLD(CYAN) : "";
+ clr_reg_value = value ? COLOUR(CYAN) : "";
+ clr_vreg_name = value ? COLOUR_BOLD(MAGENTA) : "";
+ clr_vreg_value = value ? COLOUR(MAGENTA) : "";
+ clr_memory_address = value ? COLOUR_BOLD(BLUE) : "";
+ clr_warning = value ? COLOUR_BOLD(YELLOW) : "";
+ clr_warning_message = value ? COLOUR(YELLOW) : "";
+ clr_printf = value ? COLOUR(GREEN) : "";
+}
+#undef COLOUR
+#undef COLOUR_BOLD
+#undef NORMAL
+#undef GREY
+#undef RED
+#undef GREEN
+#undef YELLOW
+#undef BLUE
+#undef MAGENTA
+#undef CYAN
+#undef WHITE
+
+
+void Simulator::set_trace_parameters(int parameters) {
+ bool disasm_before = trace_parameters_ & LOG_DISASM;
+ trace_parameters_ = parameters;
+ bool disasm_after = trace_parameters_ & LOG_DISASM;
+
+ if (disasm_before != disasm_after) {
+ if (disasm_after) {
+ decoder_->InsertVisitorBefore(print_disasm_, this);
+ } else {
+ decoder_->RemoveVisitor(print_disasm_);
+ }
+ }
+}
+
+
+void Simulator::set_instruction_stats(bool value) {
+ if (instrumentation_ == nullptr) {
+ return;
+ }
+
+ if (value != instruction_stats_) {
+ if (value) {
+ decoder_->AppendVisitor(instrumentation_);
+ } else {
+ decoder_->RemoveVisitor(instrumentation_);
+ }
+ instruction_stats_ = value;
+ }
+}
+
+// Helpers ---------------------------------------------------------------------
+uint64_t Simulator::AddWithCarry(unsigned reg_size,
+ bool set_flags,
+ uint64_t left,
+ uint64_t right,
+ int carry_in) {
+ VIXL_ASSERT((carry_in == 0) || (carry_in == 1));
+ VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
+
+ uint64_t max_uint = (reg_size == kWRegSize) ? kWMaxUInt : kXMaxUInt;
+ uint64_t reg_mask = (reg_size == kWRegSize) ? kWRegMask : kXRegMask;
+ uint64_t sign_mask = (reg_size == kWRegSize) ? kWSignMask : kXSignMask;
+
+ left &= reg_mask;
+ right &= reg_mask;
+ uint64_t result = (left + right + carry_in) & reg_mask;
+
+ if (set_flags) {
+ nzcv().SetN(CalcNFlag(result, reg_size));
+ nzcv().SetZ(CalcZFlag(result));
+
+ // Compute the C flag by comparing the result to the max unsigned integer.
+ uint64_t max_uint_2op = max_uint - carry_in;
+ bool C = (left > max_uint_2op) || ((max_uint_2op - left) < right);
+ nzcv().SetC(C ? 1 : 0);
+
+ // Overflow iff the sign bit is the same for the two inputs and different
+ // for the result.
+ uint64_t left_sign = left & sign_mask;
+ uint64_t right_sign = right & sign_mask;
+ uint64_t result_sign = result & sign_mask;
+ bool V = (left_sign == right_sign) && (left_sign != result_sign);
+ nzcv().SetV(V ? 1 : 0);
+
+ LogSystemRegister(NZCV);
+ }
+ return result;
+}
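+// Illustrative flag examples for AddWithCarry above (a sketch, not part of
+// the original sources), using 32-bit operands:
+//   0x7fffffff + 1 + 0 -> 0x80000000: N=1 Z=0 C=0 V=1 (signed overflow)
+//   0xffffffff + 1 + 0 -> 0x00000000: N=0 Z=1 C=1 V=0 (unsigned carry out)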
+
+
+int64_t Simulator::ShiftOperand(unsigned reg_size,
+ int64_t value,
+ Shift shift_type,
+ unsigned amount) {
+ if (amount == 0) {
+ return value;
+ }
+ int64_t mask = reg_size == kXRegSize ? kXRegMask : kWRegMask;
+ switch (shift_type) {
+ case LSL:
+ return (value << amount) & mask;
+ case LSR:
+ return static_cast<uint64_t>(value) >> amount;
+ case ASR: {
+ // Shift used to restore the sign.
+ unsigned s_shift = kXRegSize - reg_size;
+ // Value with its sign restored.
+ int64_t s_value = (value << s_shift) >> s_shift;
+ return (s_value >> amount) & mask;
+ }
+ case ROR: {
+ if (reg_size == kWRegSize) {
+ value &= kWRegMask;
+ }
+ return (static_cast<uint64_t>(value) >> amount) |
+ ((value & ((INT64_C(1) << amount) - 1)) <<
+ (reg_size - amount));
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+}
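+// Note (illustrative, not original commentary): for a W-sized ASR,
+// ShiftOperand above uses s_shift = kXRegSize - kWRegSize = 32, so
+// (value << 32) >> 32 sign-extends the low 32 bits before shifting; e.g.
+// value = 0x80000000 with amount = 4 yields 0xf8000000 after masking with
+// kWRegMask.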
+
+
+int64_t Simulator::ExtendValue(unsigned reg_size,
+ int64_t value,
+ Extend extend_type,
+ unsigned left_shift) {
+ switch (extend_type) {
+ case UXTB:
+ value &= kByteMask;
+ break;
+ case UXTH:
+ value &= kHalfWordMask;
+ break;
+ case UXTW:
+ value &= kWordMask;
+ break;
+ case SXTB:
+ value = (value << 56) >> 56;
+ break;
+ case SXTH:
+ value = (value << 48) >> 48;
+ break;
+ case SXTW:
+ value = (value << 32) >> 32;
+ break;
+ case UXTX:
+ case SXTX:
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ int64_t mask = (reg_size == kXRegSize) ? kXRegMask : kWRegMask;
+ return (value << left_shift) & mask;
+}
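+// Illustrative example (not original commentary): ExtendValue(kXRegSize,
+// 0x80, SXTB, 0) sign-extends the byte via (value << 56) >> 56 and in
+// practice produces 0xffffffffffffff80, i.e. -128.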
+
+
+void Simulator::FPCompare(double val0, double val1, FPTrapFlags trap) {
+ AssertSupportedFPCR();
+
+ // TODO: This assumes that the C++ implementation handles comparisons in the
+ // way that we expect (as per AssertSupportedFPCR()).
+ bool process_exception = false;
+ if ((std::isnan(val0) != 0) || (std::isnan(val1) != 0)) {
+ nzcv().SetRawValue(FPUnorderedFlag);
+ if (IsSignallingNaN(val0) || IsSignallingNaN(val1) ||
+ (trap == EnableTrap)) {
+ process_exception = true;
+ }
+ } else if (val0 < val1) {
+ nzcv().SetRawValue(FPLessThanFlag);
+ } else if (val0 > val1) {
+ nzcv().SetRawValue(FPGreaterThanFlag);
+ } else if (val0 == val1) {
+ nzcv().SetRawValue(FPEqualFlag);
+ } else {
+ VIXL_UNREACHABLE();
+ }
+ LogSystemRegister(NZCV);
+ if (process_exception) FPProcessException();
+}
+
+
+Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatForSize(
+ unsigned reg_size, unsigned lane_size) {
+ VIXL_ASSERT(reg_size >= lane_size);
+
+ uint32_t format = 0;
+ if (reg_size != lane_size) {
+ switch (reg_size) {
+ default: VIXL_UNREACHABLE(); break;
+ case kQRegSizeInBytes: format = kPrintRegAsQVector; break;
+ case kDRegSizeInBytes: format = kPrintRegAsDVector; break;
+ }
+ }
+
+ switch (lane_size) {
+ default: VIXL_UNREACHABLE(); break;
+ case kQRegSizeInBytes: format |= kPrintReg1Q; break;
+ case kDRegSizeInBytes: format |= kPrintReg1D; break;
+ case kSRegSizeInBytes: format |= kPrintReg1S; break;
+ case kHRegSizeInBytes: format |= kPrintReg1H; break;
+ case kBRegSizeInBytes: format |= kPrintReg1B; break;
+ }
+ // These sizes would be duplicate case labels.
+ VIXL_STATIC_ASSERT(kXRegSizeInBytes == kDRegSizeInBytes);
+ VIXL_STATIC_ASSERT(kWRegSizeInBytes == kSRegSizeInBytes);
+ VIXL_STATIC_ASSERT(kPrintXReg == kPrintReg1D);
+ VIXL_STATIC_ASSERT(kPrintWReg == kPrintReg1S);
+
+ return static_cast<PrintRegisterFormat>(format);
+}
+
+
+Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat(
+ VectorFormat vform) {
+ switch (vform) {
+ default: VIXL_UNREACHABLE(); return kPrintReg16B;
+ case kFormat16B: return kPrintReg16B;
+ case kFormat8B: return kPrintReg8B;
+ case kFormat8H: return kPrintReg8H;
+ case kFormat4H: return kPrintReg4H;
+ case kFormat4S: return kPrintReg4S;
+ case kFormat2S: return kPrintReg2S;
+ case kFormat2D: return kPrintReg2D;
+ case kFormat1D: return kPrintReg1D;
+ }
+}
+
+
+void Simulator::PrintWrittenRegisters() {
+ for (unsigned i = 0; i < kNumberOfRegisters; i++) {
+ if (registers_[i].WrittenSinceLastLog()) PrintRegister(i);
+ }
+}
+
+
+void Simulator::PrintWrittenVRegisters() {
+ for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
+ // At this point there is no type information, so print as a raw 1Q.
+ if (vregisters_[i].WrittenSinceLastLog()) PrintVRegister(i, kPrintReg1Q);
+ }
+}
+
+
+void Simulator::PrintSystemRegisters() {
+ PrintSystemRegister(NZCV);
+ PrintSystemRegister(FPCR);
+}
+
+
+void Simulator::PrintRegisters() {
+ for (unsigned i = 0; i < kNumberOfRegisters; i++) {
+ PrintRegister(i);
+ }
+}
+
+
+void Simulator::PrintVRegisters() {
+ for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
+ // At this point there is no type information, so print as a raw 1Q.
+ PrintVRegister(i, kPrintReg1Q);
+ }
+}
+
+
+// Print a register's name and raw value.
+//
+// Only the least-significant `size_in_bytes` bytes of the register are printed,
+// but the value is aligned as if the whole register had been printed.
+//
+// For typical register updates, size_in_bytes should be set to kXRegSizeInBytes
+// -- the default -- so that the whole register is printed. Other values of
+// size_in_bytes are intended for use when the register hasn't actually been
+// updated (such as in PrintWrite).
+//
+// No newline is printed. This allows the caller to print more details (such as
+// a memory access annotation).
+void Simulator::PrintRegisterRawHelper(unsigned code, Reg31Mode r31mode,
+ int size_in_bytes) {
+ // The template for all supported sizes.
+ // "# x{code}: 0xffeeddccbbaa9988"
+ // "# w{code}: 0xbbaa9988"
+ // "# w{code}<15:0>: 0x9988"
+ // "# w{code}<7:0>: 0x88"
+ unsigned padding_chars = (kXRegSizeInBytes - size_in_bytes) * 2;
+
+ const char * name = "";
+ const char * suffix = "";
+ switch (size_in_bytes) {
+ case kXRegSizeInBytes: name = XRegNameForCode(code, r31mode); break;
+ case kWRegSizeInBytes: name = WRegNameForCode(code, r31mode); break;
+ case 2:
+ name = WRegNameForCode(code, r31mode);
+ suffix = "<15:0>";
+ padding_chars -= strlen(suffix);
+ break;
+ case 1:
+ name = WRegNameForCode(code, r31mode);
+ suffix = "<7:0>";
+ padding_chars -= strlen(suffix);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ fprintf(stream_, "# %s%5s%s: ", clr_reg_name, name, suffix);
+
+ // Print leading padding spaces.
+ VIXL_ASSERT(padding_chars < (kXRegSizeInBytes * 2));
+ for (unsigned i = 0; i < padding_chars; i++) {
+ putc(' ', stream_);
+ }
+
+ // Print the specified bits in hexadecimal format.
+ uint64_t bits = reg<uint64_t>(code, r31mode);
+ bits &= kXRegMask >> ((kXRegSizeInBytes - size_in_bytes) * 8);
+ VIXL_STATIC_ASSERT(sizeof(bits) == kXRegSizeInBytes);
+
+ int chars = size_in_bytes * 2;
+ fprintf(stream_, "%s0x%0*" PRIx64 "%s",
+ clr_reg_value, chars, bits, clr_normal);
+}
+
+
+void Simulator::PrintRegister(unsigned code, Reg31Mode r31mode) {
+ registers_[code].NotifyRegisterLogged();
+
+ // Don't print writes into xzr.
+ if ((code == kZeroRegCode) && (r31mode == Reg31IsZeroRegister)) {
+ return;
+ }
+
+ // The template for all x and w registers:
+ // "# x{code}: 0x{value}"
+ // "# w{code}: 0x{value}"
+
+ PrintRegisterRawHelper(code, r31mode);
+ fprintf(stream_, "\n");
+}
+
+
+// Print a register's name and raw value.
+//
+// The `bytes` and `lsb` arguments can be used to limit the bytes that are
+// printed. These arguments are intended for use in cases where the register
+// hasn't actually been updated (such as in PrintVWrite).
+//
+// No newline is printed. This allows the caller to print more details (such as
+// a floating-point interpretation or a memory access annotation).
+void Simulator::PrintVRegisterRawHelper(unsigned code, int bytes, int lsb) {
+ // The template for vector types:
+ // "# v{code}: 0xffeeddccbbaa99887766554433221100".
+ // An example with bytes=4 and lsb=8:
+ // "# v{code}: 0xbbaa9988 ".
+ fprintf(stream_, "# %s%5s: %s",
+ clr_vreg_name, VRegNameForCode(code), clr_vreg_value);
+
+ int msb = lsb + bytes - 1;
+ int byte = kQRegSizeInBytes - 1;
+
+ // Print leading padding spaces. (Two spaces per byte.)
+ while (byte > msb) {
+ fprintf(stream_, " ");
+ byte--;
+ }
+
+ // Print the specified part of the value, byte by byte.
+ qreg_t rawbits = qreg(code);
+ fprintf(stream_, "0x");
+ while (byte >= lsb) {
+ fprintf(stream_, "%02x", rawbits.val[byte]);
+ byte--;
+ }
+
+ // Print trailing padding spaces.
+ while (byte >= 0) {
+ fprintf(stream_, " ");
+ byte--;
+ }
+ fprintf(stream_, "%s", clr_normal);
+}
+
+
+// Print each of the specified lanes of a register as a float or double value.
+//
+// The `lane_count` and `rightmost_lane` arguments can be used to limit the
+// lanes that are printed. These arguments are intended for use in cases where
+// the register hasn't actually been updated (such as in PrintVWrite).
+//
+// No newline is printed. This allows the caller to print more details (such as
+// a memory access annotation).
+void Simulator::PrintVRegisterFPHelper(unsigned code,
+ unsigned lane_size_in_bytes,
+ int lane_count,
+ int rightmost_lane) {
+ VIXL_ASSERT((lane_size_in_bytes == kSRegSizeInBytes) ||
+ (lane_size_in_bytes == kDRegSizeInBytes));
+
+ unsigned msb = ((lane_count + rightmost_lane) * lane_size_in_bytes);
+ VIXL_ASSERT(msb <= kQRegSizeInBytes);
+
+ // For scalar types ((lane_count == 1) && (rightmost_lane == 0)), a register
+ // name is used:
+ // " (s{code}: {value})"
+ // " (d{code}: {value})"
+ // For vector types, "..." is used to represent one or more omitted lanes.
+ // " (..., {value}, {value}, ...)"
+ if ((lane_count == 1) && (rightmost_lane == 0)) {
+ const char * name =
+ (lane_size_in_bytes == kSRegSizeInBytes) ? SRegNameForCode(code)
+ : DRegNameForCode(code);
+ fprintf(stream_, " (%s%s: ", clr_vreg_name, name);
+ } else {
+ if (msb < (kQRegSizeInBytes - 1)) {
+ fprintf(stream_, " (..., ");
+ } else {
+ fprintf(stream_, " (");
+ }
+ }
+
+ // Print the list of values.
+ const char * separator = "";
+ int leftmost_lane = rightmost_lane + lane_count - 1;
+ for (int lane = leftmost_lane; lane >= rightmost_lane; lane--) {
+ double value =
+ (lane_size_in_bytes == kSRegSizeInBytes) ? vreg(code).Get<float>(lane)
+ : vreg(code).Get<double>(lane);
+ fprintf(stream_, "%s%s%#g%s", separator, clr_vreg_value, value, clr_normal);
+ separator = ", ";
+ }
+
+ if (rightmost_lane > 0) {
+ fprintf(stream_, ", ...");
+ }
+ fprintf(stream_, ")");
+}
+
+
+void Simulator::PrintVRegister(unsigned code, PrintRegisterFormat format) {
+ vregisters_[code].NotifyRegisterLogged();
+
+ int lane_size_log2 = format & kPrintRegLaneSizeMask;
+
+ int reg_size_log2;
+ if (format & kPrintRegAsQVector) {
+ reg_size_log2 = kQRegSizeInBytesLog2;
+ } else if (format & kPrintRegAsDVector) {
+ reg_size_log2 = kDRegSizeInBytesLog2;
+ } else {
+ // Scalar types.
+ reg_size_log2 = lane_size_log2;
+ }
+
+ int lane_count = 1 << (reg_size_log2 - lane_size_log2);
+ int lane_size = 1 << lane_size_log2;
+
+ // The template for vector types:
+ // "# v{code}: 0x{rawbits} (..., {value}, ...)".
+ // The template for scalar types:
+ // "# v{code}: 0x{rawbits} ({reg}:{value})".
+ // The values in parentheses after the bit representations are floating-point
+// interpretations. They are displayed only if the kPrintRegAsFP bit is set.
+
+ PrintVRegisterRawHelper(code);
+ if (format & kPrintRegAsFP) {
+ PrintVRegisterFPHelper(code, lane_size, lane_count);
+ }
+
+ fprintf(stream_, "\n");
+}
+
+
+void Simulator::PrintSystemRegister(SystemRegister id) {
+ switch (id) {
+ case NZCV:
+ fprintf(stream_, "# %sNZCV: %sN:%d Z:%d C:%d V:%d%s\n",
+ clr_flag_name, clr_flag_value,
+ nzcv().N(), nzcv().Z(), nzcv().C(), nzcv().V(),
+ clr_normal);
+ break;
+ case FPCR: {
+ static const char * rmode[] = {
+ "0b00 (Round to Nearest)",
+ "0b01 (Round towards Plus Infinity)",
+ "0b10 (Round towards Minus Infinity)",
+ "0b11 (Round towards Zero)"
+ };
+ VIXL_ASSERT(fpcr().RMode() < (sizeof(rmode) / sizeof(rmode[0])));
+ fprintf(stream_,
+ "# %sFPCR: %sAHP:%d DN:%d FZ:%d RMode:%s%s\n",
+ clr_flag_name, clr_flag_value,
+ fpcr().AHP(), fpcr().DN(), fpcr().FZ(), rmode[fpcr().RMode()],
+ clr_normal);
+ break;
+ }
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
+
+
+void Simulator::PrintRead(uintptr_t address,
+ unsigned reg_code,
+ PrintRegisterFormat format) {
+ registers_[reg_code].NotifyRegisterLogged();
+
+ USE(format);
+
+ // The template is "# {reg}: 0x{value} <- {address}".
+ PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister);
+ fprintf(stream_, " <- %s0x%016" PRIxPTR "%s\n",
+ clr_memory_address, address, clr_normal);
+}
+
+
+void Simulator::PrintVRead(uintptr_t address,
+ unsigned reg_code,
+ PrintRegisterFormat format,
+ unsigned lane) {
+ vregisters_[reg_code].NotifyRegisterLogged();
+
+ // The template is "# v{code}: 0x{rawbits} <- address".
+ PrintVRegisterRawHelper(reg_code);
+ if (format & kPrintRegAsFP) {
+ PrintVRegisterFPHelper(reg_code, GetPrintRegLaneSizeInBytes(format),
+ GetPrintRegLaneCount(format), lane);
+ }
+ fprintf(stream_, " <- %s0x%016" PRIxPTR "%s\n",
+ clr_memory_address, address, clr_normal);
+}
+
+
+void Simulator::PrintWrite(uintptr_t address,
+ unsigned reg_code,
+ PrintRegisterFormat format) {
+ VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
+
+ // The template is "# v{code}: 0x{value} -> {address}". To keep the trace tidy
+ // and readable, the value is aligned with the values in the register trace.
+ PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister,
+ GetPrintRegSizeInBytes(format));
+ fprintf(stream_, " -> %s0x%016" PRIxPTR "%s\n",
+ clr_memory_address, address, clr_normal);
+}
+
+
+void Simulator::PrintVWrite(uintptr_t address,
+ unsigned reg_code,
+ PrintRegisterFormat format,
+ unsigned lane) {
+ // The templates:
+ // "# v{code}: 0x{rawbits} -> {address}"
+ // "# v{code}: 0x{rawbits} (..., {value}, ...) -> {address}".
+ // "# v{code}: 0x{rawbits} ({reg}:{value}) -> {address}"
+ // Because this trace doesn't represent a change to the source register's
+ // value, only the relevant part of the value is printed. To keep the trace
+ // tidy and readable, the raw value is aligned with the other values in the
+ // register trace.
+ int lane_count = GetPrintRegLaneCount(format);
+ int lane_size = GetPrintRegLaneSizeInBytes(format);
+ int reg_size = GetPrintRegSizeInBytes(format);
+ PrintVRegisterRawHelper(reg_code, reg_size, lane_size * lane);
+ if (format & kPrintRegAsFP) {
+ PrintVRegisterFPHelper(reg_code, lane_size, lane_count, lane);
+ }
+ fprintf(stream_, " -> %s0x%016" PRIxPTR "%s\n",
+ clr_memory_address, address, clr_normal);
+}
+
+
+// Visitors --------------------------------------------------------------------
+
+void Simulator::VisitUnimplemented(const Instruction* instr) {
+ printf("Unimplemented instruction at %p: 0x%08" PRIx32 "\n",
+ reinterpret_cast<const void*>(instr), instr->InstructionBits());
+ VIXL_UNIMPLEMENTED();
+}
+
+
+void Simulator::VisitUnallocated(const Instruction* instr) {
+ printf("Unallocated instruction at %p: 0x%08" PRIx32 "\n",
+ reinterpret_cast<const void*>(instr), instr->InstructionBits());
+ VIXL_UNIMPLEMENTED();
+}
+
+
+void Simulator::VisitPCRelAddressing(const Instruction* instr) {
+ VIXL_ASSERT((instr->Mask(PCRelAddressingMask) == ADR) ||
+ (instr->Mask(PCRelAddressingMask) == ADRP));
+
+ set_reg(instr->Rd(), instr->ImmPCOffsetTarget());
+}
+
+
+void Simulator::VisitUnconditionalBranch(const Instruction* instr) {
+ switch (instr->Mask(UnconditionalBranchMask)) {
+ case BL:
+ set_lr(instr->NextInstruction());
+ VIXL_FALLTHROUGH();
+ case B:
+ set_pc(instr->ImmPCOffsetTarget());
+ break;
+ default: VIXL_UNREACHABLE();
+ }
+}
+
+
+void Simulator::VisitConditionalBranch(const Instruction* instr) {
+ VIXL_ASSERT(instr->Mask(ConditionalBranchMask) == B_cond);
+ if (ConditionPassed(instr->ConditionBranch())) {
+ set_pc(instr->ImmPCOffsetTarget());
+ }
+}
+
+
+void Simulator::VisitUnconditionalBranchToRegister(const Instruction* instr) {
+ const Instruction* target = Instruction::Cast(xreg(instr->Rn()));
+
+ switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
+ case BLR:
+ set_lr(instr->NextInstruction());
+ VIXL_FALLTHROUGH();
+ case BR:
+ case RET: set_pc(target); break;
+ default: VIXL_UNREACHABLE();
+ }
+}
+
+
+void Simulator::VisitTestBranch(const Instruction* instr) {
+ unsigned bit_pos = (instr->ImmTestBranchBit5() << 5) |
+ instr->ImmTestBranchBit40();
+ bool bit_zero = ((xreg(instr->Rt()) >> bit_pos) & 1) == 0;
+ bool take_branch = false;
+ switch (instr->Mask(TestBranchMask)) {
+ case TBZ: take_branch = bit_zero; break;
+ case TBNZ: take_branch = !bit_zero; break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ if (take_branch) {
+ set_pc(instr->ImmPCOffsetTarget());
+ }
+}
+
+
+void Simulator::VisitCompareBranch(const Instruction* instr) {
+ unsigned rt = instr->Rt();
+ bool take_branch = false;
+ switch (instr->Mask(CompareBranchMask)) {
+ case CBZ_w: take_branch = (wreg(rt) == 0); break;
+ case CBZ_x: take_branch = (xreg(rt) == 0); break;
+ case CBNZ_w: take_branch = (wreg(rt) != 0); break;
+ case CBNZ_x: take_branch = (xreg(rt) != 0); break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ if (take_branch) {
+ set_pc(instr->ImmPCOffsetTarget());
+ }
+}
+
+
+void Simulator::AddSubHelper(const Instruction* instr, int64_t op2) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ bool set_flags = instr->FlagsUpdate();
+ int64_t new_val = 0;
+ Instr operation = instr->Mask(AddSubOpMask);
+
+ switch (operation) {
+ case ADD:
+ case ADDS: {
+ new_val = AddWithCarry(reg_size,
+ set_flags,
+ reg(reg_size, instr->Rn(), instr->RnMode()),
+ op2);
+ break;
+ }
+ case SUB:
+ case SUBS: {
+ new_val = AddWithCarry(reg_size,
+ set_flags,
+ reg(reg_size, instr->Rn(), instr->RnMode()),
+ ~op2,
+ 1);
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+
+ set_reg(reg_size, instr->Rd(), new_val, LogRegWrites, instr->RdMode());
+}
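+// Note (illustrative): the SUB/SUBS cases in AddSubHelper above rely on the
+// two's complement identity a - b == a + ~b + 1, so additions and
+// subtractions both funnel through AddWithCarry with the same flag
+// computation.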
+
+
+void Simulator::VisitAddSubShifted(const Instruction* instr) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ int64_t op2 = ShiftOperand(reg_size,
+ reg(reg_size, instr->Rm()),
+ static_cast<Shift>(instr->ShiftDP()),
+ instr->ImmDPShift());
+ AddSubHelper(instr, op2);
+}
+
+
+void Simulator::VisitAddSubImmediate(const Instruction* instr) {
+ int64_t op2 = instr->ImmAddSub() << ((instr->ShiftAddSub() == 1) ? 12 : 0);
+ AddSubHelper(instr, op2);
+}
+
+
+void Simulator::VisitAddSubExtended(const Instruction* instr) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ int64_t op2 = ExtendValue(reg_size,
+ reg(reg_size, instr->Rm()),
+ static_cast<Extend>(instr->ExtendMode()),
+ instr->ImmExtendShift());
+ AddSubHelper(instr, op2);
+}
+
+
+void Simulator::VisitAddSubWithCarry(const Instruction* instr) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ int64_t op2 = reg(reg_size, instr->Rm());
+ int64_t new_val;
+
+ if ((instr->Mask(AddSubOpMask) == SUB) || instr->Mask(AddSubOpMask) == SUBS) {
+ op2 = ~op2;
+ }
+
+ new_val = AddWithCarry(reg_size,
+ instr->FlagsUpdate(),
+ reg(reg_size, instr->Rn()),
+ op2,
+ C());
+
+ set_reg(reg_size, instr->Rd(), new_val);
+}
+
+
+void Simulator::VisitLogicalShifted(const Instruction* instr) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ Shift shift_type = static_cast<Shift>(instr->ShiftDP());
+ unsigned shift_amount = instr->ImmDPShift();
+ int64_t op2 = ShiftOperand(reg_size, reg(reg_size, instr->Rm()), shift_type,
+ shift_amount);
+ if (instr->Mask(NOT) == NOT) {
+ op2 = ~op2;
+ }
+ LogicalHelper(instr, op2);
+}
+
+
+void Simulator::VisitLogicalImmediate(const Instruction* instr) {
+ LogicalHelper(instr, instr->ImmLogical());
+}
+
+
+void Simulator::LogicalHelper(const Instruction* instr, int64_t op2) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ int64_t op1 = reg(reg_size, instr->Rn());
+ int64_t result = 0;
+ bool update_flags = false;
+
+ // Switch on the logical operation, stripping out the NOT bit, as it has a
+ // different meaning for logical immediate instructions.
+ switch (instr->Mask(LogicalOpMask & ~NOT)) {
+ case ANDS: update_flags = true; VIXL_FALLTHROUGH();
+ case AND: result = op1 & op2; break;
+ case ORR: result = op1 | op2; break;
+ case EOR: result = op1 ^ op2; break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+
+ if (update_flags) {
+ nzcv().SetN(CalcNFlag(result, reg_size));
+ nzcv().SetZ(CalcZFlag(result));
+ nzcv().SetC(0);
+ nzcv().SetV(0);
+ LogSystemRegister(NZCV);
+ }
+
+ set_reg(reg_size, instr->Rd(), result, LogRegWrites, instr->RdMode());
+}
+
+
+void Simulator::VisitConditionalCompareRegister(const Instruction* instr) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ ConditionalCompareHelper(instr, reg(reg_size, instr->Rm()));
+}
+
+
+void Simulator::VisitConditionalCompareImmediate(const Instruction* instr) {
+ ConditionalCompareHelper(instr, instr->ImmCondCmp());
+}
+
+
+void Simulator::ConditionalCompareHelper(const Instruction* instr,
+ int64_t op2) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ int64_t op1 = reg(reg_size, instr->Rn());
+
+ if (ConditionPassed(instr->Condition())) {
+ // If the condition passes, set the status flags to the result of comparing
+ // the operands.
+ if (instr->Mask(ConditionalCompareMask) == CCMP) {
+ AddWithCarry(reg_size, true, op1, ~op2, 1);
+ } else {
+ VIXL_ASSERT(instr->Mask(ConditionalCompareMask) == CCMN);
+ AddWithCarry(reg_size, true, op1, op2, 0);
+ }
+ } else {
+ // If the condition fails, set the status flags to the nzcv immediate.
+ nzcv().SetFlags(instr->Nzcv());
+ LogSystemRegister(NZCV);
+ }
+}
+
+
+void Simulator::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
+ int offset = instr->ImmLSUnsigned() << instr->SizeLS();
+ LoadStoreHelper(instr, offset, Offset);
+}
+
+
+void Simulator::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
+ LoadStoreHelper(instr, instr->ImmLS(), Offset);
+}
+
+
+void Simulator::VisitLoadStorePreIndex(const Instruction* instr) {
+ LoadStoreHelper(instr, instr->ImmLS(), PreIndex);
+}
+
+
+void Simulator::VisitLoadStorePostIndex(const Instruction* instr) {
+ LoadStoreHelper(instr, instr->ImmLS(), PostIndex);
+}
+
+
+void Simulator::VisitLoadStoreRegisterOffset(const Instruction* instr) {
+ Extend ext = static_cast<Extend>(instr->ExtendMode());
+ VIXL_ASSERT((ext == UXTW) || (ext == UXTX) || (ext == SXTW) || (ext == SXTX));
+ unsigned shift_amount = instr->ImmShiftLS() * instr->SizeLS();
+
+ int64_t offset = ExtendValue(kXRegSize, xreg(instr->Rm()), ext,
+ shift_amount);
+ LoadStoreHelper(instr, offset, Offset);
+}
+
+template<typename T>
+static T Faulted() {
+ return ~0;
+}
+
+template<>
+Simulator::qreg_t Faulted() {
+ static_assert(kQRegSizeInBytes == 16, "Known constraint");
+ static Simulator::qreg_t dummy = { {
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255
+ } };
+ return dummy;
+}
+
+template<typename T> T
+Simulator::Read(uintptr_t address)
+{
+ address = Memory::AddressUntag(address);
+ if (handle_wasm_seg_fault(address, sizeof(T)))
+ return Faulted<T>();
+ return Memory::Read<T>(address);
+}
+
+template <typename T> void
+Simulator::Write(uintptr_t address, T value)
+{
+ address = Memory::AddressUntag(address);
+ if (handle_wasm_seg_fault(address, sizeof(T)))
+ return;
+ Memory::Write<T>(address, value);
+}
+
+void Simulator::LoadStoreHelper(const Instruction* instr,
+ int64_t offset,
+ AddrMode addrmode) {
+ unsigned srcdst = instr->Rt();
+ uintptr_t address = AddressModeHelper(instr->Rn(), offset, addrmode);
+
+ LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask));
+ switch (op) {
+ case LDRB_w:
+ set_wreg(srcdst, Read<uint8_t>(address), NoRegLog); break;
+ case LDRH_w:
+ set_wreg(srcdst, Read<uint16_t>(address), NoRegLog); break;
+ case LDR_w:
+ set_wreg(srcdst, Read<uint32_t>(address), NoRegLog); break;
+ case LDR_x:
+ set_xreg(srcdst, Read<uint64_t>(address), NoRegLog); break;
+ case LDRSB_w:
+ set_wreg(srcdst, Read<int8_t>(address), NoRegLog); break;
+ case LDRSH_w:
+ set_wreg(srcdst, Read<int16_t>(address), NoRegLog); break;
+ case LDRSB_x:
+ set_xreg(srcdst, Read<int8_t>(address), NoRegLog); break;
+ case LDRSH_x:
+ set_xreg(srcdst, Read<int16_t>(address), NoRegLog); break;
+ case LDRSW_x:
+ set_xreg(srcdst, Read<int32_t>(address), NoRegLog); break;
+ case LDR_b:
+ set_breg(srcdst, Read<uint8_t>(address), NoRegLog); break;
+ case LDR_h:
+ set_hreg(srcdst, Read<uint16_t>(address), NoRegLog); break;
+ case LDR_s:
+ set_sreg(srcdst, Read<float>(address), NoRegLog); break;
+ case LDR_d:
+ set_dreg(srcdst, Read<double>(address), NoRegLog); break;
+ case LDR_q:
+ set_qreg(srcdst, Read<qreg_t>(address), NoRegLog); break;
+
+ case STRB_w: Write<uint8_t>(address, wreg(srcdst)); break;
+ case STRH_w: Write<uint16_t>(address, wreg(srcdst)); break;
+ case STR_w: Write<uint32_t>(address, wreg(srcdst)); break;
+ case STR_x: Write<uint64_t>(address, xreg(srcdst)); break;
+ case STR_b: Write<uint8_t>(address, breg(srcdst)); break;
+ case STR_h: Write<uint16_t>(address, hreg(srcdst)); break;
+ case STR_s: Write<float>(address, sreg(srcdst)); break;
+ case STR_d: Write<double>(address, dreg(srcdst)); break;
+ case STR_q: Write<qreg_t>(address, qreg(srcdst)); break;
+
+ // Ignore prfm hint instructions.
+ case PRFM: break;
+
+ default: VIXL_UNIMPLEMENTED();
+ }
+
+ unsigned access_size = 1 << instr->SizeLS();
+ if (instr->IsLoad()) {
+ if ((op == LDR_s) || (op == LDR_d)) {
+ LogVRead(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size));
+ } else if ((op == LDR_b) || (op == LDR_h) || (op == LDR_q)) {
+ LogVRead(address, srcdst, GetPrintRegisterFormatForSize(access_size));
+ } else {
+ LogRead(address, srcdst, GetPrintRegisterFormatForSize(access_size));
+ }
+ } else {
+ if ((op == STR_s) || (op == STR_d)) {
+ LogVWrite(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size));
+ } else if ((op == STR_b) || (op == STR_h) || (op == STR_q)) {
+ LogVWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size));
+ } else {
+ LogWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size));
+ }
+ }
+
+ local_monitor_.MaybeClear();
+}
+
+
+void Simulator::VisitLoadStorePairOffset(const Instruction* instr) {
+ LoadStorePairHelper(instr, Offset);
+}
+
+
+void Simulator::VisitLoadStorePairPreIndex(const Instruction* instr) {
+ LoadStorePairHelper(instr, PreIndex);
+}
+
+
+void Simulator::VisitLoadStorePairPostIndex(const Instruction* instr) {
+ LoadStorePairHelper(instr, PostIndex);
+}
+
+
+void Simulator::VisitLoadStorePairNonTemporal(const Instruction* instr) {
+ LoadStorePairHelper(instr, Offset);
+}
+
+
+void Simulator::LoadStorePairHelper(const Instruction* instr,
+ AddrMode addrmode) {
+ unsigned rt = instr->Rt();
+ unsigned rt2 = instr->Rt2();
+ int element_size = 1 << instr->SizeLSPair();
+ int64_t offset = instr->ImmLSPair() * element_size;
+ uintptr_t address = AddressModeHelper(instr->Rn(), offset, addrmode);
+ uintptr_t address2 = address + element_size;
+
+ LoadStorePairOp op =
+ static_cast<LoadStorePairOp>(instr->Mask(LoadStorePairMask));
+
+ // 'rt' and 'rt2' can only be aliased for stores.
+ VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || (rt != rt2));
+
+ switch (op) {
+    // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_VREGS). We
+ // will print a more detailed log.
+ case LDP_w: {
+ set_wreg(rt, Read<uint32_t>(address), NoRegLog);
+ set_wreg(rt2, Read<uint32_t>(address2), NoRegLog);
+ break;
+ }
+ case LDP_s: {
+ set_sreg(rt, Read<float>(address), NoRegLog);
+ set_sreg(rt2, Read<float>(address2), NoRegLog);
+ break;
+ }
+ case LDP_x: {
+ set_xreg(rt, Read<uint64_t>(address), NoRegLog);
+ set_xreg(rt2, Read<uint64_t>(address2), NoRegLog);
+ break;
+ }
+ case LDP_d: {
+ set_dreg(rt, Read<double>(address), NoRegLog);
+ set_dreg(rt2, Read<double>(address2), NoRegLog);
+ break;
+ }
+ case LDP_q: {
+ set_qreg(rt, Read<qreg_t>(address), NoRegLog);
+ set_qreg(rt2, Read<qreg_t>(address2), NoRegLog);
+ break;
+ }
+ case LDPSW_x: {
+ set_xreg(rt, Read<int32_t>(address), NoRegLog);
+ set_xreg(rt2, Read<int32_t>(address2), NoRegLog);
+ break;
+ }
+ case STP_w: {
+ Write<uint32_t>(address, wreg(rt));
+ Write<uint32_t>(address2, wreg(rt2));
+ break;
+ }
+ case STP_s: {
+ Write<float>(address, sreg(rt));
+ Write<float>(address2, sreg(rt2));
+ break;
+ }
+ case STP_x: {
+ Write<uint64_t>(address, xreg(rt));
+ Write<uint64_t>(address2, xreg(rt2));
+ break;
+ }
+ case STP_d: {
+ Write<double>(address, dreg(rt));
+ Write<double>(address2, dreg(rt2));
+ break;
+ }
+ case STP_q: {
+ Write<qreg_t>(address, qreg(rt));
+ Write<qreg_t>(address2, qreg(rt2));
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+
+ // Print a detailed trace (including the memory address) instead of the basic
+ // register:value trace generated by set_*reg().
+ if (instr->IsLoad()) {
+ if ((op == LDP_s) || (op == LDP_d)) {
+ LogVRead(address, rt, GetPrintRegisterFormatForSizeFP(element_size));
+ LogVRead(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size));
+ } else if (op == LDP_q) {
+ LogVRead(address, rt, GetPrintRegisterFormatForSize(element_size));
+ LogVRead(address2, rt2, GetPrintRegisterFormatForSize(element_size));
+ } else {
+ LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
+ LogRead(address2, rt2, GetPrintRegisterFormatForSize(element_size));
+ }
+ } else {
+ if ((op == STP_s) || (op == STP_d)) {
+ LogVWrite(address, rt, GetPrintRegisterFormatForSizeFP(element_size));
+ LogVWrite(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size));
+ } else if (op == STP_q) {
+ LogVWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
+ LogVWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size));
+ } else {
+ LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
+ LogWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size));
+ }
+ }
+
+ local_monitor_.MaybeClear();
+}
+
+
+void Simulator::PrintExclusiveAccessWarning() {
+ if (print_exclusive_access_warning_) {
+ fprintf(
+ stderr,
+ "%sWARNING:%s VIXL simulator support for load-/store-/clear-exclusive "
+ "instructions is limited. Refer to the README for details.%s\n",
+ clr_warning, clr_warning_message, clr_normal);
+ print_exclusive_access_warning_ = false;
+ }
+}
+
+template <typename T>
+void Simulator::CompareAndSwapHelper(const Instruction* instr) {
+ unsigned rs = instr->Rs();
+ unsigned rt = instr->Rt();
+ unsigned rn = instr->Rn();
+
+ unsigned element_size = sizeof(T);
+ uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
+
+ // Verify that the address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ address = Memory::AddressUntag(address);
+ if (handle_wasm_seg_fault(address, element_size))
+ return;
+
+ bool is_acquire = instr->Bit(22) == 1;
+ bool is_release = instr->Bit(15) == 1;
+
+ T comparevalue = reg<T>(rs);
+ T newvalue = reg<T>(rt);
+
+  // The architecture permits the data read to clear any exclusive monitors
+  // associated with that location, even if the compare subsequently fails.
+ local_monitor_.Clear();
+
+ T data = Memory::Read<T>(address);
+ if (is_acquire) {
+ // Approximate load-acquire by issuing a full barrier after the load.
+ __sync_synchronize();
+ }
+
+ if (data == comparevalue) {
+ if (is_release) {
+ // Approximate store-release by issuing a full barrier before the store.
+ __sync_synchronize();
+ }
+ Memory::Write<T>(address, newvalue);
+ LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
+ }
+ set_reg<T>(rs, data);
+ LogRead(address, rs, GetPrintRegisterFormatForSize(element_size));
+}
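+
+// Illustrative note for CompareAndSwapHelper above (a sketch, assuming the
+// standard AArch64 CAS semantics): "cas w0, w2, [x1]" with [x1] == 5,
+// w0 == 5 and w2 == 9 stores 9 to [x1] and leaves the old value 5 in w0;
+// had w0 held any other value, memory would be unchanged but w0 would still
+// receive the value that was read.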
+
+template <typename T>
+void Simulator::CompareAndSwapPairHelper(const Instruction* instr) {
+ VIXL_ASSERT((sizeof(T) == 4) || (sizeof(T) == 8));
+ unsigned rs = instr->Rs();
+ unsigned rt = instr->Rt();
+ unsigned rn = instr->Rn();
+
+  // CASP operates on an even/odd register pair, so both rs and rt must be even.
+  VIXL_ASSERT((rs % 2 == 0) && (rt % 2 == 0));
+
+ unsigned element_size = sizeof(T);
+ uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
+
+ // Verify that the address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ address = Memory::AddressUntag(address);
+ if (handle_wasm_seg_fault(address, element_size))
+ return;
+
+ uint64_t address2 = address + element_size;
+
+ bool is_acquire = instr->Bit(22) == 1;
+ bool is_release = instr->Bit(15) == 1;
+
+ T comparevalue_high = reg<T>(rs + 1);
+ T comparevalue_low = reg<T>(rs);
+ T newvalue_high = reg<T>(rt + 1);
+ T newvalue_low = reg<T>(rt);
+
+  // The architecture permits the data read to clear any exclusive monitors
+  // associated with that location, even if the compare subsequently fails.
+ local_monitor_.Clear();
+
+ T data_high = Memory::Read<T>(address);
+ T data_low = Memory::Read<T>(address2);
+
+ if (is_acquire) {
+ // Approximate load-acquire by issuing a full barrier after the load.
+ __sync_synchronize();
+ }
+
+ bool same =
+ (data_high == comparevalue_high) && (data_low == comparevalue_low);
+ if (same) {
+ if (is_release) {
+ // Approximate store-release by issuing a full barrier before the store.
+ __sync_synchronize();
+ }
+
+ Memory::Write<T>(address, newvalue_high);
+ Memory::Write<T>(address2, newvalue_low);
+ }
+
+ set_reg<T>(rs + 1, data_high);
+ set_reg<T>(rs, data_low);
+
+ LogRead(address, rs + 1, GetPrintRegisterFormatForSize(element_size));
+ LogRead(address2, rs, GetPrintRegisterFormatForSize(element_size));
+
+ if (same) {
+ LogWrite(address, rt + 1, GetPrintRegisterFormatForSize(element_size));
+ LogWrite(address2, rt, GetPrintRegisterFormatForSize(element_size));
+ }
+}
+
+void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
+ LoadStoreExclusive op =
+ static_cast<LoadStoreExclusive>(instr->Mask(LoadStoreExclusiveMask));
+
+ switch (op) {
+ case CAS_w:
+ case CASA_w:
+ case CASL_w:
+ case CASAL_w:
+ CompareAndSwapHelper<uint32_t>(instr);
+ break;
+ case CAS_x:
+ case CASA_x:
+ case CASL_x:
+ case CASAL_x:
+ CompareAndSwapHelper<uint64_t>(instr);
+ break;
+ case CASB:
+ case CASAB:
+ case CASLB:
+ case CASALB:
+ CompareAndSwapHelper<uint8_t>(instr);
+ break;
+ case CASH:
+ case CASAH:
+ case CASLH:
+ case CASALH:
+ CompareAndSwapHelper<uint16_t>(instr);
+ break;
+ case CASP_w:
+ case CASPA_w:
+ case CASPL_w:
+ case CASPAL_w:
+ CompareAndSwapPairHelper<uint32_t>(instr);
+ break;
+ case CASP_x:
+ case CASPA_x:
+ case CASPL_x:
+ case CASPAL_x:
+ CompareAndSwapPairHelper<uint64_t>(instr);
+ break;
+ default:
+ PrintExclusiveAccessWarning();
+
+ unsigned rs = instr->Rs();
+ unsigned rt = instr->Rt();
+ unsigned rt2 = instr->Rt2();
+ unsigned rn = instr->Rn();
+
+ bool is_exclusive = !instr->LdStXNotExclusive();
+ bool is_acquire_release = !is_exclusive || instr->LdStXAcquireRelease();
+ bool is_load = instr->LdStXLoad();
+ bool is_pair = instr->LdStXPair();
+
+ unsigned element_size = 1 << instr->LdStXSizeLog2();
+ unsigned access_size = is_pair ? element_size * 2 : element_size;
+ uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
+
+ // Verify that the address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ // Check the alignment of `address`.
+ if (AlignDown(address, access_size) != address) {
+ VIXL_ALIGNMENT_EXCEPTION();
+ }
+
+ // The sp must be aligned to 16 bytes when it is accessed.
+ if ((rn == 31) && (AlignDown(address, 16) != address)) {
+ VIXL_ALIGNMENT_EXCEPTION();
+ }
+
+ if (is_load) {
+ if (is_exclusive) {
+ local_monitor_.MarkExclusive(address, access_size);
+ } else {
+ // Any non-exclusive load can clear the local monitor as a side
+ // effect. We don't need to do this, but it is useful to stress the
+ // simulated code.
+ local_monitor_.Clear();
+ }
+
+ // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS).
+ // We will print a more detailed log.
+ switch (op) {
+ case LDXRB_w:
+ case LDAXRB_w:
+ case LDARB_w:
+ set_wreg(rt, Read<uint8_t>(address), NoRegLog);
+ break;
+ case LDXRH_w:
+ case LDAXRH_w:
+ case LDARH_w:
+ set_wreg(rt, Read<uint16_t>(address), NoRegLog);
+ break;
+ case LDXR_w:
+ case LDAXR_w:
+ case LDAR_w:
+ set_wreg(rt, Read<uint32_t>(address), NoRegLog);
+ break;
+ case LDXR_x:
+ case LDAXR_x:
+ case LDAR_x:
+ set_xreg(rt, Read<uint64_t>(address), NoRegLog);
+ break;
+ case LDXP_w:
+ case LDAXP_w:
+ set_wreg(rt, Read<uint32_t>(address), NoRegLog);
+ set_wreg(rt2, Read<uint32_t>(address + element_size), NoRegLog);
+ break;
+ case LDXP_x:
+ case LDAXP_x:
+ set_xreg(rt, Read<uint64_t>(address), NoRegLog);
+ set_xreg(rt2, Read<uint64_t>(address + element_size), NoRegLog);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ if (is_acquire_release) {
+ // Approximate load-acquire by issuing a full barrier after the load.
+ js::jit::AtomicOperations::fenceSeqCst();
+ }
+
+ LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
+ if (is_pair) {
+ LogRead(address + element_size, rt2,
+ GetPrintRegisterFormatForSize(element_size));
+ }
+ } else {
+ if (is_acquire_release) {
+ // Approximate store-release by issuing a full barrier before the
+ // store.
+ js::jit::AtomicOperations::fenceSeqCst();
+ }
+
+ bool do_store = true;
+ if (is_exclusive) {
+ do_store = local_monitor_.IsExclusive(address, access_size) &&
+ global_monitor_.IsExclusive(address, access_size);
+ set_wreg(rs, do_store ? 0 : 1);
+
+ // - All exclusive stores explicitly clear the local monitor.
+ local_monitor_.Clear();
+ } else {
+ // - Any other store can clear the local monitor as a side effect.
+ local_monitor_.MaybeClear();
+ }
+
+ if (do_store) {
+ switch (op) {
+ case STXRB_w:
+ case STLXRB_w:
+ case STLRB_w:
+ Write<uint8_t>(address, wreg(rt));
+ break;
+ case STXRH_w:
+ case STLXRH_w:
+ case STLRH_w:
+ Write<uint16_t>(address, wreg(rt));
+ break;
+ case STXR_w:
+ case STLXR_w:
+ case STLR_w:
+ Write<uint32_t>(address, wreg(rt));
+ break;
+ case STXR_x:
+ case STLXR_x:
+ case STLR_x:
+ Write<uint64_t>(address, xreg(rt));
+ break;
+ case STXP_w:
+ case STLXP_w:
+ Write<uint32_t>(address, wreg(rt));
+ Write<uint32_t>(address + element_size, wreg(rt2));
+ break;
+ case STXP_x:
+ case STLXP_x:
+ Write<uint64_t>(address, xreg(rt));
+ Write<uint64_t>(address + element_size, xreg(rt2));
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
+ if (is_pair) {
+ LogWrite(address + element_size, rt2,
+ GetPrintRegisterFormatForSize(element_size));
+ }
+ }
+ }
+ }
+}
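+
+// For reference, the exclusive path above models the usual load-exclusive/
+// store-exclusive retry idiom (sketch in standard AArch64 assembly):
+//
+//   retry:
+//     ldxr  w1, [x0]       // load and mark [x0] in the local monitor
+//     add   w1, w1, #1
+//     stxr  w2, w1, [x0]   // w2 <- 0 on success, 1 if the monitor was lost
+//     cbnz  w2, retry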
+
+template <typename T>
+void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) {
+ unsigned rs = instr->Rs();
+ unsigned rt = instr->Rt();
+ unsigned rn = instr->Rn();
+
+ bool is_acquire = (instr->Bit(23) == 1) && (rt != kZeroRegCode);
+ bool is_release = instr->Bit(22) == 1;
+
+ unsigned element_size = sizeof(T);
+ uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
+
+ // Verify that the address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ address = Memory::AddressUntag(address);
+ if (handle_wasm_seg_fault(address, sizeof(T)))
+ return;
+
+ T value = reg<T>(rs);
+
+ T data = Memory::Read<T>(address);
+
+ if (is_acquire) {
+ // Approximate load-acquire by issuing a full barrier after the load.
+ __sync_synchronize();
+ }
+
+ T result = 0;
+ switch (instr->Mask(AtomicMemorySimpleOpMask)) {
+ case LDADDOp:
+ result = data + value;
+ break;
+ case LDCLROp:
+ VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
+ result = data & ~value;
+ break;
+ case LDEOROp:
+ VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
+ result = data ^ value;
+ break;
+ case LDSETOp:
+ VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
+ result = data | value;
+ break;
+
+    // The signed/unsigned distinction is handled via the templated type T.
+ case LDSMAXOp:
+ case LDUMAXOp:
+ result = (data > value) ? data : value;
+ break;
+ case LDSMINOp:
+ case LDUMINOp:
+ result = (data > value) ? value : data;
+ break;
+ }
+
+ if (is_release) {
+ // Approximate store-release by issuing a full barrier before the store.
+ __sync_synchronize();
+ }
+
+ Memory::Write<T>(address, result);
+ set_reg<T>(rt, data, NoRegLog);
+
+ LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
+ LogWrite(address, rs, GetPrintRegisterFormatForSize(element_size));
+}
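+
+// Worked example (illustrative): "ldadd w2, w3, [x1]" with [x1] == 5 and
+// w2 == 3 writes 8 back to [x1] and places the old value 5 in w3; the
+// LDCLR/LDEOR/LDSET/LDSMAX/LDSMIN/LDUMAX/LDUMIN variants differ only in how
+// 'result' is derived from 'data' and 'value' above.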
+
+template <typename T>
+void Simulator::AtomicMemorySwapHelper(const Instruction* instr) {
+ unsigned rs = instr->Rs();
+ unsigned rt = instr->Rt();
+ unsigned rn = instr->Rn();
+
+ bool is_acquire = (instr->Bit(23) == 1) && (rt != kZeroRegCode);
+ bool is_release = instr->Bit(22) == 1;
+
+ unsigned element_size = sizeof(T);
+ uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
+
+ // Verify that the address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ address = Memory::AddressUntag(address);
+ if (handle_wasm_seg_fault(address, sizeof(T)))
+ return;
+
+ T data = Memory::Read<T>(address);
+ if (is_acquire) {
+ // Approximate load-acquire by issuing a full barrier after the load.
+ __sync_synchronize();
+ }
+
+ if (is_release) {
+ // Approximate store-release by issuing a full barrier before the store.
+ __sync_synchronize();
+ }
+ Memory::Write<T>(address, reg<T>(rs));
+
+ set_reg<T>(rt, data);
+
+ LogRead(address, rt, GetPrintRegisterFormat(element_size));
+ LogWrite(address, rs, GetPrintRegisterFormat(element_size));
+}
+
+template <typename T>
+void Simulator::LoadAcquireRCpcHelper(const Instruction* instr) {
+ unsigned rt = instr->Rt();
+ unsigned rn = instr->Rn();
+
+ unsigned element_size = sizeof(T);
+ uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
+
+ // Verify that the address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ address = Memory::AddressUntag(address);
+ if (handle_wasm_seg_fault(address, sizeof(T)))
+ return;
+
+ set_reg<T>(rt, Memory::Read<T>(address));
+
+ // Approximate load-acquire by issuing a full barrier after the load.
+ __sync_synchronize();
+
+ LogRead(address, rt, GetPrintRegisterFormat(element_size));
+}
+
+#define ATOMIC_MEMORY_SIMPLE_UINT_LIST(V) \
+ V(LDADD) \
+ V(LDCLR) \
+ V(LDEOR) \
+ V(LDSET) \
+ V(LDUMAX) \
+ V(LDUMIN)
+
+#define ATOMIC_MEMORY_SIMPLE_INT_LIST(V) \
+ V(LDSMAX) \
+ V(LDSMIN)
+
+void Simulator::VisitAtomicMemory(const Instruction* instr) {
+ switch (instr->Mask(AtomicMemoryMask)) {
+// clang-format off
+#define SIM_FUNC_B(A) \
+ case A##B: \
+ case A##AB: \
+ case A##LB: \
+ case A##ALB:
+#define SIM_FUNC_H(A) \
+ case A##H: \
+ case A##AH: \
+ case A##LH: \
+ case A##ALH:
+#define SIM_FUNC_w(A) \
+ case A##_w: \
+ case A##A_w: \
+ case A##L_w: \
+ case A##AL_w:
+#define SIM_FUNC_x(A) \
+ case A##_x: \
+ case A##A_x: \
+ case A##L_x: \
+ case A##AL_x:
+
+ ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_B)
+ AtomicMemorySimpleHelper<uint8_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_B)
+ AtomicMemorySimpleHelper<int8_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_H)
+ AtomicMemorySimpleHelper<uint16_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_H)
+ AtomicMemorySimpleHelper<int16_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_w)
+ AtomicMemorySimpleHelper<uint32_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_w)
+ AtomicMemorySimpleHelper<int32_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_x)
+ AtomicMemorySimpleHelper<uint64_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_x)
+ AtomicMemorySimpleHelper<int64_t>(instr);
+ break;
+ // clang-format on
+
+ case SWPB:
+ case SWPAB:
+ case SWPLB:
+ case SWPALB:
+ AtomicMemorySwapHelper<uint8_t>(instr);
+ break;
+ case SWPH:
+ case SWPAH:
+ case SWPLH:
+ case SWPALH:
+ AtomicMemorySwapHelper<uint16_t>(instr);
+ break;
+ case SWP_w:
+ case SWPA_w:
+ case SWPL_w:
+ case SWPAL_w:
+ AtomicMemorySwapHelper<uint32_t>(instr);
+ break;
+ case SWP_x:
+ case SWPA_x:
+ case SWPL_x:
+ case SWPAL_x:
+ AtomicMemorySwapHelper<uint64_t>(instr);
+ break;
+ case LDAPRB:
+ LoadAcquireRCpcHelper<uint8_t>(instr);
+ break;
+ case LDAPRH:
+ LoadAcquireRCpcHelper<uint16_t>(instr);
+ break;
+ case LDAPR_w:
+ LoadAcquireRCpcHelper<uint32_t>(instr);
+ break;
+ case LDAPR_x:
+ LoadAcquireRCpcHelper<uint64_t>(instr);
+ break;
+ }
+}
+
+void Simulator::VisitLoadLiteral(const Instruction* instr) {
+ unsigned rt = instr->Rt();
+ uint64_t address = instr->LiteralAddress<uint64_t>();
+
+ // Verify that the calculated address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ switch (instr->Mask(LoadLiteralMask)) {
+ // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_VREGS), then
+ // print a more detailed log.
+ case LDR_w_lit:
+ set_wreg(rt, Read<uint32_t>(address), NoRegLog);
+ LogRead(address, rt, kPrintWReg);
+ break;
+ case LDR_x_lit:
+ set_xreg(rt, Read<uint64_t>(address), NoRegLog);
+ LogRead(address, rt, kPrintXReg);
+ break;
+ case LDR_s_lit:
+ set_sreg(rt, Read<float>(address), NoRegLog);
+ LogVRead(address, rt, kPrintSReg);
+ break;
+ case LDR_d_lit:
+ set_dreg(rt, Read<double>(address), NoRegLog);
+ LogVRead(address, rt, kPrintDReg);
+ break;
+ case LDR_q_lit:
+ set_qreg(rt, Read<qreg_t>(address), NoRegLog);
+ LogVRead(address, rt, kPrintReg1Q);
+ break;
+ case LDRSW_x_lit:
+ set_xreg(rt, Read<int32_t>(address), NoRegLog);
+ LogRead(address, rt, kPrintWReg);
+ break;
+
+ // Ignore prfm hint instructions.
+ case PRFM_lit: break;
+
+ default: VIXL_UNREACHABLE();
+ }
+
+ local_monitor_.MaybeClear();
+}
+
+
+uintptr_t Simulator::AddressModeHelper(unsigned addr_reg,
+ int64_t offset,
+ AddrMode addrmode) {
+ uint64_t address = xreg(addr_reg, Reg31IsStackPointer);
+
+ if ((addr_reg == 31) && ((address % 16) != 0)) {
+ // When the base register is SP the stack pointer is required to be
+ // quadword aligned prior to the address calculation and write-backs.
+ // Misalignment will cause a stack alignment fault.
+ VIXL_ALIGNMENT_EXCEPTION();
+ }
+
+ if ((addrmode == PreIndex) || (addrmode == PostIndex)) {
+ VIXL_ASSERT(offset != 0);
+    // Only pre-index should log the register update here. For post-index, the
+    // update will be printed automatically by LogWrittenRegisters _after_ the
+    // memory access itself is logged.
+ RegLogMode log_mode = (addrmode == PreIndex) ? LogRegWrites : NoRegLog;
+ set_xreg(addr_reg, address + offset, log_mode, Reg31IsStackPointer);
+ }
+
+ if ((addrmode == Offset) || (addrmode == PreIndex)) {
+ address += offset;
+ }
+
+ // Verify that the calculated address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ return static_cast<uintptr_t>(address);
+}
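+
+// Addressing-mode summary for the helper above (illustrative, standard
+// AArch64 syntax):
+//   Offset:    ldr x0, [x1, #8]   - access at x1 + 8, x1 unchanged
+//   PreIndex:  ldr x0, [x1, #8]!  - x1 += 8 first, then access at x1
+//   PostIndex: ldr x0, [x1], #8   - access at x1, then x1 += 8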
+
+
+void Simulator::VisitMoveWideImmediate(const Instruction* instr) {
+ MoveWideImmediateOp mov_op =
+ static_cast<MoveWideImmediateOp>(instr->Mask(MoveWideImmediateMask));
+ int64_t new_xn_val = 0;
+
+ bool is_64_bits = instr->SixtyFourBits() == 1;
+ // Shift is limited for W operations.
+ VIXL_ASSERT(is_64_bits || (instr->ShiftMoveWide() < 2));
+
+ // Get the shifted immediate.
+ int64_t shift = instr->ShiftMoveWide() * 16;
+ int64_t shifted_imm16 = static_cast<int64_t>(instr->ImmMoveWide()) << shift;
+
+ // Compute the new value.
+ switch (mov_op) {
+ case MOVN_w:
+ case MOVN_x: {
+ new_xn_val = ~shifted_imm16;
+ if (!is_64_bits) new_xn_val &= kWRegMask;
+ break;
+ }
+ case MOVK_w:
+ case MOVK_x: {
+ unsigned reg_code = instr->Rd();
+ int64_t prev_xn_val = is_64_bits ? xreg(reg_code)
+ : wreg(reg_code);
+ new_xn_val =
+ (prev_xn_val & ~(INT64_C(0xffff) << shift)) | shifted_imm16;
+ break;
+ }
+ case MOVZ_w:
+ case MOVZ_x: {
+ new_xn_val = shifted_imm16;
+ break;
+ }
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ // Update the destination register.
+ set_xreg(instr->Rd(), new_xn_val);
+}
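+
+// Worked example (illustrative): with x0 == 0xffffffffffffffff,
+// "movk x0, #0x1234, lsl #16" replaces only bits 31:16, giving
+// 0xffffffff1234ffff; MOVZ with the same operands would produce
+// 0x0000000012340000, and MOVN its bitwise inverse, 0xffffffffedcbffff.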
+
+
+void Simulator::VisitConditionalSelect(const Instruction* instr) {
+ uint64_t new_val = xreg(instr->Rn());
+
+ if (ConditionFailed(static_cast<Condition>(instr->Condition()))) {
+ new_val = xreg(instr->Rm());
+ switch (instr->Mask(ConditionalSelectMask)) {
+ case CSEL_w:
+ case CSEL_x: break;
+ case CSINC_w:
+ case CSINC_x: new_val++; break;
+ case CSINV_w:
+ case CSINV_x: new_val = ~new_val; break;
+ case CSNEG_w:
+ case CSNEG_x: new_val = -new_val; break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ }
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ set_reg(reg_size, instr->Rd(), new_val);
+}
+
+
+void Simulator::VisitDataProcessing1Source(const Instruction* instr) {
+ unsigned dst = instr->Rd();
+ unsigned src = instr->Rn();
+
+ switch (instr->Mask(DataProcessing1SourceMask)) {
+ case RBIT_w: set_wreg(dst, ReverseBits(wreg(src))); break;
+ case RBIT_x: set_xreg(dst, ReverseBits(xreg(src))); break;
+ case REV16_w: set_wreg(dst, ReverseBytes(wreg(src), 1)); break;
+ case REV16_x: set_xreg(dst, ReverseBytes(xreg(src), 1)); break;
+ case REV_w: set_wreg(dst, ReverseBytes(wreg(src), 2)); break;
+ case REV32_x: set_xreg(dst, ReverseBytes(xreg(src), 2)); break;
+ case REV_x: set_xreg(dst, ReverseBytes(xreg(src), 3)); break;
+ case CLZ_w: set_wreg(dst, CountLeadingZeros(wreg(src))); break;
+ case CLZ_x: set_xreg(dst, CountLeadingZeros(xreg(src))); break;
+ case CLS_w: {
+ set_wreg(dst, CountLeadingSignBits(wreg(src)));
+ break;
+ }
+ case CLS_x: {
+ set_xreg(dst, CountLeadingSignBits(xreg(src)));
+ break;
+ }
+ default: VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+uint32_t Simulator::Poly32Mod2(unsigned n, uint64_t data, uint32_t poly) {
+ VIXL_ASSERT((n > 32) && (n <= 64));
+ for (unsigned i = (n - 1); i >= 32; i--) {
+ if (((data >> i) & 1) != 0) {
+ uint64_t polysh32 = (uint64_t)poly << (i - 32);
+ uint64_t mask = (UINT64_C(1) << i) - 1;
+ data = ((data & mask) ^ polysh32);
+ }
+ }
+ return data & 0xffffffff;
+}
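+
+// Small worked example for Poly32Mod2 (illustrative): with n == 33 and only
+// bit 32 of 'data' set, the single reduction step XORs in 'poly' shifted by
+// zero bits, so the function returns 'poly' itself; that is, x^32 mod P(x)
+// is given by the low 32 bits of the generator polynomial.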
+
+
+template <typename T>
+uint32_t Simulator::Crc32Checksum(uint32_t acc, T val, uint32_t poly) {
+ unsigned size = sizeof(val) * 8; // Number of bits in type T.
+ VIXL_ASSERT((size == 8) || (size == 16) || (size == 32));
+ uint64_t tempacc = static_cast<uint64_t>(ReverseBits(acc)) << size;
+ uint64_t tempval = static_cast<uint64_t>(ReverseBits(val)) << 32;
+ return ReverseBits(Poly32Mod2(32 + size, tempacc ^ tempval, poly));
+}
+
+
+uint32_t Simulator::Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly) {
+ // Poly32Mod2 cannot handle inputs with more than 32 bits, so compute
+ // the CRC of each 32-bit word sequentially.
+ acc = Crc32Checksum(acc, (uint32_t)(val & 0xffffffff), poly);
+ return Crc32Checksum(acc, (uint32_t)(val >> 32), poly);
+}
+
+
+void Simulator::VisitDataProcessing2Source(const Instruction* instr) {
+ Shift shift_op = NO_SHIFT;
+ int64_t result = 0;
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+
+ switch (instr->Mask(DataProcessing2SourceMask)) {
+ case SDIV_w: {
+ int32_t rn = wreg(instr->Rn());
+ int32_t rm = wreg(instr->Rm());
+ if ((rn == kWMinInt) && (rm == -1)) {
+ result = kWMinInt;
+ } else if (rm == 0) {
+ // Division by zero can be trapped, but not on A-class processors.
+ result = 0;
+ } else {
+ result = rn / rm;
+ }
+ break;
+ }
+ case SDIV_x: {
+ int64_t rn = xreg(instr->Rn());
+ int64_t rm = xreg(instr->Rm());
+ if ((rn == kXMinInt) && (rm == -1)) {
+ result = kXMinInt;
+ } else if (rm == 0) {
+ // Division by zero can be trapped, but not on A-class processors.
+ result = 0;
+ } else {
+ result = rn / rm;
+ }
+ break;
+ }
+ case UDIV_w: {
+ uint32_t rn = static_cast<uint32_t>(wreg(instr->Rn()));
+ uint32_t rm = static_cast<uint32_t>(wreg(instr->Rm()));
+ if (rm == 0) {
+ // Division by zero can be trapped, but not on A-class processors.
+ result = 0;
+ } else {
+ result = rn / rm;
+ }
+ break;
+ }
+ case UDIV_x: {
+ uint64_t rn = static_cast<uint64_t>(xreg(instr->Rn()));
+ uint64_t rm = static_cast<uint64_t>(xreg(instr->Rm()));
+ if (rm == 0) {
+ // Division by zero can be trapped, but not on A-class processors.
+ result = 0;
+ } else {
+ result = rn / rm;
+ }
+ break;
+ }
+ case LSLV_w:
+ case LSLV_x: shift_op = LSL; break;
+ case LSRV_w:
+ case LSRV_x: shift_op = LSR; break;
+ case ASRV_w:
+ case ASRV_x: shift_op = ASR; break;
+ case RORV_w:
+ case RORV_x: shift_op = ROR; break;
+ case CRC32B: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint8_t val = reg<uint8_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32_POLY);
+ break;
+ }
+ case CRC32H: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint16_t val = reg<uint16_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32_POLY);
+ break;
+ }
+ case CRC32W: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint32_t val = reg<uint32_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32_POLY);
+ break;
+ }
+ case CRC32X: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint64_t val = reg<uint64_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32_POLY);
+ reg_size = kWRegSize;
+ break;
+ }
+ case CRC32CB: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint8_t val = reg<uint8_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32C_POLY);
+ break;
+ }
+ case CRC32CH: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint16_t val = reg<uint16_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32C_POLY);
+ break;
+ }
+ case CRC32CW: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint32_t val = reg<uint32_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32C_POLY);
+ break;
+ }
+ case CRC32CX: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint64_t val = reg<uint64_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32C_POLY);
+ reg_size = kWRegSize;
+ break;
+ }
+ default: VIXL_UNIMPLEMENTED();
+ }
+
+ if (shift_op != NO_SHIFT) {
+    // The shift distance is encoded in the least-significant five (W) or six
+    // (X) bits of the register.
+ int mask = (instr->SixtyFourBits() == 1) ? 0x3f : 0x1f;
+ unsigned shift = wreg(instr->Rm()) & mask;
+ result = ShiftOperand(reg_size, reg(reg_size, instr->Rn()), shift_op,
+ shift);
+ }
+ set_reg(reg_size, instr->Rd(), result);
+}
+
+
+// The algorithm used is adapted from the one described in section 8.2 of
+// Hacker's Delight, by Henry S. Warren, Jr.
+// It assumes that a right shift on a signed integer is an arithmetic shift.
+// Type T must be either uint64_t or int64_t.
+template <typename T>
+static T MultiplyHigh(T u, T v) {
+ uint64_t u0, v0, w0;
+ T u1, v1, w1, w2, t;
+
+ VIXL_ASSERT(sizeof(u) == sizeof(u0));
+
+ u0 = u & 0xffffffff;
+ u1 = u >> 32;
+ v0 = v & 0xffffffff;
+ v1 = v >> 32;
+
+ w0 = u0 * v0;
+ t = u1 * v0 + (w0 >> 32);
+ w1 = t & 0xffffffff;
+ w2 = t >> 32;
+ w1 = u0 * v1 + w1;
+
+ return u1 * v1 + w2 + (w1 >> 32);
+}
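+
+// Sanity example (illustrative): the 128-bit product of 0xffffffffffffffff
+// and 2 is 0x1fffffffffffffffe, so MultiplyHigh<uint64_t>(~UINT64_C(0), 2)
+// returns 1, the top 64 bits of that product.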
+
+
+void Simulator::VisitDataProcessing3Source(const Instruction* instr) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+
+ int64_t result = 0;
+ // Extract and sign- or zero-extend 32-bit arguments for widening operations.
+ uint64_t rn_u32 = reg<uint32_t>(instr->Rn());
+ uint64_t rm_u32 = reg<uint32_t>(instr->Rm());
+ int64_t rn_s32 = reg<int32_t>(instr->Rn());
+ int64_t rm_s32 = reg<int32_t>(instr->Rm());
+ switch (instr->Mask(DataProcessing3SourceMask)) {
+ case MADD_w:
+ case MADD_x:
+ result = xreg(instr->Ra()) + (xreg(instr->Rn()) * xreg(instr->Rm()));
+ break;
+ case MSUB_w:
+ case MSUB_x:
+ result = xreg(instr->Ra()) - (xreg(instr->Rn()) * xreg(instr->Rm()));
+ break;
+ case SMADDL_x: result = xreg(instr->Ra()) + (rn_s32 * rm_s32); break;
+ case SMSUBL_x: result = xreg(instr->Ra()) - (rn_s32 * rm_s32); break;
+ case UMADDL_x: result = xreg(instr->Ra()) + (rn_u32 * rm_u32); break;
+ case UMSUBL_x: result = xreg(instr->Ra()) - (rn_u32 * rm_u32); break;
+ case UMULH_x:
+ result = MultiplyHigh(reg<uint64_t>(instr->Rn()),
+ reg<uint64_t>(instr->Rm()));
+ break;
+ case SMULH_x:
+ result = MultiplyHigh(xreg(instr->Rn()), xreg(instr->Rm()));
+ break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ set_reg(reg_size, instr->Rd(), result);
+}
+
+
+void Simulator::VisitBitfield(const Instruction* instr) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ int64_t reg_mask = instr->SixtyFourBits() ? kXRegMask : kWRegMask;
+ int64_t R = instr->ImmR();
+ int64_t S = instr->ImmS();
+ int64_t diff = S - R;
+ int64_t mask;
+ if (diff >= 0) {
+ mask = (diff < (reg_size - 1)) ? (INT64_C(1) << (diff + 1)) - 1
+ : reg_mask;
+ } else {
+ mask = (INT64_C(1) << (S + 1)) - 1;
+ mask = (static_cast<uint64_t>(mask) >> R) | (mask << (reg_size - R));
+ diff += reg_size;
+ }
+
+  // inzero indicates whether the extracted bitfield is inserted into the
+  // existing destination register value or into zero.
+  // If extend is true, the sign of the extracted bitfield is extended.
+ bool inzero = false;
+ bool extend = false;
+ switch (instr->Mask(BitfieldMask)) {
+ case BFM_x:
+ case BFM_w:
+ break;
+ case SBFM_x:
+ case SBFM_w:
+ inzero = true;
+ extend = true;
+ break;
+ case UBFM_x:
+ case UBFM_w:
+ inzero = true;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+
+ int64_t dst = inzero ? 0 : reg(reg_size, instr->Rd());
+ int64_t src = reg(reg_size, instr->Rn());
+ // Rotate source bitfield into place.
+ int64_t result = (static_cast<uint64_t>(src) >> R) | (src << (reg_size - R));
+ // Determine the sign extension.
+ int64_t topbits = ((INT64_C(1) << (reg_size - diff - 1)) - 1) << (diff + 1);
+ int64_t signbits = extend && ((src >> S) & 1) ? topbits : 0;
+
+ // Merge sign extension, dest/zero and bitfield.
+ result = signbits | (result & mask) | (dst & ~mask);
+
+ set_reg(reg_size, instr->Rd(), result);
+}
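+
+// Worked example (illustrative): UBFM with R == 8 and S == 15 (the UBFX alias
+// extracting bits 15:8) gives diff == 7 and mask == 0xff; the source is
+// rotated right by 8 and masked, and (with inzero set and no sign bits)
+// bits 15:8 of Wn end up in bits 7:0 of Wd.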
+
+
+void Simulator::VisitExtract(const Instruction* instr) {
+ unsigned lsb = instr->ImmS();
+ unsigned reg_size = (instr->SixtyFourBits() == 1) ? kXRegSize
+ : kWRegSize;
+ uint64_t low_res = static_cast<uint64_t>(reg(reg_size, instr->Rm())) >> lsb;
+ uint64_t high_res =
+ (lsb == 0) ? 0 : reg(reg_size, instr->Rn()) << (reg_size - lsb);
+ set_reg(reg_size, instr->Rd(), low_res | high_res);
+}
+
+
+void Simulator::VisitFPImmediate(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ unsigned dest = instr->Rd();
+ switch (instr->Mask(FPImmediateMask)) {
+ case FMOV_s_imm: set_sreg(dest, instr->ImmFP32()); break;
+ case FMOV_d_imm: set_dreg(dest, instr->ImmFP64()); break;
+ default: VIXL_UNREACHABLE();
+ }
+}
+
+
+void Simulator::VisitFPIntegerConvert(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ unsigned dst = instr->Rd();
+ unsigned src = instr->Rn();
+
+ FPRounding round = RMode();
+
+ switch (instr->Mask(FPIntegerConvertMask)) {
+ case FCVTAS_ws: set_wreg(dst, FPToInt32(sreg(src), FPTieAway)); break;
+ case FCVTAS_xs: set_xreg(dst, FPToInt64(sreg(src), FPTieAway)); break;
+ case FCVTAS_wd: set_wreg(dst, FPToInt32(dreg(src), FPTieAway)); break;
+ case FCVTAS_xd: set_xreg(dst, FPToInt64(dreg(src), FPTieAway)); break;
+ case FCVTAU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPTieAway)); break;
+ case FCVTAU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPTieAway)); break;
+ case FCVTAU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPTieAway)); break;
+ case FCVTAU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPTieAway)); break;
+ case FCVTMS_ws:
+ set_wreg(dst, FPToInt32(sreg(src), FPNegativeInfinity));
+ break;
+ case FCVTMS_xs:
+ set_xreg(dst, FPToInt64(sreg(src), FPNegativeInfinity));
+ break;
+ case FCVTMS_wd:
+ set_wreg(dst, FPToInt32(dreg(src), FPNegativeInfinity));
+ break;
+ case FCVTMS_xd:
+ set_xreg(dst, FPToInt64(dreg(src), FPNegativeInfinity));
+ break;
+ case FCVTMU_ws:
+ set_wreg(dst, FPToUInt32(sreg(src), FPNegativeInfinity));
+ break;
+ case FCVTMU_xs:
+ set_xreg(dst, FPToUInt64(sreg(src), FPNegativeInfinity));
+ break;
+ case FCVTMU_wd:
+ set_wreg(dst, FPToUInt32(dreg(src), FPNegativeInfinity));
+ break;
+ case FCVTMU_xd:
+ set_xreg(dst, FPToUInt64(dreg(src), FPNegativeInfinity));
+ break;
+ case FCVTPS_ws:
+ set_wreg(dst, FPToInt32(sreg(src), FPPositiveInfinity));
+ break;
+ case FCVTPS_xs:
+ set_xreg(dst, FPToInt64(sreg(src), FPPositiveInfinity));
+ break;
+ case FCVTPS_wd:
+ set_wreg(dst, FPToInt32(dreg(src), FPPositiveInfinity));
+ break;
+ case FCVTPS_xd:
+ set_xreg(dst, FPToInt64(dreg(src), FPPositiveInfinity));
+ break;
+ case FCVTPU_ws:
+ set_wreg(dst, FPToUInt32(sreg(src), FPPositiveInfinity));
+ break;
+ case FCVTPU_xs:
+ set_xreg(dst, FPToUInt64(sreg(src), FPPositiveInfinity));
+ break;
+ case FCVTPU_wd:
+ set_wreg(dst, FPToUInt32(dreg(src), FPPositiveInfinity));
+ break;
+ case FCVTPU_xd:
+ set_xreg(dst, FPToUInt64(dreg(src), FPPositiveInfinity));
+ break;
+ case FCVTNS_ws: set_wreg(dst, FPToInt32(sreg(src), FPTieEven)); break;
+ case FCVTNS_xs: set_xreg(dst, FPToInt64(sreg(src), FPTieEven)); break;
+ case FCVTNS_wd: set_wreg(dst, FPToInt32(dreg(src), FPTieEven)); break;
+ case FCVTNS_xd: set_xreg(dst, FPToInt64(dreg(src), FPTieEven)); break;
+ case FCVTNU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPTieEven)); break;
+ case FCVTNU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPTieEven)); break;
+ case FCVTNU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPTieEven)); break;
+ case FCVTNU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPTieEven)); break;
+ case FCVTZS_ws: set_wreg(dst, FPToInt32(sreg(src), FPZero)); break;
+ case FCVTZS_xs: set_xreg(dst, FPToInt64(sreg(src), FPZero)); break;
+ case FCVTZS_wd: set_wreg(dst, FPToInt32(dreg(src), FPZero)); break;
+ case FCVTZS_xd: set_xreg(dst, FPToInt64(dreg(src), FPZero)); break;
+ case FCVTZU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPZero)); break;
+ case FCVTZU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPZero)); break;
+ case FCVTZU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPZero)); break;
+ case FCVTZU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPZero)); break;
+ case FJCVTZS: set_wreg(dst, FPToFixedJS(dreg(src))); break;
+ case FMOV_ws: set_wreg(dst, sreg_bits(src)); break;
+ case FMOV_xd: set_xreg(dst, dreg_bits(src)); break;
+ case FMOV_sw: set_sreg_bits(dst, wreg(src)); break;
+ case FMOV_dx: set_dreg_bits(dst, xreg(src)); break;
+ case FMOV_d1_x:
+ LogicVRegister(vreg(dst)).SetUint(kFormatD, 1, xreg(src));
+ break;
+ case FMOV_x_d1:
+ set_xreg(dst, LogicVRegister(vreg(src)).Uint(kFormatD, 1));
+ break;
+
+ // A 32-bit input can be handled in the same way as a 64-bit input, since
+ // the sign- or zero-extension will not affect the conversion.
+ case SCVTF_dx: set_dreg(dst, FixedToDouble(xreg(src), 0, round)); break;
+ case SCVTF_dw: set_dreg(dst, FixedToDouble(wreg(src), 0, round)); break;
+ case UCVTF_dx: set_dreg(dst, UFixedToDouble(xreg(src), 0, round)); break;
+ case UCVTF_dw: {
+ set_dreg(dst, UFixedToDouble(static_cast<uint32_t>(wreg(src)), 0, round));
+ break;
+ }
+ case SCVTF_sx: set_sreg(dst, FixedToFloat(xreg(src), 0, round)); break;
+ case SCVTF_sw: set_sreg(dst, FixedToFloat(wreg(src), 0, round)); break;
+ case UCVTF_sx: set_sreg(dst, UFixedToFloat(xreg(src), 0, round)); break;
+ case UCVTF_sw: {
+ set_sreg(dst, UFixedToFloat(static_cast<uint32_t>(wreg(src)), 0, round));
+ break;
+ }
+
+ default: VIXL_UNREACHABLE();
+ }
+}
+
+
+void Simulator::VisitFPFixedPointConvert(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ unsigned dst = instr->Rd();
+ unsigned src = instr->Rn();
+ int fbits = 64 - instr->FPScale();
+
+ FPRounding round = RMode();
+
+ switch (instr->Mask(FPFixedPointConvertMask)) {
+ // A 32-bit input can be handled in the same way as a 64-bit input, since
+ // the sign- or zero-extension will not affect the conversion.
+ case SCVTF_dx_fixed:
+ set_dreg(dst, FixedToDouble(xreg(src), fbits, round));
+ break;
+ case SCVTF_dw_fixed:
+ set_dreg(dst, FixedToDouble(wreg(src), fbits, round));
+ break;
+ case UCVTF_dx_fixed:
+ set_dreg(dst, UFixedToDouble(xreg(src), fbits, round));
+ break;
+ case UCVTF_dw_fixed: {
+ set_dreg(dst,
+ UFixedToDouble(static_cast<uint32_t>(wreg(src)), fbits, round));
+ break;
+ }
+ case SCVTF_sx_fixed:
+ set_sreg(dst, FixedToFloat(xreg(src), fbits, round));
+ break;
+ case SCVTF_sw_fixed:
+ set_sreg(dst, FixedToFloat(wreg(src), fbits, round));
+ break;
+ case UCVTF_sx_fixed:
+ set_sreg(dst, UFixedToFloat(xreg(src), fbits, round));
+ break;
+ case UCVTF_sw_fixed: {
+ set_sreg(dst,
+ UFixedToFloat(static_cast<uint32_t>(wreg(src)), fbits, round));
+ break;
+ }
+ case FCVTZS_xd_fixed:
+ set_xreg(dst, FPToInt64(dreg(src) * std::pow(2.0, fbits), FPZero));
+ break;
+ case FCVTZS_wd_fixed:
+ set_wreg(dst, FPToInt32(dreg(src) * std::pow(2.0, fbits), FPZero));
+ break;
+ case FCVTZU_xd_fixed:
+ set_xreg(dst, FPToUInt64(dreg(src) * std::pow(2.0, fbits), FPZero));
+ break;
+ case FCVTZU_wd_fixed:
+ set_wreg(dst, FPToUInt32(dreg(src) * std::pow(2.0, fbits), FPZero));
+ break;
+ case FCVTZS_xs_fixed:
+ set_xreg(dst, FPToInt64(sreg(src) * std::pow(2.0f, fbits), FPZero));
+ break;
+ case FCVTZS_ws_fixed:
+ set_wreg(dst, FPToInt32(sreg(src) * std::pow(2.0f, fbits), FPZero));
+ break;
+ case FCVTZU_xs_fixed:
+ set_xreg(dst, FPToUInt64(sreg(src) * std::pow(2.0f, fbits), FPZero));
+ break;
+ case FCVTZU_ws_fixed:
+ set_wreg(dst, FPToUInt32(sreg(src) * std::pow(2.0f, fbits), FPZero));
+ break;
+ default: VIXL_UNREACHABLE();
+ }
+}
+
+
+void Simulator::VisitFPCompare(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ FPTrapFlags trap = DisableTrap;
+ switch (instr->Mask(FPCompareMask)) {
+ case FCMPE_s: trap = EnableTrap; VIXL_FALLTHROUGH();
+ case FCMP_s: FPCompare(sreg(instr->Rn()), sreg(instr->Rm()), trap); break;
+ case FCMPE_d: trap = EnableTrap; VIXL_FALLTHROUGH();
+ case FCMP_d: FPCompare(dreg(instr->Rn()), dreg(instr->Rm()), trap); break;
+ case FCMPE_s_zero: trap = EnableTrap; VIXL_FALLTHROUGH();
+ case FCMP_s_zero: FPCompare(sreg(instr->Rn()), 0.0f, trap); break;
+ case FCMPE_d_zero: trap = EnableTrap; VIXL_FALLTHROUGH();
+ case FCMP_d_zero: FPCompare(dreg(instr->Rn()), 0.0, trap); break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitFPConditionalCompare(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ FPTrapFlags trap = DisableTrap;
+ switch (instr->Mask(FPConditionalCompareMask)) {
+ case FCCMPE_s: trap = EnableTrap;
+ VIXL_FALLTHROUGH();
+ case FCCMP_s:
+ if (ConditionPassed(instr->Condition())) {
+ FPCompare(sreg(instr->Rn()), sreg(instr->Rm()), trap);
+ } else {
+ nzcv().SetFlags(instr->Nzcv());
+ LogSystemRegister(NZCV);
+ }
+ break;
+ case FCCMPE_d: trap = EnableTrap;
+ VIXL_FALLTHROUGH();
+ case FCCMP_d:
+ if (ConditionPassed(instr->Condition())) {
+ FPCompare(dreg(instr->Rn()), dreg(instr->Rm()), trap);
+ } else {
+ nzcv().SetFlags(instr->Nzcv());
+ LogSystemRegister(NZCV);
+ }
+ break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitFPConditionalSelect(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ Instr selected;
+ if (ConditionPassed(instr->Condition())) {
+ selected = instr->Rn();
+ } else {
+ selected = instr->Rm();
+ }
+
+ switch (instr->Mask(FPConditionalSelectMask)) {
+ case FCSEL_s: set_sreg(instr->Rd(), sreg(selected)); break;
+ case FCSEL_d: set_dreg(instr->Rd(), dreg(selected)); break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitFPDataProcessing1Source(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
+ VectorFormat vform = (instr->Mask(FP64) == FP64) ? kFormatD : kFormatS;
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ bool inexact_exception = false;
+
+ unsigned fd = instr->Rd();
+ unsigned fn = instr->Rn();
+
+ switch (instr->Mask(FPDataProcessing1SourceMask)) {
+ case FMOV_s: set_sreg(fd, sreg(fn)); return;
+ case FMOV_d: set_dreg(fd, dreg(fn)); return;
+ case FABS_s: fabs_(kFormatS, vreg(fd), vreg(fn)); return;
+ case FABS_d: fabs_(kFormatD, vreg(fd), vreg(fn)); return;
+ case FNEG_s: fneg(kFormatS, vreg(fd), vreg(fn)); return;
+ case FNEG_d: fneg(kFormatD, vreg(fd), vreg(fn)); return;
+ case FCVT_ds:
+ set_dreg(fd, FPToDouble(sreg(fn), ReadDN()));
+ return;
+ case FCVT_sd:
+ set_sreg(fd, FPToFloat(dreg(fn), FPTieEven, ReadDN()));
+ return;
+ case FCVT_hs:
+ set_hreg(fd, Float16ToRawbits(FPToFloat16(sreg(fn), FPTieEven, ReadDN())));
+ return;
+ case FCVT_sh:
+ set_sreg(fd, FPToFloat(RawbitsToFloat16(hreg(fn)), ReadDN()));
+ return;
+ case FCVT_dh:
+ set_dreg(fd, FPToDouble(hreg(fn), ReadDN()));
+ return;
+ case FCVT_hd:
+ set_hreg(fd, Float16ToRawbits(FPToFloat16(dreg(fn), FPTieEven, ReadDN())));
+ return;
+ case FSQRT_s:
+ case FSQRT_d: fsqrt(vform, rd, rn); return;
+ case FRINTI_s:
+ case FRINTI_d: break; // Use FPCR rounding mode.
+ case FRINTX_s:
+ case FRINTX_d: inexact_exception = true; break;
+ case FRINTA_s:
+ case FRINTA_d: fpcr_rounding = FPTieAway; break;
+ case FRINTM_s:
+ case FRINTM_d: fpcr_rounding = FPNegativeInfinity; break;
+ case FRINTN_s:
+ case FRINTN_d: fpcr_rounding = FPTieEven; break;
+ case FRINTP_s:
+ case FRINTP_d: fpcr_rounding = FPPositiveInfinity; break;
+ case FRINTZ_s:
+ case FRINTZ_d: fpcr_rounding = FPZero; break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+
+ // Only FRINT* instructions fall through the switch above.
+ frint(vform, rd, rn, fpcr_rounding, inexact_exception);
+}
+
+
+void Simulator::VisitFPDataProcessing2Source(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ VectorFormat vform = (instr->Mask(FP64) == FP64) ? kFormatD : kFormatS;
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rm = vreg(instr->Rm());
+
+ switch (instr->Mask(FPDataProcessing2SourceMask)) {
+ case FADD_s:
+ case FADD_d: fadd(vform, rd, rn, rm); break;
+ case FSUB_s:
+ case FSUB_d: fsub(vform, rd, rn, rm); break;
+ case FMUL_s:
+ case FMUL_d: fmul(vform, rd, rn, rm); break;
+ case FNMUL_s:
+ case FNMUL_d: fnmul(vform, rd, rn, rm); break;
+ case FDIV_s:
+ case FDIV_d: fdiv(vform, rd, rn, rm); break;
+ case FMAX_s:
+ case FMAX_d: fmax(vform, rd, rn, rm); break;
+ case FMIN_s:
+ case FMIN_d: fmin(vform, rd, rn, rm); break;
+ case FMAXNM_s:
+ case FMAXNM_d: fmaxnm(vform, rd, rn, rm); break;
+ case FMINNM_s:
+ case FMINNM_d: fminnm(vform, rd, rn, rm); break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
+
+
+void Simulator::VisitFPDataProcessing3Source(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ unsigned fd = instr->Rd();
+ unsigned fn = instr->Rn();
+ unsigned fm = instr->Rm();
+ unsigned fa = instr->Ra();
+
+ switch (instr->Mask(FPDataProcessing3SourceMask)) {
+ // fd = fa +/- (fn * fm)
+ case FMADD_s: set_sreg(fd, FPMulAdd(sreg(fa), sreg(fn), sreg(fm))); break;
+ case FMSUB_s: set_sreg(fd, FPMulAdd(sreg(fa), -sreg(fn), sreg(fm))); break;
+ case FMADD_d: set_dreg(fd, FPMulAdd(dreg(fa), dreg(fn), dreg(fm))); break;
+ case FMSUB_d: set_dreg(fd, FPMulAdd(dreg(fa), -dreg(fn), dreg(fm))); break;
+ // Negated variants of the above.
+ case FNMADD_s:
+ set_sreg(fd, FPMulAdd(-sreg(fa), -sreg(fn), sreg(fm)));
+ break;
+ case FNMSUB_s:
+ set_sreg(fd, FPMulAdd(-sreg(fa), sreg(fn), sreg(fm)));
+ break;
+ case FNMADD_d:
+ set_dreg(fd, FPMulAdd(-dreg(fa), -dreg(fn), dreg(fm)));
+ break;
+ case FNMSUB_d:
+ set_dreg(fd, FPMulAdd(-dreg(fa), dreg(fn), dreg(fm)));
+ break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+bool Simulator::FPProcessNaNs(const Instruction* instr) {
+ unsigned fd = instr->Rd();
+ unsigned fn = instr->Rn();
+ unsigned fm = instr->Rm();
+ bool done = false;
+
+ if (instr->Mask(FP64) == FP64) {
+ double result = FPProcessNaNs(dreg(fn), dreg(fm));
+ if (std::isnan(result)) {
+ set_dreg(fd, result);
+ done = true;
+ }
+ } else {
+ float result = FPProcessNaNs(sreg(fn), sreg(fm));
+ if (std::isnan(result)) {
+ set_sreg(fd, result);
+ done = true;
+ }
+ }
+
+ return done;
+}
+
+
+void Simulator::SysOp_W(int op, int64_t val) {
+ switch (op) {
+ case IVAU:
+ case CVAC:
+ case CVAU:
+ case CIVAC: {
+ // Perform a dummy memory access to ensure that we have read access
+ // to the specified address.
+ volatile uint8_t y = Read<uint8_t>(val);
+ USE(y);
+ // TODO: Implement "case ZVA:".
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitSystem(const Instruction* instr) {
+ // Some system instructions hijack their Op and Cp fields to represent a
+ // range of immediates instead of indicating a different instruction. This
+ // makes the decoding tricky.
+ if (instr->Mask(SystemExclusiveMonitorFMask) == SystemExclusiveMonitorFixed) {
+ VIXL_ASSERT(instr->Mask(SystemExclusiveMonitorMask) == CLREX);
+ switch (instr->Mask(SystemExclusiveMonitorMask)) {
+ case CLREX: {
+ PrintExclusiveAccessWarning();
+ ClearLocalMonitor();
+ break;
+ }
+ }
+ } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
+ switch (instr->Mask(SystemSysRegMask)) {
+ case MRS: {
+ switch (instr->ImmSystemRegister()) {
+ case NZCV: set_xreg(instr->Rt(), nzcv().RawValue()); break;
+ case FPCR: set_xreg(instr->Rt(), fpcr().RawValue()); break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ break;
+ }
+ case MSR: {
+ switch (instr->ImmSystemRegister()) {
+ case NZCV:
+ nzcv().SetRawValue(wreg(instr->Rt()));
+ LogSystemRegister(NZCV);
+ break;
+ case FPCR:
+ fpcr().SetRawValue(wreg(instr->Rt()));
+ LogSystemRegister(FPCR);
+ break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ break;
+ }
+ }
+ } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
+ VIXL_ASSERT(instr->Mask(SystemHintMask) == HINT);
+ switch (instr->ImmHint()) {
+ case NOP: break;
+ case CSDB: break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) {
+ js::jit::AtomicOperations::fenceSeqCst();
+ } else if ((instr->Mask(SystemSysFMask) == SystemSysFixed)) {
+ switch (instr->Mask(SystemSysMask)) {
+ case SYS: SysOp_W(instr->SysOp(), xreg(instr->Rt())); break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitCrypto2RegSHA(const Instruction* instr) {
+ VisitUnimplemented(instr);
+}
+
+
+void Simulator::VisitCrypto3RegSHA(const Instruction* instr) {
+ VisitUnimplemented(instr);
+}
+
+
+void Simulator::VisitCryptoAES(const Instruction* instr) {
+ VisitUnimplemented(instr);
+}
+
+
+void Simulator::VisitNEON2RegMisc(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr);
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ static const NEONFormatMap map_lp = {
+ {23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}
+ };
+ VectorFormat vf_lp = nfd.GetVectorFormat(&map_lp);
+
+ static const NEONFormatMap map_fcvtl = {
+ {22}, {NF_4S, NF_2D}
+ };
+ VectorFormat vf_fcvtl = nfd.GetVectorFormat(&map_fcvtl);
+
+ static const NEONFormatMap map_fcvtn = {
+ {22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S}
+ };
+ VectorFormat vf_fcvtn = nfd.GetVectorFormat(&map_fcvtn);
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+
+ if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) {
+ // These instructions all use a two bit size field, except NOT and RBIT,
+ // which use the field to encode the operation.
+ switch (instr->Mask(NEON2RegMiscMask)) {
+ case NEON_REV64: rev64(vf, rd, rn); break;
+ case NEON_REV32: rev32(vf, rd, rn); break;
+ case NEON_REV16: rev16(vf, rd, rn); break;
+ case NEON_SUQADD: suqadd(vf, rd, rn); break;
+ case NEON_USQADD: usqadd(vf, rd, rn); break;
+ case NEON_CLS: cls(vf, rd, rn); break;
+ case NEON_CLZ: clz(vf, rd, rn); break;
+ case NEON_CNT: cnt(vf, rd, rn); break;
+ case NEON_SQABS: abs(vf, rd, rn).SignedSaturate(vf); break;
+ case NEON_SQNEG: neg(vf, rd, rn).SignedSaturate(vf); break;
+ case NEON_CMGT_zero: cmp(vf, rd, rn, 0, gt); break;
+ case NEON_CMGE_zero: cmp(vf, rd, rn, 0, ge); break;
+ case NEON_CMEQ_zero: cmp(vf, rd, rn, 0, eq); break;
+ case NEON_CMLE_zero: cmp(vf, rd, rn, 0, le); break;
+ case NEON_CMLT_zero: cmp(vf, rd, rn, 0, lt); break;
+ case NEON_ABS: abs(vf, rd, rn); break;
+ case NEON_NEG: neg(vf, rd, rn); break;
+ case NEON_SADDLP: saddlp(vf_lp, rd, rn); break;
+ case NEON_UADDLP: uaddlp(vf_lp, rd, rn); break;
+ case NEON_SADALP: sadalp(vf_lp, rd, rn); break;
+ case NEON_UADALP: uadalp(vf_lp, rd, rn); break;
+ case NEON_RBIT_NOT:
+ vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
+ switch (instr->FPType()) {
+ case 0: not_(vf, rd, rn); break;
+        case 1: rbit(vf, rd, rn); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ break;
+ }
+ } else {
+ VectorFormat fpf = nfd.GetVectorFormat(nfd.FPFormatMap());
+ FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
+ bool inexact_exception = false;
+
+ // These instructions all use a one bit size field, except XTN, SQXTUN,
+ // SHLL, SQXTN and UQXTN, which use a two bit size field.
+ switch (instr->Mask(NEON2RegMiscFPMask)) {
+ case NEON_FABS: fabs_(fpf, rd, rn); return;
+ case NEON_FNEG: fneg(fpf, rd, rn); return;
+ case NEON_FSQRT: fsqrt(fpf, rd, rn); return;
+ case NEON_FCVTL:
+ if (instr->Mask(NEON_Q)) {
+ fcvtl2(vf_fcvtl, rd, rn);
+ } else {
+ fcvtl(vf_fcvtl, rd, rn);
+ }
+ return;
+ case NEON_FCVTN:
+ if (instr->Mask(NEON_Q)) {
+ fcvtn2(vf_fcvtn, rd, rn);
+ } else {
+ fcvtn(vf_fcvtn, rd, rn);
+ }
+ return;
+ case NEON_FCVTXN:
+ if (instr->Mask(NEON_Q)) {
+ fcvtxn2(vf_fcvtn, rd, rn);
+ } else {
+ fcvtxn(vf_fcvtn, rd, rn);
+ }
+ return;
+
+ // The following instructions break from the switch statement, rather
+ // than return.
+ case NEON_FRINTI: break; // Use FPCR rounding mode.
+ case NEON_FRINTX: inexact_exception = true; break;
+ case NEON_FRINTA: fpcr_rounding = FPTieAway; break;
+ case NEON_FRINTM: fpcr_rounding = FPNegativeInfinity; break;
+ case NEON_FRINTN: fpcr_rounding = FPTieEven; break;
+ case NEON_FRINTP: fpcr_rounding = FPPositiveInfinity; break;
+ case NEON_FRINTZ: fpcr_rounding = FPZero; break;
+
+ case NEON_FCVTNS: fcvts(fpf, rd, rn, FPTieEven); return;
+ case NEON_FCVTNU: fcvtu(fpf, rd, rn, FPTieEven); return;
+ case NEON_FCVTPS: fcvts(fpf, rd, rn, FPPositiveInfinity); return;
+ case NEON_FCVTPU: fcvtu(fpf, rd, rn, FPPositiveInfinity); return;
+ case NEON_FCVTMS: fcvts(fpf, rd, rn, FPNegativeInfinity); return;
+ case NEON_FCVTMU: fcvtu(fpf, rd, rn, FPNegativeInfinity); return;
+ case NEON_FCVTZS: fcvts(fpf, rd, rn, FPZero); return;
+ case NEON_FCVTZU: fcvtu(fpf, rd, rn, FPZero); return;
+ case NEON_FCVTAS: fcvts(fpf, rd, rn, FPTieAway); return;
+ case NEON_FCVTAU: fcvtu(fpf, rd, rn, FPTieAway); return;
+ case NEON_SCVTF: scvtf(fpf, rd, rn, 0, fpcr_rounding); return;
+ case NEON_UCVTF: ucvtf(fpf, rd, rn, 0, fpcr_rounding); return;
+ case NEON_URSQRTE: ursqrte(fpf, rd, rn); return;
+ case NEON_URECPE: urecpe(fpf, rd, rn); return;
+ case NEON_FRSQRTE: frsqrte(fpf, rd, rn); return;
+ case NEON_FRECPE: frecpe(fpf, rd, rn, fpcr_rounding); return;
+ case NEON_FCMGT_zero: fcmp_zero(fpf, rd, rn, gt); return;
+ case NEON_FCMGE_zero: fcmp_zero(fpf, rd, rn, ge); return;
+ case NEON_FCMEQ_zero: fcmp_zero(fpf, rd, rn, eq); return;
+ case NEON_FCMLE_zero: fcmp_zero(fpf, rd, rn, le); return;
+ case NEON_FCMLT_zero: fcmp_zero(fpf, rd, rn, lt); return;
+ default:
+ if ((NEON_XTN_opcode <= instr->Mask(NEON2RegMiscOpcode)) &&
+ (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) {
+ switch (instr->Mask(NEON2RegMiscMask)) {
+ case NEON_XTN: xtn(vf, rd, rn); return;
+ case NEON_SQXTN: sqxtn(vf, rd, rn); return;
+ case NEON_UQXTN: uqxtn(vf, rd, rn); return;
+ case NEON_SQXTUN: sqxtun(vf, rd, rn); return;
+ case NEON_SHLL:
+ vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
+ if (instr->Mask(NEON_Q)) {
+ shll2(vf, rd, rn);
+ } else {
+ shll(vf, rd, rn);
+ }
+ return;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+
+ // Only FRINT* instructions fall through the switch above.
+ frint(fpf, rd, rn, fpcr_rounding, inexact_exception);
+ }
+}
+
+
+void Simulator::VisitNEON3Same(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr);
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rm = vreg(instr->Rm());
+
+ if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) {
+ VectorFormat vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
+ switch (instr->Mask(NEON3SameLogicalMask)) {
+ case NEON_AND: and_(vf, rd, rn, rm); break;
+ case NEON_ORR: orr(vf, rd, rn, rm); break;
+ case NEON_ORN: orn(vf, rd, rn, rm); break;
+ case NEON_EOR: eor(vf, rd, rn, rm); break;
+ case NEON_BIC: bic(vf, rd, rn, rm); break;
+ case NEON_BIF: bif(vf, rd, rn, rm); break;
+ case NEON_BIT: bit(vf, rd, rn, rm); break;
+ case NEON_BSL: bsl(vf, rd, rn, rm); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ } else if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
+ VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
+ switch (instr->Mask(NEON3SameFPMask)) {
+ case NEON_FADD: fadd(vf, rd, rn, rm); break;
+ case NEON_FSUB: fsub(vf, rd, rn, rm); break;
+ case NEON_FMUL: fmul(vf, rd, rn, rm); break;
+ case NEON_FDIV: fdiv(vf, rd, rn, rm); break;
+ case NEON_FMAX: fmax(vf, rd, rn, rm); break;
+ case NEON_FMIN: fmin(vf, rd, rn, rm); break;
+ case NEON_FMAXNM: fmaxnm(vf, rd, rn, rm); break;
+ case NEON_FMINNM: fminnm(vf, rd, rn, rm); break;
+ case NEON_FMLA: fmla(vf, rd, rn, rm); break;
+ case NEON_FMLS: fmls(vf, rd, rn, rm); break;
+ case NEON_FMULX: fmulx(vf, rd, rn, rm); break;
+ case NEON_FACGE: fabscmp(vf, rd, rn, rm, ge); break;
+ case NEON_FACGT: fabscmp(vf, rd, rn, rm, gt); break;
+ case NEON_FCMEQ: fcmp(vf, rd, rn, rm, eq); break;
+ case NEON_FCMGE: fcmp(vf, rd, rn, rm, ge); break;
+ case NEON_FCMGT: fcmp(vf, rd, rn, rm, gt); break;
+ case NEON_FRECPS: frecps(vf, rd, rn, rm); break;
+ case NEON_FRSQRTS: frsqrts(vf, rd, rn, rm); break;
+ case NEON_FABD: fabd(vf, rd, rn, rm); break;
+ case NEON_FADDP: faddp(vf, rd, rn, rm); break;
+ case NEON_FMAXP: fmaxp(vf, rd, rn, rm); break;
+ case NEON_FMAXNMP: fmaxnmp(vf, rd, rn, rm); break;
+ case NEON_FMINP: fminp(vf, rd, rn, rm); break;
+ case NEON_FMINNMP: fminnmp(vf, rd, rn, rm); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ } else {
+ VectorFormat vf = nfd.GetVectorFormat();
+ switch (instr->Mask(NEON3SameMask)) {
+ case NEON_ADD: add(vf, rd, rn, rm); break;
+ case NEON_ADDP: addp(vf, rd, rn, rm); break;
+ case NEON_CMEQ: cmp(vf, rd, rn, rm, eq); break;
+ case NEON_CMGE: cmp(vf, rd, rn, rm, ge); break;
+ case NEON_CMGT: cmp(vf, rd, rn, rm, gt); break;
+ case NEON_CMHI: cmp(vf, rd, rn, rm, hi); break;
+ case NEON_CMHS: cmp(vf, rd, rn, rm, hs); break;
+ case NEON_CMTST: cmptst(vf, rd, rn, rm); break;
+ case NEON_MLS: mls(vf, rd, rn, rm); break;
+ case NEON_MLA: mla(vf, rd, rn, rm); break;
+ case NEON_MUL: mul(vf, rd, rn, rm); break;
+ case NEON_PMUL: pmul(vf, rd, rn, rm); break;
+ case NEON_SMAX: smax(vf, rd, rn, rm); break;
+ case NEON_SMAXP: smaxp(vf, rd, rn, rm); break;
+ case NEON_SMIN: smin(vf, rd, rn, rm); break;
+ case NEON_SMINP: sminp(vf, rd, rn, rm); break;
+ case NEON_SUB: sub(vf, rd, rn, rm); break;
+ case NEON_UMAX: umax(vf, rd, rn, rm); break;
+ case NEON_UMAXP: umaxp(vf, rd, rn, rm); break;
+ case NEON_UMIN: umin(vf, rd, rn, rm); break;
+ case NEON_UMINP: uminp(vf, rd, rn, rm); break;
+ case NEON_SSHL: sshl(vf, rd, rn, rm); break;
+ case NEON_USHL: ushl(vf, rd, rn, rm); break;
+ case NEON_SABD: absdiff(vf, rd, rn, rm, true); break;
+ case NEON_UABD: absdiff(vf, rd, rn, rm, false); break;
+ case NEON_SABA: saba(vf, rd, rn, rm); break;
+ case NEON_UABA: uaba(vf, rd, rn, rm); break;
+ case NEON_UQADD: add(vf, rd, rn, rm).UnsignedSaturate(vf); break;
+ case NEON_SQADD: add(vf, rd, rn, rm).SignedSaturate(vf); break;
+ case NEON_UQSUB: sub(vf, rd, rn, rm).UnsignedSaturate(vf); break;
+ case NEON_SQSUB: sub(vf, rd, rn, rm).SignedSaturate(vf); break;
+ case NEON_SQDMULH: sqdmulh(vf, rd, rn, rm); break;
+ case NEON_SQRDMULH: sqrdmulh(vf, rd, rn, rm); break;
+ case NEON_UQSHL: ushl(vf, rd, rn, rm).UnsignedSaturate(vf); break;
+ case NEON_SQSHL: sshl(vf, rd, rn, rm).SignedSaturate(vf); break;
+ case NEON_URSHL: ushl(vf, rd, rn, rm).Round(vf); break;
+ case NEON_SRSHL: sshl(vf, rd, rn, rm).Round(vf); break;
+ case NEON_UQRSHL:
+ ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
+ break;
+ case NEON_SQRSHL:
+ sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
+ break;
+ case NEON_UHADD:
+ add(vf, rd, rn, rm).Uhalve(vf);
+ break;
+ case NEON_URHADD:
+ add(vf, rd, rn, rm).Uhalve(vf).Round(vf);
+ break;
+ case NEON_SHADD:
+ add(vf, rd, rn, rm).Halve(vf);
+ break;
+ case NEON_SRHADD:
+ add(vf, rd, rn, rm).Halve(vf).Round(vf);
+ break;
+ case NEON_UHSUB:
+ sub(vf, rd, rn, rm).Uhalve(vf);
+ break;
+ case NEON_SHSUB:
+ sub(vf, rd, rn, rm).Halve(vf);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+}
+
+
+void Simulator::VisitNEON3Different(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr);
+ VectorFormat vf = nfd.GetVectorFormat();
+ VectorFormat vf_l = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rm = vreg(instr->Rm());
+
+ switch (instr->Mask(NEON3DifferentMask)) {
+ case NEON_PMULL: pmull(vf_l, rd, rn, rm); break;
+ case NEON_PMULL2: pmull2(vf_l, rd, rn, rm); break;
+ case NEON_UADDL: uaddl(vf_l, rd, rn, rm); break;
+ case NEON_UADDL2: uaddl2(vf_l, rd, rn, rm); break;
+ case NEON_SADDL: saddl(vf_l, rd, rn, rm); break;
+ case NEON_SADDL2: saddl2(vf_l, rd, rn, rm); break;
+ case NEON_USUBL: usubl(vf_l, rd, rn, rm); break;
+ case NEON_USUBL2: usubl2(vf_l, rd, rn, rm); break;
+ case NEON_SSUBL: ssubl(vf_l, rd, rn, rm); break;
+ case NEON_SSUBL2: ssubl2(vf_l, rd, rn, rm); break;
+ case NEON_SABAL: sabal(vf_l, rd, rn, rm); break;
+ case NEON_SABAL2: sabal2(vf_l, rd, rn, rm); break;
+ case NEON_UABAL: uabal(vf_l, rd, rn, rm); break;
+ case NEON_UABAL2: uabal2(vf_l, rd, rn, rm); break;
+ case NEON_SABDL: sabdl(vf_l, rd, rn, rm); break;
+ case NEON_SABDL2: sabdl2(vf_l, rd, rn, rm); break;
+ case NEON_UABDL: uabdl(vf_l, rd, rn, rm); break;
+ case NEON_UABDL2: uabdl2(vf_l, rd, rn, rm); break;
+ case NEON_SMLAL: smlal(vf_l, rd, rn, rm); break;
+ case NEON_SMLAL2: smlal2(vf_l, rd, rn, rm); break;
+ case NEON_UMLAL: umlal(vf_l, rd, rn, rm); break;
+ case NEON_UMLAL2: umlal2(vf_l, rd, rn, rm); break;
+ case NEON_SMLSL: smlsl(vf_l, rd, rn, rm); break;
+ case NEON_SMLSL2: smlsl2(vf_l, rd, rn, rm); break;
+ case NEON_UMLSL: umlsl(vf_l, rd, rn, rm); break;
+ case NEON_UMLSL2: umlsl2(vf_l, rd, rn, rm); break;
+ case NEON_SMULL: smull(vf_l, rd, rn, rm); break;
+ case NEON_SMULL2: smull2(vf_l, rd, rn, rm); break;
+ case NEON_UMULL: umull(vf_l, rd, rn, rm); break;
+ case NEON_UMULL2: umull2(vf_l, rd, rn, rm); break;
+ case NEON_SQDMLAL: sqdmlal(vf_l, rd, rn, rm); break;
+ case NEON_SQDMLAL2: sqdmlal2(vf_l, rd, rn, rm); break;
+ case NEON_SQDMLSL: sqdmlsl(vf_l, rd, rn, rm); break;
+ case NEON_SQDMLSL2: sqdmlsl2(vf_l, rd, rn, rm); break;
+ case NEON_SQDMULL: sqdmull(vf_l, rd, rn, rm); break;
+ case NEON_SQDMULL2: sqdmull2(vf_l, rd, rn, rm); break;
+ case NEON_UADDW: uaddw(vf_l, rd, rn, rm); break;
+ case NEON_UADDW2: uaddw2(vf_l, rd, rn, rm); break;
+ case NEON_SADDW: saddw(vf_l, rd, rn, rm); break;
+ case NEON_SADDW2: saddw2(vf_l, rd, rn, rm); break;
+ case NEON_USUBW: usubw(vf_l, rd, rn, rm); break;
+ case NEON_USUBW2: usubw2(vf_l, rd, rn, rm); break;
+ case NEON_SSUBW: ssubw(vf_l, rd, rn, rm); break;
+ case NEON_SSUBW2: ssubw2(vf_l, rd, rn, rm); break;
+ case NEON_ADDHN: addhn(vf, rd, rn, rm); break;
+ case NEON_ADDHN2: addhn2(vf, rd, rn, rm); break;
+ case NEON_RADDHN: raddhn(vf, rd, rn, rm); break;
+ case NEON_RADDHN2: raddhn2(vf, rd, rn, rm); break;
+ case NEON_SUBHN: subhn(vf, rd, rn, rm); break;
+ case NEON_SUBHN2: subhn2(vf, rd, rn, rm); break;
+ case NEON_RSUBHN: rsubhn(vf, rd, rn, rm); break;
+ case NEON_RSUBHN2: rsubhn2(vf, rd, rn, rm); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONAcrossLanes(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr);
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+
+ // The input operand's VectorFormat is passed for these instructions.
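+  // For example, addv() is handed the 16B/8H/4S format of its source vector
+  // even though it produces only a single scalar result lane.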
+ if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
+ VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
+
+ switch (instr->Mask(NEONAcrossLanesFPMask)) {
+ case NEON_FMAXV: fmaxv(vf, rd, rn); break;
+ case NEON_FMINV: fminv(vf, rd, rn); break;
+ case NEON_FMAXNMV: fmaxnmv(vf, rd, rn); break;
+ case NEON_FMINNMV: fminnmv(vf, rd, rn); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ } else {
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ switch (instr->Mask(NEONAcrossLanesMask)) {
+ case NEON_ADDV: addv(vf, rd, rn); break;
+ case NEON_SMAXV: smaxv(vf, rd, rn); break;
+ case NEON_SMINV: sminv(vf, rd, rn); break;
+ case NEON_UMAXV: umaxv(vf, rd, rn); break;
+ case NEON_UMINV: uminv(vf, rd, rn); break;
+ case NEON_SADDLV: saddlv(vf, rd, rn); break;
+ case NEON_UADDLV: uaddlv(vf, rd, rn); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+}
+
+
+void Simulator::VisitNEONByIndexedElement(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr);
+ VectorFormat vf_r = nfd.GetVectorFormat();
+ VectorFormat vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+
+ ByElementOp Op = NULL;
+
+ int rm_reg = instr->Rm();
+ int index = (instr->NEONH() << 1) | instr->NEONL();
+ if (instr->NEONSize() == 1) {
+ rm_reg &= 0xf;
+ index = (index << 1) | instr->NEONM();
+ }
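+  // For 16-bit (H) elements the element index is H:L:M (0-7) and only the
+  // low four bits of Rm are used; for 32-bit (S) elements it is H:L (0-3).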
+
+ switch (instr->Mask(NEONByIndexedElementMask)) {
+ case NEON_MUL_byelement: Op = &Simulator::mul; vf = vf_r; break;
+ case NEON_MLA_byelement: Op = &Simulator::mla; vf = vf_r; break;
+ case NEON_MLS_byelement: Op = &Simulator::mls; vf = vf_r; break;
+ case NEON_SQDMULH_byelement: Op = &Simulator::sqdmulh; vf = vf_r; break;
+ case NEON_SQRDMULH_byelement: Op = &Simulator::sqrdmulh; vf = vf_r; break;
+ case NEON_SMULL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::smull2;
+ } else {
+ Op = &Simulator::smull;
+ }
+ break;
+ case NEON_UMULL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::umull2;
+ } else {
+ Op = &Simulator::umull;
+ }
+ break;
+ case NEON_SMLAL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::smlal2;
+ } else {
+ Op = &Simulator::smlal;
+ }
+ break;
+ case NEON_UMLAL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::umlal2;
+ } else {
+ Op = &Simulator::umlal;
+ }
+ break;
+ case NEON_SMLSL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::smlsl2;
+ } else {
+ Op = &Simulator::smlsl;
+ }
+ break;
+ case NEON_UMLSL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::umlsl2;
+ } else {
+ Op = &Simulator::umlsl;
+ }
+ break;
+ case NEON_SQDMULL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::sqdmull2;
+ } else {
+ Op = &Simulator::sqdmull;
+ }
+ break;
+ case NEON_SQDMLAL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::sqdmlal2;
+ } else {
+ Op = &Simulator::sqdmlal;
+ }
+ break;
+ case NEON_SQDMLSL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::sqdmlsl2;
+ } else {
+ Op = &Simulator::sqdmlsl;
+ }
+ break;
+ default:
+ index = instr->NEONH();
+ if ((instr->FPType() & 1) == 0) {
+ index = (index << 1) | instr->NEONL();
+ }
+
+ vf = nfd.GetVectorFormat(nfd.FPFormatMap());
+
+ switch (instr->Mask(NEONByIndexedElementFPMask)) {
+ case NEON_FMUL_byelement: Op = &Simulator::fmul; break;
+ case NEON_FMLA_byelement: Op = &Simulator::fmla; break;
+ case NEON_FMLS_byelement: Op = &Simulator::fmls; break;
+ case NEON_FMULX_byelement: Op = &Simulator::fmulx; break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ }
+
+ (this->*Op)(vf, rd, rn, vreg(rm_reg), index);
+}
+
+
+void Simulator::VisitNEONCopy(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ int imm5 = instr->ImmNEON5();
+ int tz = CountTrailingZeros(imm5, 32);
+ int reg_index = imm5 >> (tz + 1);
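+  // imm5 encodes both the element size and the index: the lowest set bit
+  // selects the size (bit 0 = B, bit 1 = H, bit 2 = S, bit 3 = D) and the
+  // higher bits hold the index. For example, imm5 = 0b10110 gives tz = 1
+  // (H lanes) and reg_index = 0b101 = 5.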
+
+ if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) {
+ int imm4 = instr->ImmNEON4();
+ int rn_index = imm4 >> tz;
+ ins_element(vf, rd, reg_index, rn, rn_index);
+ } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) {
+ ins_immediate(vf, rd, reg_index, xreg(instr->Rn()));
+ } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) {
+ uint64_t value = LogicVRegister(rn).Uint(vf, reg_index);
+ value &= MaxUintFromFormat(vf);
+ set_xreg(instr->Rd(), value);
+ } else if (instr->Mask(NEONCopyUmovMask) == NEON_SMOV) {
+ int64_t value = LogicVRegister(rn).Int(vf, reg_index);
+ if (instr->NEONQ()) {
+ set_xreg(instr->Rd(), value);
+ } else {
+ set_wreg(instr->Rd(), (int32_t)value);
+ }
+ } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) {
+ dup_element(vf, rd, rn, reg_index);
+ } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) {
+ dup_immediate(vf, rd, xreg(instr->Rn()));
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONExtract(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rm = vreg(instr->Rm());
+ if (instr->Mask(NEONExtractMask) == NEON_EXT) {
+ int index = instr->ImmNEONExt();
+ ext(vf, rd, rn, rm, index);
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
+ AddrMode addr_mode) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ uint64_t addr_base = xreg(instr->Rn(), Reg31IsStackPointer);
+ int reg_size = RegisterSizeInBytesFromFormat(vf);
+
+ int reg[4];
+ uint64_t addr[4];
+ for (int i = 0; i < 4; i++) {
+ reg[i] = (instr->Rt() + i) % kNumberOfVRegisters;
+ addr[i] = addr_base + (i * reg_size);
+ }
+ int count = 1;
+ bool log_read = true;
+
+ Instr itype = instr->Mask(NEONLoadStoreMultiStructMask);
+ if (((itype == NEON_LD1_1v) || (itype == NEON_LD1_2v) ||
+ (itype == NEON_LD1_3v) || (itype == NEON_LD1_4v) ||
+ (itype == NEON_ST1_1v) || (itype == NEON_ST1_2v) ||
+ (itype == NEON_ST1_3v) || (itype == NEON_ST1_4v)) &&
+ (instr->Bits(20, 16) != 0)) {
+ VIXL_UNREACHABLE();
+ }
+
+ // We use the PostIndex mask here, as it works in this case for both Offset
+ // and PostIndex addressing.
+ switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
+ case NEON_LD1_4v:
+ case NEON_LD1_4v_post: ld1(vf, vreg(reg[3]), addr[3]); count++;
+ VIXL_FALLTHROUGH();
+ case NEON_LD1_3v:
+ case NEON_LD1_3v_post: ld1(vf, vreg(reg[2]), addr[2]); count++;
+ VIXL_FALLTHROUGH();
+ case NEON_LD1_2v:
+ case NEON_LD1_2v_post: ld1(vf, vreg(reg[1]), addr[1]); count++;
+ VIXL_FALLTHROUGH();
+ case NEON_LD1_1v:
+ case NEON_LD1_1v_post:
+ ld1(vf, vreg(reg[0]), addr[0]);
+ log_read = true;
+ break;
+ case NEON_ST1_4v:
+ case NEON_ST1_4v_post: st1(vf, vreg(reg[3]), addr[3]); count++;
+ VIXL_FALLTHROUGH();
+ case NEON_ST1_3v:
+ case NEON_ST1_3v_post: st1(vf, vreg(reg[2]), addr[2]); count++;
+ VIXL_FALLTHROUGH();
+ case NEON_ST1_2v:
+ case NEON_ST1_2v_post: st1(vf, vreg(reg[1]), addr[1]); count++;
+ VIXL_FALLTHROUGH();
+ case NEON_ST1_1v:
+ case NEON_ST1_1v_post:
+ st1(vf, vreg(reg[0]), addr[0]);
+ log_read = false;
+ break;
+ case NEON_LD2_post:
+ case NEON_LD2:
+ ld2(vf, vreg(reg[0]), vreg(reg[1]), addr[0]);
+ count = 2;
+ break;
+ case NEON_ST2:
+ case NEON_ST2_post:
+ st2(vf, vreg(reg[0]), vreg(reg[1]), addr[0]);
+ count = 2;
+ break;
+ case NEON_LD3_post:
+ case NEON_LD3:
+ ld3(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), addr[0]);
+ count = 3;
+ break;
+ case NEON_ST3:
+ case NEON_ST3_post:
+ st3(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), addr[0]);
+ count = 3;
+ break;
+ case NEON_ST4:
+ case NEON_ST4_post:
+ st4(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), vreg(reg[3]),
+ addr[0]);
+ count = 4;
+ break;
+ case NEON_LD4_post:
+ case NEON_LD4:
+ ld4(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), vreg(reg[3]),
+ addr[0]);
+ count = 4;
+ break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+
+ // Explicitly log the register update whilst we have type information.
+ for (int i = 0; i < count; i++) {
+ // For de-interleaving loads, only print the base address.
+ int lane_size = LaneSizeInBytesFromFormat(vf);
+ PrintRegisterFormat format = GetPrintRegisterFormatTryFP(
+ GetPrintRegisterFormatForSize(reg_size, lane_size));
+ if (log_read) {
+ LogVRead(addr_base, reg[i], format);
+ } else {
+ LogVWrite(addr_base, reg[i], format);
+ }
+ }
+
+ if (addr_mode == PostIndex) {
+ int rm = instr->Rm();
+ // The immediate post index addressing mode is indicated by rm = 31.
+ // The immediate is implied by the number of vector registers used.
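+    // For example, "ld1 {v0.16b, v1.16b}, [x0], #32" advances x0 by
+    // 2 * 16 = 32 bytes, while the register form adds xreg(rm).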
+ addr_base += (rm == 31) ? RegisterSizeInBytesFromFormat(vf) * count
+ : xreg(rm);
+ set_xreg(instr->Rn(), addr_base);
+ } else {
+ VIXL_ASSERT(addr_mode == Offset);
+ }
+}
+
+
+void Simulator::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
+ NEONLoadStoreMultiStructHelper(instr, Offset);
+}
+
+
+void Simulator::VisitNEONLoadStoreMultiStructPostIndex(
+ const Instruction* instr) {
+ NEONLoadStoreMultiStructHelper(instr, PostIndex);
+}
+
+
+void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
+ AddrMode addr_mode) {
+ uint64_t addr = xreg(instr->Rn(), Reg31IsStackPointer);
+ int rt = instr->Rt();
+
+ Instr itype = instr->Mask(NEONLoadStoreSingleStructMask);
+ if (((itype == NEON_LD1_b) || (itype == NEON_LD1_h) ||
+ (itype == NEON_LD1_s) || (itype == NEON_LD1_d)) &&
+ (instr->Bits(20, 16) != 0)) {
+ VIXL_UNREACHABLE();
+ }
+
+ // We use the PostIndex mask here, as it works in this case for both Offset
+ // and PostIndex addressing.
+ bool do_load = false;
+
+ bool replicating = false;
+
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+ VectorFormat vf_t = nfd.GetVectorFormat();
+
+ VectorFormat vf = kFormat16B;
+ switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) {
+ case NEON_LD1_b:
+ case NEON_LD1_b_post:
+ case NEON_LD2_b:
+ case NEON_LD2_b_post:
+ case NEON_LD3_b:
+ case NEON_LD3_b_post:
+ case NEON_LD4_b:
+ case NEON_LD4_b_post: do_load = true;
+ VIXL_FALLTHROUGH();
+ case NEON_ST1_b:
+ case NEON_ST1_b_post:
+ case NEON_ST2_b:
+ case NEON_ST2_b_post:
+ case NEON_ST3_b:
+ case NEON_ST3_b_post:
+ case NEON_ST4_b:
+ case NEON_ST4_b_post: break;
+
+ case NEON_LD1_h:
+ case NEON_LD1_h_post:
+ case NEON_LD2_h:
+ case NEON_LD2_h_post:
+ case NEON_LD3_h:
+ case NEON_LD3_h_post:
+ case NEON_LD4_h:
+ case NEON_LD4_h_post: do_load = true;
+ VIXL_FALLTHROUGH();
+ case NEON_ST1_h:
+ case NEON_ST1_h_post:
+ case NEON_ST2_h:
+ case NEON_ST2_h_post:
+ case NEON_ST3_h:
+ case NEON_ST3_h_post:
+ case NEON_ST4_h:
+ case NEON_ST4_h_post: vf = kFormat8H; break;
+ case NEON_LD1_s:
+ case NEON_LD1_s_post:
+ case NEON_LD2_s:
+ case NEON_LD2_s_post:
+ case NEON_LD3_s:
+ case NEON_LD3_s_post:
+ case NEON_LD4_s:
+ case NEON_LD4_s_post: do_load = true;
+ VIXL_FALLTHROUGH();
+ case NEON_ST1_s:
+ case NEON_ST1_s_post:
+ case NEON_ST2_s:
+ case NEON_ST2_s_post:
+ case NEON_ST3_s:
+ case NEON_ST3_s_post:
+ case NEON_ST4_s:
+ case NEON_ST4_s_post: {
+ VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d);
+ VIXL_STATIC_ASSERT(
+ (NEON_LD1_s_post | (1 << NEONLSSize_offset)) == NEON_LD1_d_post);
+ VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d);
+ VIXL_STATIC_ASSERT(
+ (NEON_ST1_s_post | (1 << NEONLSSize_offset)) == NEON_ST1_d_post);
+ vf = ((instr->NEONLSSize() & 1) == 0) ? kFormat4S : kFormat2D;
+ break;
+ }
+
+ case NEON_LD1R:
+ case NEON_LD1R_post:
+ case NEON_LD2R:
+ case NEON_LD2R_post:
+ case NEON_LD3R:
+ case NEON_LD3R_post:
+ case NEON_LD4R:
+ case NEON_LD4R_post: {
+ vf = vf_t;
+ do_load = true;
+ replicating = true;
+ break;
+ }
+ default: VIXL_UNIMPLEMENTED();
+ }
+
+ PrintRegisterFormat print_format =
+ GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
+ // Make sure that the print_format only includes a single lane.
+ print_format =
+ static_cast<PrintRegisterFormat>(print_format & ~kPrintRegAsVectorMask);
+
+ int esize = LaneSizeInBytesFromFormat(vf);
+ int index_shift = LaneSizeInBytesLog2FromFormat(vf);
+ int lane = instr->NEONLSIndex(index_shift);
+ int scale = 0;
+ int rt2 = (rt + 1) % kNumberOfVRegisters;
+ int rt3 = (rt2 + 1) % kNumberOfVRegisters;
+ int rt4 = (rt3 + 1) % kNumberOfVRegisters;
+ switch (instr->Mask(NEONLoadStoreSingleLenMask)) {
+ case NEONLoadStoreSingle1:
+ scale = 1;
+ if (do_load) {
+ if (replicating) {
+ ld1r(vf, vreg(rt), addr);
+ } else {
+ ld1(vf, vreg(rt), lane, addr);
+ }
+ LogVRead(addr, rt, print_format, lane);
+ } else {
+ st1(vf, vreg(rt), lane, addr);
+ LogVWrite(addr, rt, print_format, lane);
+ }
+ break;
+ case NEONLoadStoreSingle2:
+ scale = 2;
+ if (do_load) {
+ if (replicating) {
+ ld2r(vf, vreg(rt), vreg(rt2), addr);
+ } else {
+ ld2(vf, vreg(rt), vreg(rt2), lane, addr);
+ }
+ LogVRead(addr, rt, print_format, lane);
+ LogVRead(addr + esize, rt2, print_format, lane);
+ } else {
+ st2(vf, vreg(rt), vreg(rt2), lane, addr);
+ LogVWrite(addr, rt, print_format, lane);
+ LogVWrite(addr + esize, rt2, print_format, lane);
+ }
+ break;
+ case NEONLoadStoreSingle3:
+ scale = 3;
+ if (do_load) {
+ if (replicating) {
+ ld3r(vf, vreg(rt), vreg(rt2), vreg(rt3), addr);
+ } else {
+ ld3(vf, vreg(rt), vreg(rt2), vreg(rt3), lane, addr);
+ }
+ LogVRead(addr, rt, print_format, lane);
+ LogVRead(addr + esize, rt2, print_format, lane);
+ LogVRead(addr + (2 * esize), rt3, print_format, lane);
+ } else {
+ st3(vf, vreg(rt), vreg(rt2), vreg(rt3), lane, addr);
+ LogVWrite(addr, rt, print_format, lane);
+ LogVWrite(addr + esize, rt2, print_format, lane);
+ LogVWrite(addr + (2 * esize), rt3, print_format, lane);
+ }
+ break;
+ case NEONLoadStoreSingle4:
+ scale = 4;
+ if (do_load) {
+ if (replicating) {
+ ld4r(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), addr);
+ } else {
+ ld4(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), lane, addr);
+ }
+ LogVRead(addr, rt, print_format, lane);
+ LogVRead(addr + esize, rt2, print_format, lane);
+ LogVRead(addr + (2 * esize), rt3, print_format, lane);
+ LogVRead(addr + (3 * esize), rt4, print_format, lane);
+ } else {
+ st4(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), lane, addr);
+ LogVWrite(addr, rt, print_format, lane);
+ LogVWrite(addr + esize, rt2, print_format, lane);
+ LogVWrite(addr + (2 * esize), rt3, print_format, lane);
+ LogVWrite(addr + (3 * esize), rt4, print_format, lane);
+ }
+ break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+
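+  // As with the multi-structure forms, rm == 31 selects the immediate
+  // post-index, whose value is implied by the transfer size. For example,
+  // "ld3 {v0.s, v1.s, v2.s}[1], [x0], #12" advances x0 by 3 * 4 = 12 bytes.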
+ if (addr_mode == PostIndex) {
+ int rm = instr->Rm();
+ int lane_size = LaneSizeInBytesFromFormat(vf);
+ set_xreg(instr->Rn(), addr + ((rm == 31) ? (scale * lane_size) : xreg(rm)));
+ }
+}
+
+
+void Simulator::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
+ NEONLoadStoreSingleStructHelper(instr, Offset);
+}
+
+
+void Simulator::VisitNEONLoadStoreSingleStructPostIndex(
+ const Instruction* instr) {
+ NEONLoadStoreSingleStructHelper(instr, PostIndex);
+}
+
+
+void Simulator::VisitNEONModifiedImmediate(const Instruction* instr) {
+ SimVRegister& rd = vreg(instr->Rd());
+ int cmode = instr->NEONCmode();
+ int cmode_3_1 = (cmode >> 1) & 7;
+ int cmode_3 = (cmode >> 3) & 1;
+ int cmode_2 = (cmode >> 2) & 1;
+ int cmode_1 = (cmode >> 1) & 1;
+ int cmode_0 = cmode & 1;
+ int q = instr->NEONQ();
+ int op_bit = instr->NEONModImmOp();
+ uint64_t imm8 = instr->ImmNEONabcdefgh();
+
+ // Find the format and immediate value
+ uint64_t imm = 0;
+ VectorFormat vform = kFormatUndefined;
+ switch (cmode_3_1) {
+ case 0x0:
+ case 0x1:
+ case 0x2:
+ case 0x3:
+ vform = (q == 1) ? kFormat4S : kFormat2S;
+ imm = imm8 << (8 * cmode_3_1);
+ break;
+ case 0x4:
+ case 0x5:
+ vform = (q == 1) ? kFormat8H : kFormat4H;
+ imm = imm8 << (8 * cmode_1);
+ break;
+ case 0x6:
+ vform = (q == 1) ? kFormat4S : kFormat2S;
+ if (cmode_0 == 0) {
+ imm = imm8 << 8 | 0x000000ff;
+ } else {
+ imm = imm8 << 16 | 0x0000ffff;
+ }
+ break;
+ case 0x7:
+ if (cmode_0 == 0 && op_bit == 0) {
+ vform = q ? kFormat16B : kFormat8B;
+ imm = imm8;
+ } else if (cmode_0 == 0 && op_bit == 1) {
+ vform = q ? kFormat2D : kFormat1D;
+ imm = 0;
+ for (int i = 0; i < 8; ++i) {
+ if (imm8 & (1ULL << i)) {
+ imm |= (UINT64_C(0xff) << (8 * i));
+ }
+ }
+ } else { // cmode_0 == 1, cmode == 0xf.
+ if (op_bit == 0) {
+ vform = q ? kFormat4S : kFormat2S;
+ imm = FloatToRawbits(instr->ImmNEONFP32());
+ } else if (q == 1) {
+ vform = kFormat2D;
+ imm = DoubleToRawbits(instr->ImmNEONFP64());
+ } else {
+ VIXL_ASSERT((q == 0) && (op_bit == 1) && (cmode == 0xf));
+ VisitUnallocated(instr);
+ }
+ }
+ break;
+ default: VIXL_UNREACHABLE(); break;
+ }
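+  // For example, cmode = 0b0010 with imm8 = 0xab and Q = 1 replicates
+  // 0x0000ab00 across four S lanes, while cmode = 0b1110 with op = 1 expands
+  // each bit of imm8 into a full byte of a 64-bit immediate.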
+
+ // Find the operation
+ NEONModifiedImmediateOp op;
+ if (cmode_3 == 0) {
+ if (cmode_0 == 0) {
+ op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
+ } else { // cmode<0> == '1'
+ op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
+ }
+ } else { // cmode<3> == '1'
+ if (cmode_2 == 0) {
+ if (cmode_0 == 0) {
+ op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
+ } else { // cmode<0> == '1'
+ op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
+ }
+ } else { // cmode<2> == '1'
+ if (cmode_1 == 0) {
+ op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
+ } else { // cmode<1> == '1'
+ if (cmode_0 == 0) {
+ op = NEONModifiedImmediate_MOVI;
+ } else { // cmode<0> == '1'
+ op = NEONModifiedImmediate_MOVI;
+ }
+ }
+ }
+ }
+
+ // Call the logic function
+ if (op == NEONModifiedImmediate_ORR) {
+ orr(vform, rd, rd, imm);
+ } else if (op == NEONModifiedImmediate_BIC) {
+ bic(vform, rd, rd, imm);
+ } else if (op == NEONModifiedImmediate_MOVI) {
+ movi(vform, rd, imm);
+ } else if (op == NEONModifiedImmediate_MVNI) {
+ mvni(vform, rd, imm);
+ } else {
+ VisitUnimplemented(instr);
+ }
+}
+
+
+void Simulator::VisitNEONScalar2RegMisc(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+
+ if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) {
+ // These instructions all use a two bit size field, except NOT and RBIT,
+ // which use the field to encode the operation.
+ switch (instr->Mask(NEONScalar2RegMiscMask)) {
+ case NEON_CMEQ_zero_scalar: cmp(vf, rd, rn, 0, eq); break;
+ case NEON_CMGE_zero_scalar: cmp(vf, rd, rn, 0, ge); break;
+ case NEON_CMGT_zero_scalar: cmp(vf, rd, rn, 0, gt); break;
+ case NEON_CMLT_zero_scalar: cmp(vf, rd, rn, 0, lt); break;
+ case NEON_CMLE_zero_scalar: cmp(vf, rd, rn, 0, le); break;
+ case NEON_ABS_scalar: abs(vf, rd, rn); break;
+ case NEON_SQABS_scalar: abs(vf, rd, rn).SignedSaturate(vf); break;
+ case NEON_NEG_scalar: neg(vf, rd, rn); break;
+ case NEON_SQNEG_scalar: neg(vf, rd, rn).SignedSaturate(vf); break;
+ case NEON_SUQADD_scalar: suqadd(vf, rd, rn); break;
+ case NEON_USQADD_scalar: usqadd(vf, rd, rn); break;
+ default: VIXL_UNIMPLEMENTED(); break;
+ }
+ } else {
+ VectorFormat fpf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
+ FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
+
+ // These instructions all use a one bit size field, except SQXTUN, SQXTN
+ // and UQXTN, which use a two bit size field.
+ switch (instr->Mask(NEONScalar2RegMiscFPMask)) {
+ case NEON_FRECPE_scalar: frecpe(fpf, rd, rn, fpcr_rounding); break;
+ case NEON_FRECPX_scalar: frecpx(fpf, rd, rn); break;
+ case NEON_FRSQRTE_scalar: frsqrte(fpf, rd, rn); break;
+ case NEON_FCMGT_zero_scalar: fcmp_zero(fpf, rd, rn, gt); break;
+ case NEON_FCMGE_zero_scalar: fcmp_zero(fpf, rd, rn, ge); break;
+ case NEON_FCMEQ_zero_scalar: fcmp_zero(fpf, rd, rn, eq); break;
+ case NEON_FCMLE_zero_scalar: fcmp_zero(fpf, rd, rn, le); break;
+ case NEON_FCMLT_zero_scalar: fcmp_zero(fpf, rd, rn, lt); break;
+ case NEON_SCVTF_scalar: scvtf(fpf, rd, rn, 0, fpcr_rounding); break;
+ case NEON_UCVTF_scalar: ucvtf(fpf, rd, rn, 0, fpcr_rounding); break;
+ case NEON_FCVTNS_scalar: fcvts(fpf, rd, rn, FPTieEven); break;
+ case NEON_FCVTNU_scalar: fcvtu(fpf, rd, rn, FPTieEven); break;
+ case NEON_FCVTPS_scalar: fcvts(fpf, rd, rn, FPPositiveInfinity); break;
+ case NEON_FCVTPU_scalar: fcvtu(fpf, rd, rn, FPPositiveInfinity); break;
+ case NEON_FCVTMS_scalar: fcvts(fpf, rd, rn, FPNegativeInfinity); break;
+ case NEON_FCVTMU_scalar: fcvtu(fpf, rd, rn, FPNegativeInfinity); break;
+ case NEON_FCVTZS_scalar: fcvts(fpf, rd, rn, FPZero); break;
+ case NEON_FCVTZU_scalar: fcvtu(fpf, rd, rn, FPZero); break;
+ case NEON_FCVTAS_scalar: fcvts(fpf, rd, rn, FPTieAway); break;
+ case NEON_FCVTAU_scalar: fcvtu(fpf, rd, rn, FPTieAway); break;
+ case NEON_FCVTXN_scalar:
+ // Unlike all of the other FP instructions above, fcvtxn encodes dest
+ // size S as size<0>=1. There's only one case, so we ignore the form.
+ VIXL_ASSERT(instr->Bit(22) == 1);
+ fcvtxn(kFormatS, rd, rn);
+ break;
+ default:
+ switch (instr->Mask(NEONScalar2RegMiscMask)) {
+ case NEON_SQXTN_scalar: sqxtn(vf, rd, rn); break;
+ case NEON_UQXTN_scalar: uqxtn(vf, rd, rn); break;
+ case NEON_SQXTUN_scalar: sqxtun(vf, rd, rn); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+ }
+}
+
+
+void Simulator::VisitNEONScalar3Diff(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rm = vreg(instr->Rm());
+ switch (instr->Mask(NEONScalar3DiffMask)) {
+ case NEON_SQDMLAL_scalar: sqdmlal(vf, rd, rn, rm); break;
+ case NEON_SQDMLSL_scalar: sqdmlsl(vf, rd, rn, rm); break;
+ case NEON_SQDMULL_scalar: sqdmull(vf, rd, rn, rm); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONScalar3Same(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rm = vreg(instr->Rm());
+
+ if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) {
+ vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
+ switch (instr->Mask(NEONScalar3SameFPMask)) {
+ case NEON_FMULX_scalar: fmulx(vf, rd, rn, rm); break;
+ case NEON_FACGE_scalar: fabscmp(vf, rd, rn, rm, ge); break;
+ case NEON_FACGT_scalar: fabscmp(vf, rd, rn, rm, gt); break;
+ case NEON_FCMEQ_scalar: fcmp(vf, rd, rn, rm, eq); break;
+ case NEON_FCMGE_scalar: fcmp(vf, rd, rn, rm, ge); break;
+ case NEON_FCMGT_scalar: fcmp(vf, rd, rn, rm, gt); break;
+ case NEON_FRECPS_scalar: frecps(vf, rd, rn, rm); break;
+ case NEON_FRSQRTS_scalar: frsqrts(vf, rd, rn, rm); break;
+ case NEON_FABD_scalar: fabd(vf, rd, rn, rm); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ } else {
+ switch (instr->Mask(NEONScalar3SameMask)) {
+ case NEON_ADD_scalar: add(vf, rd, rn, rm); break;
+ case NEON_SUB_scalar: sub(vf, rd, rn, rm); break;
+ case NEON_CMEQ_scalar: cmp(vf, rd, rn, rm, eq); break;
+ case NEON_CMGE_scalar: cmp(vf, rd, rn, rm, ge); break;
+ case NEON_CMGT_scalar: cmp(vf, rd, rn, rm, gt); break;
+ case NEON_CMHI_scalar: cmp(vf, rd, rn, rm, hi); break;
+ case NEON_CMHS_scalar: cmp(vf, rd, rn, rm, hs); break;
+ case NEON_CMTST_scalar: cmptst(vf, rd, rn, rm); break;
+ case NEON_USHL_scalar: ushl(vf, rd, rn, rm); break;
+ case NEON_SSHL_scalar: sshl(vf, rd, rn, rm); break;
+ case NEON_SQDMULH_scalar: sqdmulh(vf, rd, rn, rm); break;
+ case NEON_SQRDMULH_scalar: sqrdmulh(vf, rd, rn, rm); break;
+ case NEON_UQADD_scalar:
+ add(vf, rd, rn, rm).UnsignedSaturate(vf);
+ break;
+ case NEON_SQADD_scalar:
+ add(vf, rd, rn, rm).SignedSaturate(vf);
+ break;
+ case NEON_UQSUB_scalar:
+ sub(vf, rd, rn, rm).UnsignedSaturate(vf);
+ break;
+ case NEON_SQSUB_scalar:
+ sub(vf, rd, rn, rm).SignedSaturate(vf);
+ break;
+ case NEON_UQSHL_scalar:
+ ushl(vf, rd, rn, rm).UnsignedSaturate(vf);
+ break;
+ case NEON_SQSHL_scalar:
+ sshl(vf, rd, rn, rm).SignedSaturate(vf);
+ break;
+ case NEON_URSHL_scalar:
+ ushl(vf, rd, rn, rm).Round(vf);
+ break;
+ case NEON_SRSHL_scalar:
+ sshl(vf, rd, rn, rm).Round(vf);
+ break;
+ case NEON_UQRSHL_scalar:
+ ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
+ break;
+ case NEON_SQRSHL_scalar:
+ sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+}
+
+
+void Simulator::VisitNEONScalarByIndexedElement(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+ VectorFormat vf_r = nfd.GetVectorFormat(nfd.ScalarFormatMap());
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ ByElementOp Op = NULL;
+
+ int rm_reg = instr->Rm();
+ int index = (instr->NEONH() << 1) | instr->NEONL();
+ if (instr->NEONSize() == 1) {
+ rm_reg &= 0xf;
+ index = (index << 1) | instr->NEONM();
+ }
+
+ switch (instr->Mask(NEONScalarByIndexedElementMask)) {
+ case NEON_SQDMULL_byelement_scalar: Op = &Simulator::sqdmull; break;
+ case NEON_SQDMLAL_byelement_scalar: Op = &Simulator::sqdmlal; break;
+ case NEON_SQDMLSL_byelement_scalar: Op = &Simulator::sqdmlsl; break;
+ case NEON_SQDMULH_byelement_scalar:
+ Op = &Simulator::sqdmulh;
+ vf = vf_r;
+ break;
+ case NEON_SQRDMULH_byelement_scalar:
+ Op = &Simulator::sqrdmulh;
+ vf = vf_r;
+ break;
+ default:
+ vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
+ index = instr->NEONH();
+ if ((instr->FPType() & 1) == 0) {
+ index = (index << 1) | instr->NEONL();
+ }
+ switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
+ case NEON_FMUL_byelement_scalar: Op = &Simulator::fmul; break;
+ case NEON_FMLA_byelement_scalar: Op = &Simulator::fmla; break;
+ case NEON_FMLS_byelement_scalar: Op = &Simulator::fmls; break;
+ case NEON_FMULX_byelement_scalar: Op = &Simulator::fmulx; break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ }
+
+ (this->*Op)(vf, rd, rn, vreg(rm_reg), index);
+}
+
+
+void Simulator::VisitNEONScalarCopy(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularScalarFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+
+ if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) {
+ int imm5 = instr->ImmNEON5();
+ int tz = CountTrailingZeros(imm5, 32);
+ int rn_index = imm5 >> (tz + 1);
+ dup_element(vf, rd, rn, rn_index);
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONScalarPairwise(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ switch (instr->Mask(NEONScalarPairwiseMask)) {
+ case NEON_ADDP_scalar: addp(vf, rd, rn); break;
+ case NEON_FADDP_scalar: faddp(vf, rd, rn); break;
+ case NEON_FMAXP_scalar: fmaxp(vf, rd, rn); break;
+ case NEON_FMAXNMP_scalar: fmaxnmp(vf, rd, rn); break;
+ case NEON_FMINP_scalar: fminp(vf, rd, rn); break;
+ case NEON_FMINNMP_scalar: fminnmp(vf, rd, rn); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONScalarShiftImmediate(const Instruction* instr) {
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
+
+ static const NEONFormatMap map = {
+ {22, 21, 20, 19},
+ {NF_UNDEF, NF_B, NF_H, NF_H, NF_S, NF_S, NF_S, NF_S,
+ NF_D, NF_D, NF_D, NF_D, NF_D, NF_D, NF_D, NF_D}
+ };
+ NEONFormatDecoder nfd(instr, &map);
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ int highestSetBit = HighestSetBitPosition(instr->ImmNEONImmh());
+ int immhimmb = instr->ImmNEONImmhImmb();
+ int right_shift = (16 << highestSetBit) - immhimmb;
+ int left_shift = immhimmb - (8 << highestSetBit);
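+  // immh:immb encodes the shift relative to the element size
+  // (8 << HighestSetBit(immh)): a left shift of n is encoded as esize + n and
+  // a right shift of n as 2 * esize - n. For example, for B-sized lanes
+  // immh:immb = 10 means shl #2 or sshr #6, depending on the opcode.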
+ switch (instr->Mask(NEONScalarShiftImmediateMask)) {
+ case NEON_SHL_scalar: shl(vf, rd, rn, left_shift); break;
+ case NEON_SLI_scalar: sli(vf, rd, rn, left_shift); break;
+ case NEON_SQSHL_imm_scalar: sqshl(vf, rd, rn, left_shift); break;
+ case NEON_UQSHL_imm_scalar: uqshl(vf, rd, rn, left_shift); break;
+ case NEON_SQSHLU_scalar: sqshlu(vf, rd, rn, left_shift); break;
+ case NEON_SRI_scalar: sri(vf, rd, rn, right_shift); break;
+ case NEON_SSHR_scalar: sshr(vf, rd, rn, right_shift); break;
+ case NEON_USHR_scalar: ushr(vf, rd, rn, right_shift); break;
+ case NEON_SRSHR_scalar: sshr(vf, rd, rn, right_shift).Round(vf); break;
+ case NEON_URSHR_scalar: ushr(vf, rd, rn, right_shift).Round(vf); break;
+ case NEON_SSRA_scalar: ssra(vf, rd, rn, right_shift); break;
+ case NEON_USRA_scalar: usra(vf, rd, rn, right_shift); break;
+ case NEON_SRSRA_scalar: srsra(vf, rd, rn, right_shift); break;
+ case NEON_URSRA_scalar: ursra(vf, rd, rn, right_shift); break;
+ case NEON_UQSHRN_scalar: uqshrn(vf, rd, rn, right_shift); break;
+ case NEON_UQRSHRN_scalar: uqrshrn(vf, rd, rn, right_shift); break;
+ case NEON_SQSHRN_scalar: sqshrn(vf, rd, rn, right_shift); break;
+ case NEON_SQRSHRN_scalar: sqrshrn(vf, rd, rn, right_shift); break;
+ case NEON_SQSHRUN_scalar: sqshrun(vf, rd, rn, right_shift); break;
+ case NEON_SQRSHRUN_scalar: sqrshrun(vf, rd, rn, right_shift); break;
+ case NEON_FCVTZS_imm_scalar: fcvts(vf, rd, rn, FPZero, right_shift); break;
+ case NEON_FCVTZU_imm_scalar: fcvtu(vf, rd, rn, FPZero, right_shift); break;
+ case NEON_SCVTF_imm_scalar:
+ scvtf(vf, rd, rn, right_shift, fpcr_rounding);
+ break;
+ case NEON_UCVTF_imm_scalar:
+ ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONShiftImmediate(const Instruction* instr) {
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
+
+ // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
+ // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
+ static const NEONFormatMap map = {
+ {22, 21, 20, 19, 30},
+ {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B, NF_4H, NF_8H, NF_4H, NF_8H,
+ NF_2S, NF_4S, NF_2S, NF_4S, NF_2S, NF_4S, NF_2S, NF_4S,
+ NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D,
+ NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D}
+ };
+ NEONFormatDecoder nfd(instr, &map);
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ // 0001->8H, 001x->4S, 01xx->2D, all others undefined.
+ static const NEONFormatMap map_l = {
+ {22, 21, 20, 19},
+ {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}
+ };
+ VectorFormat vf_l = nfd.GetVectorFormat(&map_l);
+
+ int highestSetBit = HighestSetBitPosition(instr->ImmNEONImmh());
+ int immhimmb = instr->ImmNEONImmhImmb();
+ int right_shift = (16 << highestSetBit) - immhimmb;
+ int left_shift = immhimmb - (8 << highestSetBit);
+
+ switch (instr->Mask(NEONShiftImmediateMask)) {
+ case NEON_SHL: shl(vf, rd, rn, left_shift); break;
+ case NEON_SLI: sli(vf, rd, rn, left_shift); break;
+ case NEON_SQSHLU: sqshlu(vf, rd, rn, left_shift); break;
+ case NEON_SRI: sri(vf, rd, rn, right_shift); break;
+ case NEON_SSHR: sshr(vf, rd, rn, right_shift); break;
+ case NEON_USHR: ushr(vf, rd, rn, right_shift); break;
+ case NEON_SRSHR: sshr(vf, rd, rn, right_shift).Round(vf); break;
+ case NEON_URSHR: ushr(vf, rd, rn, right_shift).Round(vf); break;
+ case NEON_SSRA: ssra(vf, rd, rn, right_shift); break;
+ case NEON_USRA: usra(vf, rd, rn, right_shift); break;
+ case NEON_SRSRA: srsra(vf, rd, rn, right_shift); break;
+ case NEON_URSRA: ursra(vf, rd, rn, right_shift); break;
+ case NEON_SQSHL_imm: sqshl(vf, rd, rn, left_shift); break;
+ case NEON_UQSHL_imm: uqshl(vf, rd, rn, left_shift); break;
+ case NEON_SCVTF_imm: scvtf(vf, rd, rn, right_shift, fpcr_rounding); break;
+ case NEON_UCVTF_imm: ucvtf(vf, rd, rn, right_shift, fpcr_rounding); break;
+ case NEON_FCVTZS_imm: fcvts(vf, rd, rn, FPZero, right_shift); break;
+ case NEON_FCVTZU_imm: fcvtu(vf, rd, rn, FPZero, right_shift); break;
+ case NEON_SSHLL:
+ vf = vf_l;
+ if (instr->Mask(NEON_Q)) {
+ sshll2(vf, rd, rn, left_shift);
+ } else {
+ sshll(vf, rd, rn, left_shift);
+ }
+ break;
+ case NEON_USHLL:
+ vf = vf_l;
+ if (instr->Mask(NEON_Q)) {
+ ushll2(vf, rd, rn, left_shift);
+ } else {
+ ushll(vf, rd, rn, left_shift);
+ }
+ break;
+ case NEON_SHRN:
+ if (instr->Mask(NEON_Q)) {
+ shrn2(vf, rd, rn, right_shift);
+ } else {
+ shrn(vf, rd, rn, right_shift);
+ }
+ break;
+ case NEON_RSHRN:
+ if (instr->Mask(NEON_Q)) {
+ rshrn2(vf, rd, rn, right_shift);
+ } else {
+ rshrn(vf, rd, rn, right_shift);
+ }
+ break;
+ case NEON_UQSHRN:
+ if (instr->Mask(NEON_Q)) {
+ uqshrn2(vf, rd, rn, right_shift);
+ } else {
+ uqshrn(vf, rd, rn, right_shift);
+ }
+ break;
+ case NEON_UQRSHRN:
+ if (instr->Mask(NEON_Q)) {
+ uqrshrn2(vf, rd, rn, right_shift);
+ } else {
+ uqrshrn(vf, rd, rn, right_shift);
+ }
+ break;
+ case NEON_SQSHRN:
+ if (instr->Mask(NEON_Q)) {
+ sqshrn2(vf, rd, rn, right_shift);
+ } else {
+ sqshrn(vf, rd, rn, right_shift);
+ }
+ break;
+ case NEON_SQRSHRN:
+ if (instr->Mask(NEON_Q)) {
+ sqrshrn2(vf, rd, rn, right_shift);
+ } else {
+ sqrshrn(vf, rd, rn, right_shift);
+ }
+ break;
+ case NEON_SQSHRUN:
+ if (instr->Mask(NEON_Q)) {
+ sqshrun2(vf, rd, rn, right_shift);
+ } else {
+ sqshrun(vf, rd, rn, right_shift);
+ }
+ break;
+ case NEON_SQRSHRUN:
+ if (instr->Mask(NEON_Q)) {
+ sqrshrun2(vf, rd, rn, right_shift);
+ } else {
+ sqrshrun(vf, rd, rn, right_shift);
+ }
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONTable(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rn2 = vreg((instr->Rn() + 1) % kNumberOfVRegisters);
+ SimVRegister& rn3 = vreg((instr->Rn() + 2) % kNumberOfVRegisters);
+ SimVRegister& rn4 = vreg((instr->Rn() + 3) % kNumberOfVRegisters);
+ SimVRegister& rm = vreg(instr->Rm());
+
+ switch (instr->Mask(NEONTableMask)) {
+ case NEON_TBL_1v: tbl(vf, rd, rn, rm); break;
+ case NEON_TBL_2v: tbl(vf, rd, rn, rn2, rm); break;
+ case NEON_TBL_3v: tbl(vf, rd, rn, rn2, rn3, rm); break;
+ case NEON_TBL_4v: tbl(vf, rd, rn, rn2, rn3, rn4, rm); break;
+ case NEON_TBX_1v: tbx(vf, rd, rn, rm); break;
+ case NEON_TBX_2v: tbx(vf, rd, rn, rn2, rm); break;
+ case NEON_TBX_3v: tbx(vf, rd, rn, rn2, rn3, rm); break;
+ case NEON_TBX_4v: tbx(vf, rd, rn, rn2, rn3, rn4, rm); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONPerm(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr);
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rm = vreg(instr->Rm());
+
+ switch (instr->Mask(NEONPermMask)) {
+ case NEON_TRN1: trn1(vf, rd, rn, rm); break;
+ case NEON_TRN2: trn2(vf, rd, rn, rm); break;
+ case NEON_UZP1: uzp1(vf, rd, rn, rm); break;
+ case NEON_UZP2: uzp2(vf, rd, rn, rm); break;
+ case NEON_ZIP1: zip1(vf, rd, rn, rm); break;
+ case NEON_ZIP2: zip2(vf, rd, rn, rm); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::DoUnreachable(const Instruction* instr) {
+ VIXL_ASSERT(instr->InstructionBits() == UNDEFINED_INST_PATTERN);
+
+ fprintf(stream_, "Hit UNREACHABLE marker at pc=%p.\n",
+ reinterpret_cast<const void*>(instr));
+ abort();
+}
+
+
+void Simulator::DoTrace(const Instruction* instr) {
+ VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
+ (instr->ImmException() == kTraceOpcode));
+
+ // Read the arguments encoded inline in the instruction stream.
+ uint32_t parameters;
+ uint32_t command;
+
+ VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
+ memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
+ memcpy(&command, instr + kTraceCommandOffset, sizeof(command));
+
+ switch (command) {
+ case TRACE_ENABLE:
+ set_trace_parameters(trace_parameters() | parameters);
+ break;
+ case TRACE_DISABLE:
+ set_trace_parameters(trace_parameters() & ~parameters);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ set_pc(instr->InstructionAtOffset(kTraceLength));
+}
+
+
+void Simulator::DoLog(const Instruction* instr) {
+ VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
+ (instr->ImmException() == kLogOpcode));
+
+ // Read the arguments encoded inline in the instruction stream.
+ uint32_t parameters;
+
+ VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
+ memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
+
+ // We don't support a one-shot LOG_DISASM.
+ VIXL_ASSERT((parameters & LOG_DISASM) == 0);
+ // Print the requested information.
+ if (parameters & LOG_SYSREGS) PrintSystemRegisters();
+ if (parameters & LOG_REGS) PrintRegisters();
+ if (parameters & LOG_VREGS) PrintVRegisters();
+
+ set_pc(instr->InstructionAtOffset(kLogLength));
+}
+
+
+void Simulator::DoPrintf(const Instruction* instr) {
+ VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
+ (instr->ImmException() == kPrintfOpcode));
+
+ // Read the arguments encoded inline in the instruction stream.
+ uint32_t arg_count;
+ uint32_t arg_pattern_list;
+ VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
+ memcpy(&arg_count,
+ instr + kPrintfArgCountOffset,
+ sizeof(arg_count));
+ memcpy(&arg_pattern_list,
+ instr + kPrintfArgPatternListOffset,
+ sizeof(arg_pattern_list));
+
+ VIXL_ASSERT(arg_count <= kPrintfMaxArgCount);
+ VIXL_ASSERT((arg_pattern_list >> (kPrintfArgPatternBits * arg_count)) == 0);
+
+ // We need to call the host printf function with a set of arguments defined by
+ // arg_pattern_list. Because we don't know the types and sizes of the
+ // arguments, this is very difficult to do in a robust and portable way. To
+ // work around the problem, we pick apart the format string, and print one
+ // format placeholder at a time.
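+  //
+  // For example, "x=%d y=%f\n" is split in place into "x=", "%d y=" and
+  // "%f\n"; the leading piece is printed literally and each later piece is
+  // passed to printf() with a single argument register.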
+
+ // Allocate space for the format string. We take a copy, so we can modify it.
+ // Leave enough space for one extra character per expected argument (plus the
+ // '\0' termination).
+ const char * format_base = reg<const char *>(0);
+ VIXL_ASSERT(format_base != NULL);
+ size_t length = strlen(format_base) + 1;
+ char * const format = (char *)js_calloc(length + arg_count);
+
+ // A list of chunks, each with exactly one format placeholder.
+ const char * chunks[kPrintfMaxArgCount];
+
+ // Copy the format string and search for format placeholders.
+ uint32_t placeholder_count = 0;
+ char * format_scratch = format;
+ for (size_t i = 0; i < length; i++) {
+ if (format_base[i] != '%') {
+ *format_scratch++ = format_base[i];
+ } else {
+ if (format_base[i + 1] == '%') {
+ // Ignore explicit "%%" sequences.
+ *format_scratch++ = format_base[i];
+ i++;
+ // Chunks after the first are passed as format strings to printf, so we
+ // need to escape '%' characters in those chunks.
+ if (placeholder_count > 0) *format_scratch++ = format_base[i];
+ } else {
+ VIXL_CHECK(placeholder_count < arg_count);
+ // Insert '\0' before placeholders, and store their locations.
+ *format_scratch++ = '\0';
+ chunks[placeholder_count++] = format_scratch;
+ *format_scratch++ = format_base[i];
+ }
+ }
+ }
+ VIXL_CHECK(placeholder_count == arg_count);
+
+ // Finally, call printf with each chunk, passing the appropriate register
+ // argument. Normally, printf returns the number of bytes transmitted, so we
+ // can emulate a single printf call by adding the result from each chunk. If
+ // any call returns a negative (error) value, though, just return that value.
+
+ printf("%s", clr_printf);
+
+ // Because '\0' is inserted before each placeholder, the first string in
+ // 'format' contains no format placeholders and should be printed literally.
+ int result = printf("%s", format);
+ int pcs_r = 1; // Start at x1. x0 holds the format string.
+ int pcs_f = 0; // Start at d0.
+ if (result >= 0) {
+ for (uint32_t i = 0; i < placeholder_count; i++) {
+ int part_result = -1;
+
+ uint32_t arg_pattern = arg_pattern_list >> (i * kPrintfArgPatternBits);
+ arg_pattern &= (1 << kPrintfArgPatternBits) - 1;
+ switch (arg_pattern) {
+ case kPrintfArgW: part_result = printf(chunks[i], wreg(pcs_r++)); break;
+ case kPrintfArgX: part_result = printf(chunks[i], xreg(pcs_r++)); break;
+ case kPrintfArgD: part_result = printf(chunks[i], dreg(pcs_f++)); break;
+ default: VIXL_UNREACHABLE();
+ }
+
+ if (part_result < 0) {
+ // Handle error values.
+ result = part_result;
+ break;
+ }
+
+ result += part_result;
+ }
+ }
+
+ printf("%s", clr_normal);
+
+ // Printf returns its result in x0 (just like the C library's printf).
+ set_xreg(0, result);
+
+ // The printf parameters are inlined in the code, so skip them.
+ set_pc(instr->InstructionAtOffset(kPrintfLength));
+
+ // Set LR as if we'd just called a native printf function.
+ set_lr(pc());
+
+ js_free(format);
+}
+
+} // namespace vixl
+
+#endif // JS_SIMULATOR_ARM64
diff --git a/js/src/jit/arm64/vixl/Simulator-vixl.h b/js/src/jit/arm64/vixl/Simulator-vixl.h
new file mode 100644
index 0000000000..af78f5bad0
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Simulator-vixl.h
@@ -0,0 +1,2592 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_SIMULATOR_A64_H_
+#define VIXL_A64_SIMULATOR_A64_H_
+
+#include "jstypes.h"
+
+#ifdef JS_SIMULATOR_ARM64
+
+#include "mozilla/Vector.h"
+
+#include "jit/arm64/vixl/Assembler-vixl.h"
+#include "jit/arm64/vixl/Disasm-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Instructions-vixl.h"
+#include "jit/arm64/vixl/Instrument-vixl.h"
+#include "jit/arm64/vixl/MozCachingDecoder.h"
+#include "jit/arm64/vixl/Simulator-Constants-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+#include "jit/IonTypes.h"
+#include "js/AllocPolicy.h"
+#include "vm/MutexIDs.h"
+#include "wasm/WasmSignalHandlers.h"
+
+namespace vixl {
+
+// Representation of memory, with typed getters and setters for access.
+class Memory {
+ public:
+ template <typename T>
+ static T AddressUntag(T address) {
+ // Cast the address using a C-style cast. A reinterpret_cast would be
+ // appropriate, but it can't cast one integral type to another.
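+    // For example, untagging 0xab00007f12345678 yields 0x0000007f12345678,
+    // assuming the tag occupies the top byte covered by kAddressTagMask.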
+ uint64_t bits = (uint64_t)address;
+ return (T)(bits & ~kAddressTagMask);
+ }
+
+ template <typename T, typename A>
+ static T Read(A address) {
+ T value;
+ address = AddressUntag(address);
+ VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
+ (sizeof(value) == 4) || (sizeof(value) == 8) ||
+ (sizeof(value) == 16));
+ memcpy(&value, reinterpret_cast<const char *>(address), sizeof(value));
+ return value;
+ }
+
+ template <typename T, typename A>
+ static void Write(A address, T value) {
+ address = AddressUntag(address);
+ VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
+ (sizeof(value) == 4) || (sizeof(value) == 8) ||
+ (sizeof(value) == 16));
+ memcpy(reinterpret_cast<char *>(address), &value, sizeof(value));
+ }
+};
+
+// Represent a register (r0-r31, v0-v31).
+template<int kSizeInBytes>
+class SimRegisterBase {
+ public:
+ SimRegisterBase() : written_since_last_log_(false) {}
+
+ // Write the specified value. The value is zero-extended if necessary.
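+  // For example, writing a uint32_t leaves the upper 32 bits of an X-sized
+  // register clear, matching the architectural zero-extension of W writes.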
+ template<typename T>
+ void Set(T new_value) {
+ VIXL_STATIC_ASSERT(sizeof(new_value) <= kSizeInBytes);
+ if (sizeof(new_value) < kSizeInBytes) {
+ // All AArch64 registers are zero-extending.
+ memset(value_ + sizeof(new_value), 0, kSizeInBytes - sizeof(new_value));
+ }
+ memcpy(value_, &new_value, sizeof(new_value));
+ NotifyRegisterWrite();
+ }
+
+ // Insert a typed value into a register, leaving the rest of the register
+ // unchanged. The lane parameter indicates where in the register the value
+ // should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where
+ // 0 represents the least significant bits.
+ template<typename T>
+ void Insert(int lane, T new_value) {
+ VIXL_ASSERT(lane >= 0);
+ VIXL_ASSERT((sizeof(new_value) +
+ (lane * sizeof(new_value))) <= kSizeInBytes);
+ memcpy(&value_[lane * sizeof(new_value)], &new_value, sizeof(new_value));
+ NotifyRegisterWrite();
+ }
+
+ // Read the value as the specified type. The value is truncated if necessary.
+ template<typename T>
+ T Get(int lane = 0) const {
+ T result;
+ VIXL_ASSERT(lane >= 0);
+ VIXL_ASSERT((sizeof(result) + (lane * sizeof(result))) <= kSizeInBytes);
+ memcpy(&result, &value_[lane * sizeof(result)], sizeof(result));
+ return result;
+ }
+
+ // TODO: Make this return a map of updated bytes, so that we can highlight
+ // updated lanes for load-and-insert. (That never happens for scalar code, but
+ // NEON has some instructions that can update individual lanes.)
+ bool WrittenSinceLastLog() const {
+ return written_since_last_log_;
+ }
+
+ void NotifyRegisterLogged() {
+ written_since_last_log_ = false;
+ }
+
+ protected:
+ uint8_t value_[kSizeInBytes];
+
+ // Helpers to aid with register tracing.
+ bool written_since_last_log_;
+
+ void NotifyRegisterWrite() {
+ written_since_last_log_ = true;
+ }
+};
+typedef SimRegisterBase<kXRegSizeInBytes> SimRegister; // r0-r31
+typedef SimRegisterBase<kQRegSizeInBytes> SimVRegister; // v0-v31
+
+// Representation of a vector register, with typed getters and setters for lanes
+// and additional information to represent lane state.
+class LogicVRegister {
+ public:
+ inline LogicVRegister(SimVRegister& other) // NOLINT
+ : register_(other) {
+ for (unsigned i = 0; i < sizeof(saturated_) / sizeof(saturated_[0]); i++) {
+ saturated_[i] = kNotSaturated;
+ }
+ for (unsigned i = 0; i < sizeof(round_) / sizeof(round_[0]); i++) {
+ round_[i] = 0;
+ }
+ }
+
+ int64_t Int(VectorFormat vform, int index) const {
+ int64_t element;
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8: element = register_.Get<int8_t>(index); break;
+ case 16: element = register_.Get<int16_t>(index); break;
+ case 32: element = register_.Get<int32_t>(index); break;
+ case 64: element = register_.Get<int64_t>(index); break;
+ default: VIXL_UNREACHABLE(); return 0;
+ }
+ return element;
+ }
+
+ uint64_t Uint(VectorFormat vform, int index) const {
+ uint64_t element;
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8: element = register_.Get<uint8_t>(index); break;
+ case 16: element = register_.Get<uint16_t>(index); break;
+ case 32: element = register_.Get<uint32_t>(index); break;
+ case 64: element = register_.Get<uint64_t>(index); break;
+ default: VIXL_UNREACHABLE(); return 0;
+ }
+ return element;
+ }
+
+ int64_t IntLeftJustified(VectorFormat vform, int index) const {
+ return Int(vform, index) << (64 - LaneSizeInBitsFromFormat(vform));
+ }
+
+ uint64_t UintLeftJustified(VectorFormat vform, int index) const {
+ return Uint(vform, index) << (64 - LaneSizeInBitsFromFormat(vform));
+ }
+
+ void SetInt(VectorFormat vform, int index, int64_t value) const {
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8: register_.Insert(index, static_cast<int8_t>(value)); break;
+ case 16: register_.Insert(index, static_cast<int16_t>(value)); break;
+ case 32: register_.Insert(index, static_cast<int32_t>(value)); break;
+ case 64: register_.Insert(index, static_cast<int64_t>(value)); break;
+ default: VIXL_UNREACHABLE(); return;
+ }
+ }
+
+ void SetUint(VectorFormat vform, int index, uint64_t value) const {
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8: register_.Insert(index, static_cast<uint8_t>(value)); break;
+ case 16: register_.Insert(index, static_cast<uint16_t>(value)); break;
+ case 32: register_.Insert(index, static_cast<uint32_t>(value)); break;
+ case 64: register_.Insert(index, static_cast<uint64_t>(value)); break;
+ default: VIXL_UNREACHABLE(); return;
+ }
+ }
+
+ void ReadUintFromMem(VectorFormat vform, int index, uint64_t addr) const {
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8: register_.Insert(index, Memory::Read<uint8_t>(addr)); break;
+ case 16: register_.Insert(index, Memory::Read<uint16_t>(addr)); break;
+ case 32: register_.Insert(index, Memory::Read<uint32_t>(addr)); break;
+ case 64: register_.Insert(index, Memory::Read<uint64_t>(addr)); break;
+ default: VIXL_UNREACHABLE(); return;
+ }
+ }
+
+ void WriteUintToMem(VectorFormat vform, int index, uint64_t addr) const {
+ uint64_t value = Uint(vform, index);
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8: Memory::Write(addr, static_cast<uint8_t>(value)); break;
+ case 16: Memory::Write(addr, static_cast<uint16_t>(value)); break;
+ case 32: Memory::Write(addr, static_cast<uint32_t>(value)); break;
+ case 64: Memory::Write(addr, value); break;
+ }
+ }
+
+ template <typename T>
+ T Float(int index) const {
+ return register_.Get<T>(index);
+ }
+
+ template <typename T>
+ void SetFloat(int index, T value) const {
+ register_.Insert(index, value);
+ }
+
+ // When setting a result in a register of size less than Q, the top bits of
+ // the Q register must be cleared.
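+  // For example, a D-sized (8-byte) result leaves bytes 8-15 of the backing
+  // Q register zeroed.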
+ void ClearForWrite(VectorFormat vform) const {
+ unsigned size = RegisterSizeInBytesFromFormat(vform);
+ for (unsigned i = size; i < kQRegSizeInBytes; i++) {
+ SetUint(kFormat16B, i, 0);
+ }
+ }
+
+ // Saturation state for each lane of a vector.
+ enum Saturation {
+ kNotSaturated = 0,
+ kSignedSatPositive = 1 << 0,
+ kSignedSatNegative = 1 << 1,
+ kSignedSatMask = kSignedSatPositive | kSignedSatNegative,
+ kSignedSatUndefined = kSignedSatMask,
+ kUnsignedSatPositive = 1 << 2,
+ kUnsignedSatNegative = 1 << 3,
+ kUnsignedSatMask = kUnsignedSatPositive | kUnsignedSatNegative,
+ kUnsignedSatUndefined = kUnsignedSatMask
+ };
+
+ // Getters for saturation state.
+ Saturation GetSignedSaturation(int index) {
+ return static_cast<Saturation>(saturated_[index] & kSignedSatMask);
+ }
+
+ Saturation GetUnsignedSaturation(int index) {
+ return static_cast<Saturation>(saturated_[index] & kUnsignedSatMask);
+ }
+
+ // Setters for saturation state.
+ void ClearSat(int index) {
+ saturated_[index] = kNotSaturated;
+ }
+
+ void SetSignedSat(int index, bool positive) {
+ SetSatFlag(index, positive ? kSignedSatPositive : kSignedSatNegative);
+ }
+
+ void SetUnsignedSat(int index, bool positive) {
+ SetSatFlag(index, positive ? kUnsignedSatPositive : kUnsignedSatNegative);
+ }
+
+ void SetSatFlag(int index, Saturation sat) {
+ saturated_[index] = static_cast<Saturation>(saturated_[index] | sat);
+ VIXL_ASSERT((sat & kUnsignedSatMask) != kUnsignedSatUndefined);
+ VIXL_ASSERT((sat & kSignedSatMask) != kSignedSatUndefined);
+ }
+
+ // Saturate lanes of a vector based on saturation state.
+ LogicVRegister& SignedSaturate(VectorFormat vform) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ Saturation sat = GetSignedSaturation(i);
+ if (sat == kSignedSatPositive) {
+ SetInt(vform, i, MaxIntFromFormat(vform));
+ } else if (sat == kSignedSatNegative) {
+ SetInt(vform, i, MinIntFromFormat(vform));
+ }
+ }
+ return *this;
+ }
+
+ LogicVRegister& UnsignedSaturate(VectorFormat vform) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ Saturation sat = GetUnsignedSaturation(i);
+ if (sat == kUnsignedSatPositive) {
+ SetUint(vform, i, MaxUintFromFormat(vform));
+ } else if (sat == kUnsignedSatNegative) {
+ SetUint(vform, i, 0);
+ }
+ }
+ return *this;
+ }
+
+ // Getter for rounding state.
+ bool GetRounding(int index) {
+ return round_[index];
+ }
+
+ // Setter for rounding state.
+ void SetRounding(int index, bool round) {
+ round_[index] = round;
+ }
+
+ // Round lanes of a vector based on rounding state.
+ LogicVRegister& Round(VectorFormat vform) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ SetInt(vform, i, Int(vform, i) + (GetRounding(i) ? 1 : 0));
+ }
+ return *this;
+ }
+
+ // Unsigned halve lanes of a vector, and use the saturation state to set the
+ // top bit.
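+  // For example, for UHADD of byte lanes 0xf0 and 0x20 the preceding add()
+  // stores the truncated sum 0x10 and records unsigned saturation; halving
+  // and restoring that carried-out bit gives the expected average 0x88.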
+ LogicVRegister& Uhalve(VectorFormat vform) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t val = Uint(vform, i);
+ SetRounding(i, (val & 1) == 1);
+ val >>= 1;
+ if (GetUnsignedSaturation(i) != kNotSaturated) {
+ // If the operation causes unsigned saturation, the bit shifted into the
+ // most significant bit must be set.
+ val |= (MaxUintFromFormat(vform) >> 1) + 1;
+ }
+ SetInt(vform, i, val);
+ }
+ return *this;
+ }
+
+  // Signed halve lanes of a vector, and use the saturation state to correct
+  // the sign bit.
+ LogicVRegister& Halve(VectorFormat vform) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int64_t val = Int(vform, i);
+ SetRounding(i, (val & 1) == 1);
+ val >>= 1;
+ if (GetSignedSaturation(i) != kNotSaturated) {
+ // If the operation causes signed saturation, the sign bit must be
+ // inverted.
+ val ^= (MaxUintFromFormat(vform) >> 1) + 1;
+ }
+ SetInt(vform, i, val);
+ }
+ return *this;
+ }
+
+ private:
+ SimVRegister& register_;
+
+ // Allocate one saturation state entry per lane; largest register is type Q,
+ // and lanes can be a minimum of one byte wide.
+ Saturation saturated_[kQRegSizeInBytes];
+
+ // Allocate one rounding state entry per lane.
+ bool round_[kQRegSizeInBytes];
+};
+
+// The proper way to initialize a simulated system register (such as NZCV) is as
+// follows:
+// SimSystemRegister nzcv = SimSystemRegister::DefaultValueFor(NZCV);
+class SimSystemRegister {
+ public:
+ // The default constructor represents a register which has no writable bits.
+ // It is not possible to set its value to anything other than 0.
+ SimSystemRegister() : value_(0), write_ignore_mask_(0xffffffff) { }
+
+ uint32_t RawValue() const {
+ return value_;
+ }
+
+ void SetRawValue(uint32_t new_value) {
+ value_ = (value_ & write_ignore_mask_) | (new_value & ~write_ignore_mask_);
+ }
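+ // For example, with write_ignore_mask_ == 0x0fffffff only bits [31:28] are
+ // modifiable, so SetRawValue(0xffffffff) updates just those four bits and
+ // leaves the others at their current value.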
+
+ uint32_t Bits(int msb, int lsb) const {
+ return ExtractUnsignedBitfield32(msb, lsb, value_);
+ }
+
+ int32_t SignedBits(int msb, int lsb) const {
+ return ExtractSignedBitfield32(msb, lsb, value_);
+ }
+
+ void SetBits(int msb, int lsb, uint32_t bits);
+
+ // Default system register values.
+ static SimSystemRegister DefaultValueFor(SystemRegister id);
+
+#define DEFINE_GETTER(Name, HighBit, LowBit, Func) \
+ uint32_t Name() const { return Func(HighBit, LowBit); } \
+ void Set##Name(uint32_t bits) { SetBits(HighBit, LowBit, bits); }
+#define DEFINE_WRITE_IGNORE_MASK(Name, Mask) \
+ static const uint32_t Name##WriteIgnoreMask = ~static_cast<uint32_t>(Mask);
+
+ SYSTEM_REGISTER_FIELDS_LIST(DEFINE_GETTER, DEFINE_WRITE_IGNORE_MASK)
+
+#undef DEFINE_WRITE_IGNORE_MASK
+#undef DEFINE_GETTER
+
+ protected:
+ // Most system registers only implement a few of the bits in the word. Other
+ // bits are "read-as-zero, write-ignored". The write_ignore_mask argument
+ // describes the bits which are not modifiable.
+ SimSystemRegister(uint32_t value, uint32_t write_ignore_mask)
+ : value_(value), write_ignore_mask_(write_ignore_mask) { }
+
+ uint32_t value_;
+ uint32_t write_ignore_mask_;
+};
+
+
+class SimExclusiveLocalMonitor {
+ public:
+ SimExclusiveLocalMonitor() : kSkipClearProbability(8), seed_(0x87654321) {
+ Clear();
+ }
+
+ // Clear the exclusive monitor (like clrex).
+ void Clear() {
+ address_ = 0;
+ size_ = 0;
+ }
+
+ // Clear the exclusive monitor most of the time.
+ void MaybeClear() {
+ if ((seed_ % kSkipClearProbability) != 0) {
+ Clear();
+ }
+
+ // Advance seed_ using a simple linear congruential generator.
+ seed_ = (seed_ * 48271) % 2147483647;
+ }
+
+ // Mark the address range for exclusive access (like load-exclusive).
+ void MarkExclusive(uint64_t address, size_t size) {
+ address_ = address;
+ size_ = size;
+ }
+
+ // Return true if the address range is marked (like store-exclusive).
+ // This helper doesn't implicitly clear the monitor.
+ bool IsExclusive(uint64_t address, size_t size) {
+ VIXL_ASSERT(size > 0);
+ // Be pedantic: Require both the address and the size to match.
+ return (size == size_) && (address == address_);
+ }
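+ // Illustrative flow: a simulated load-exclusive calls MarkExclusive(), the
+ // matching store-exclusive succeeds only if IsExclusive() returns true (and
+ // is expected to clear the monitor itself), and clrex maps onto Clear().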
+
+ private:
+ uint64_t address_;
+ size_t size_;
+
+ const int kSkipClearProbability;
+ uint32_t seed_;
+};
+
+
+// We can't accurately simulate the global monitor since it depends on external
+// influences. Instead, this implementation occasionally causes accesses to
+// fail, according to kPassProbability.
+class SimExclusiveGlobalMonitor {
+ public:
+ SimExclusiveGlobalMonitor() : kPassProbability(8), seed_(0x87654321) {}
+
+ bool IsExclusive(uint64_t address, size_t size) {
+ USE(address, size);
+
+ bool pass = (seed_ % kPassProbability) != 0;
+ // Advance seed_ using a simple linear congruential generator.
+ seed_ = (seed_ * 48271) % 2147483647;
+ return pass;
+ }
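+ // With kPassProbability == 8, roughly one in eight checks reports failure,
+ // so store-exclusive sequences must be prepared to retry.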
+
+ private:
+ const int kPassProbability;
+ uint32_t seed_;
+};
+
+class Redirection;
+
+class Simulator : public DecoderVisitor {
+ public:
+#ifdef JS_CACHE_SIMULATOR_ARM64
+ using Decoder = CachingDecoder;
+ mozilla::Atomic<bool> pendingCacheRequests = mozilla::Atomic<bool>{ false };
+#endif
+ explicit Simulator(Decoder* decoder, FILE* stream = stdout);
+ ~Simulator();
+
+ // Moz changes.
+ void init(Decoder* decoder, FILE* stream);
+ static Simulator* Current();
+ static Simulator* Create();
+ static void Destroy(Simulator* sim);
+ uintptr_t stackLimit() const;
+ uintptr_t* addressOfStackLimit();
+ bool overRecursed(uintptr_t newsp = 0) const;
+ bool overRecursedWithExtra(uint32_t extra) const;
+ int64_t call(uint8_t* entry, int argument_count, ...);
+ static void* RedirectNativeFunction(void* nativeFunction, js::jit::ABIFunctionType type);
+ void setGPR32Result(int32_t result);
+ void setGPR64Result(int64_t result);
+ void setFP32Result(float result);
+ void setFP64Result(double result);
+#ifdef JS_CACHE_SIMULATOR_ARM64
+ void FlushICache();
+#endif
+ void VisitCallRedirection(const Instruction* instr);
+ static uintptr_t StackLimit() {
+ return Simulator::Current()->stackLimit();
+ }
+ template<typename T> T Read(uintptr_t address);
+ template <typename T> void Write(uintptr_t address_, T value);
+ JS::ProfilingFrameIterator::RegisterState registerState();
+
+ void ResetState();
+
+ // Run the simulator.
+ virtual void Run();
+ void RunFrom(const Instruction* first);
+
+ // Simulation helpers.
+ const Instruction* pc() const { return pc_; }
+ const Instruction* get_pc() const { return pc_; }
+ int64_t get_sp() const { return xreg(31, Reg31IsStackPointer); }
+ int64_t get_lr() const { return xreg(30); }
+ int64_t get_fp() const { return xreg(29); }
+
+ template <typename T>
+ T get_pc_as() const { return reinterpret_cast<T>(const_cast<Instruction*>(pc())); }
+
+ void set_pc(const Instruction* new_pc) {
+ pc_ = Memory::AddressUntag(new_pc);
+ pc_modified_ = true;
+ }
+
+ // Handle any wasm faults, returning true if the fault was handled.
+ // This method is rather hot so inline the normal (no-wasm) case.
+ bool MOZ_ALWAYS_INLINE handle_wasm_seg_fault(uintptr_t addr, unsigned numBytes) {
+ if (MOZ_LIKELY(!js::wasm::CodeExists)) {
+ return false;
+ }
+
+ uint8_t* newPC;
+ if (!js::wasm::MemoryAccessTraps(registerState(), (uint8_t*)addr, numBytes, &newPC)) {
+ return false;
+ }
+
+ set_pc((Instruction*)newPC);
+ return true;
+ }
+
+ void increment_pc() {
+ if (!pc_modified_) {
+ pc_ = pc_->NextInstruction();
+ }
+
+ pc_modified_ = false;
+ }
+
+ void ExecuteInstruction();
+
+ // Declare all Visitor functions.
+ #define DECLARE(A) virtual void Visit##A(const Instruction* instr) override;
+ VISITOR_LIST_THAT_RETURN(DECLARE)
+ VISITOR_LIST_THAT_DONT_RETURN(DECLARE)
+ #undef DECLARE
+
+
+ // Integer register accessors.
+
+ // Basic accessor: Read the register as the specified type.
+ template<typename T>
+ T reg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const {
+ VIXL_ASSERT(code < kNumberOfRegisters);
+ if ((code == 31) && (r31mode == Reg31IsZeroRegister)) {
+ T result;
+ memset(&result, 0, sizeof(result));
+ return result;
+ }
+ return registers_[code].Get<T>();
+ }
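+ // For example, reg<int32_t>(0) reads w0, reg<int64_t>(30) reads the link
+ // register, and reg<int64_t>(31) yields zero unless Reg31IsStackPointer is
+ // passed.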
+
+ // Common specialized accessors for the reg() template.
+ int32_t wreg(unsigned code,
+ Reg31Mode r31mode = Reg31IsZeroRegister) const {
+ return reg<int32_t>(code, r31mode);
+ }
+
+ int64_t xreg(unsigned code,
+ Reg31Mode r31mode = Reg31IsZeroRegister) const {
+ return reg<int64_t>(code, r31mode);
+ }
+
+ // As above, with parameterized size and return type. The value is
+ // either zero-extended or truncated to fit, as required.
+ template<typename T>
+ T reg(unsigned size, unsigned code,
+ Reg31Mode r31mode = Reg31IsZeroRegister) const {
+ uint64_t raw;
+ switch (size) {
+ case kWRegSize: raw = reg<uint32_t>(code, r31mode); break;
+ case kXRegSize: raw = reg<uint64_t>(code, r31mode); break;
+ default:
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+
+ T result;
+ VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw));
+ // Copy the result and truncate to fit. This assumes a little-endian host.
+ memcpy(&result, &raw, sizeof(result));
+ return result;
+ }
+
+ // Use int64_t by default if T is not specified.
+ int64_t reg(unsigned size, unsigned code,
+ Reg31Mode r31mode = Reg31IsZeroRegister) const {
+ return reg<int64_t>(size, code, r31mode);
+ }
+
+ enum RegLogMode {
+ LogRegWrites,
+ NoRegLog
+ };
+
+ // Write 'value' into an integer register. The value is zero-extended. This
+ // behaviour matches AArch64 register writes.
+ template<typename T>
+ void set_reg(unsigned code, T value,
+ RegLogMode log_mode = LogRegWrites,
+ Reg31Mode r31mode = Reg31IsZeroRegister) {
+ if (sizeof(T) < kWRegSizeInBytes) {
+ // We use a C-style cast on purpose here.
+ // Since we do not have access to 'constexpr if', the casts in this `if`
+ // must be valid even if we know the code will never be executed, in
+ // particular when `T` is a pointer type.
+ int64_t tmp_64bit = (int64_t)value;
+ int32_t tmp_32bit = static_cast<int32_t>(tmp_64bit);
+ set_reg<int32_t>(code, tmp_32bit, log_mode, r31mode);
+ return;
+ }
+
+ VIXL_ASSERT((sizeof(T) == kWRegSizeInBytes) ||
+ (sizeof(T) == kXRegSizeInBytes));
+ VIXL_ASSERT(code < kNumberOfRegisters);
+
+ if ((code == 31) && (r31mode == Reg31IsZeroRegister)) {
+ return;
+ }
+
+ registers_[code].Set(value);
+
+ if (log_mode == LogRegWrites) LogRegister(code, r31mode);
+ }
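+ // For example, set_reg(0, UINT32_C(0xffffffff)) writes w0 and leaves
+ // x0 == 0x00000000ffffffff, mirroring the zero-extension performed by
+ // AArch64 w-register writes.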
+
+ // Common specialized accessors for the set_reg() template.
+ void set_wreg(unsigned code, int32_t value,
+ RegLogMode log_mode = LogRegWrites,
+ Reg31Mode r31mode = Reg31IsZeroRegister) {
+ set_reg(code, value, log_mode, r31mode);
+ }
+
+ void set_xreg(unsigned code, int64_t value,
+ RegLogMode log_mode = LogRegWrites,
+ Reg31Mode r31mode = Reg31IsZeroRegister) {
+ set_reg(code, value, log_mode, r31mode);
+ }
+
+ // As above, with parameterized size and type. The value is either
+ // zero-extended or truncated to fit, as required.
+ template<typename T>
+ void set_reg(unsigned size, unsigned code, T value,
+ RegLogMode log_mode = LogRegWrites,
+ Reg31Mode r31mode = Reg31IsZeroRegister) {
+ // Zero-extend the input.
+ uint64_t raw = 0;
+ VIXL_STATIC_ASSERT(sizeof(value) <= sizeof(raw));
+ memcpy(&raw, &value, sizeof(value));
+
+ // Write (and possibly truncate) the value.
+ switch (size) {
+ case kWRegSize:
+ set_reg(code, static_cast<uint32_t>(raw), log_mode, r31mode);
+ break;
+ case kXRegSize:
+ set_reg(code, raw, log_mode, r31mode);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ return;
+ }
+ }
+
+ // Common specialized accessors for the set_reg() template.
+
+ // Commonly-used special cases.
+ template<typename T>
+ void set_lr(T value) {
+ set_reg(kLinkRegCode, value);
+ }
+
+ template<typename T>
+ void set_sp(T value) {
+ set_reg(31, value, LogRegWrites, Reg31IsStackPointer);
+ }
+
+ // Vector register accessors.
+ // These are equivalent to the integer register accessors, but for vector
+ // registers.
+
+ // A structure for representing a 128-bit Q register.
+ struct qreg_t { uint8_t val[kQRegSizeInBytes]; };
+
+ // Basic accessor: read the register as the specified type.
+ template<typename T>
+ T vreg(unsigned code) const {
+ VIXL_STATIC_ASSERT((sizeof(T) == kBRegSizeInBytes) ||
+ (sizeof(T) == kHRegSizeInBytes) ||
+ (sizeof(T) == kSRegSizeInBytes) ||
+ (sizeof(T) == kDRegSizeInBytes) ||
+ (sizeof(T) == kQRegSizeInBytes));
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+
+ return vregisters_[code].Get<T>();
+ }
+
+ // Common specialized accessors for the vreg() template.
+ int8_t breg(unsigned code) const {
+ return vreg<int8_t>(code);
+ }
+
+ int16_t hreg(unsigned code) const {
+ return vreg<int16_t>(code);
+ }
+
+ float sreg(unsigned code) const {
+ return vreg<float>(code);
+ }
+
+ uint32_t sreg_bits(unsigned code) const {
+ return vreg<uint32_t>(code);
+ }
+
+ double dreg(unsigned code) const {
+ return vreg<double>(code);
+ }
+
+ uint64_t dreg_bits(unsigned code) const {
+ return vreg<uint64_t>(code);
+ }
+
+ qreg_t qreg(unsigned code) const {
+ return vreg<qreg_t>(code);
+ }
+
+ // As above, with parameterized size and return type. The value is
+ // either zero-extended or truncated to fit, as required.
+ template<typename T>
+ T vreg(unsigned size, unsigned code) const {
+ uint64_t raw = 0;
+ T result;
+
+ switch (size) {
+ case kSRegSize: raw = vreg<uint32_t>(code); break;
+ case kDRegSize: raw = vreg<uint64_t>(code); break;
+ default:
+ VIXL_UNREACHABLE();
+ break;
+ }
+
+ VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw));
+ // Copy the result and truncate to fit. This assumes a little-endian host.
+ memcpy(&result, &raw, sizeof(result));
+ return result;
+ }
+
+ inline SimVRegister& vreg(unsigned code) {
+ return vregisters_[code];
+ }
+
+ // Basic accessor: Write the specified value.
+ template<typename T>
+ void set_vreg(unsigned code, T value,
+ RegLogMode log_mode = LogRegWrites) {
+ VIXL_STATIC_ASSERT((sizeof(value) == kBRegSizeInBytes) ||
+ (sizeof(value) == kHRegSizeInBytes) ||
+ (sizeof(value) == kSRegSizeInBytes) ||
+ (sizeof(value) == kDRegSizeInBytes) ||
+ (sizeof(value) == kQRegSizeInBytes));
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ vregisters_[code].Set(value);
+
+ if (log_mode == LogRegWrites) {
+ LogVRegister(code, GetPrintRegisterFormat(value));
+ }
+ }
+
+ // Common specialized accessors for the set_vreg() template.
+ void set_breg(unsigned code, int8_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ void set_hreg(unsigned code, int16_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ void set_sreg(unsigned code, float value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ void set_sreg_bits(unsigned code, uint32_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ void set_dreg(unsigned code, double value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ void set_dreg_bits(unsigned code, uint64_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ void set_qreg(unsigned code, qreg_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ bool N() const { return nzcv_.N() != 0; }
+ bool Z() const { return nzcv_.Z() != 0; }
+ bool C() const { return nzcv_.C() != 0; }
+ bool V() const { return nzcv_.V() != 0; }
+
+ SimSystemRegister& ReadNzcv() { return nzcv_; }
+ SimSystemRegister& nzcv() { return nzcv_; }
+
+ // TODO: Find a way to make the fpcr_ members return the proper types, so
+ // these accessors are not necessary.
+ FPRounding RMode() { return static_cast<FPRounding>(fpcr_.RMode()); }
+ bool DN() { return fpcr_.DN() != 0; }
+ SimSystemRegister& fpcr() { return fpcr_; }
+
+ UseDefaultNaN ReadDN() const {
+ return fpcr_.DN() != 0 ? kUseDefaultNaN : kIgnoreDefaultNaN;
+ }
+
+ // Specify relevant register formats for Print(V)Register and related helpers.
+ enum PrintRegisterFormat {
+ // The lane size.
+ kPrintRegLaneSizeB = 0 << 0,
+ kPrintRegLaneSizeH = 1 << 0,
+ kPrintRegLaneSizeS = 2 << 0,
+ kPrintRegLaneSizeW = kPrintRegLaneSizeS,
+ kPrintRegLaneSizeD = 3 << 0,
+ kPrintRegLaneSizeX = kPrintRegLaneSizeD,
+ kPrintRegLaneSizeQ = 4 << 0,
+
+ kPrintRegLaneSizeOffset = 0,
+ kPrintRegLaneSizeMask = 7 << 0,
+
+ // The lane count.
+ kPrintRegAsScalar = 0,
+ kPrintRegAsDVector = 1 << 3,
+ kPrintRegAsQVector = 2 << 3,
+
+ kPrintRegAsVectorMask = 3 << 3,
+
+ // Indicate floating-point format lanes. (This flag is only supported for S-
+ // and D-sized lanes.)
+ kPrintRegAsFP = 1 << 5,
+
+ // Supported combinations.
+
+ kPrintXReg = kPrintRegLaneSizeX | kPrintRegAsScalar,
+ kPrintWReg = kPrintRegLaneSizeW | kPrintRegAsScalar,
+ kPrintSReg = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
+ kPrintDReg = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,
+
+ kPrintReg1B = kPrintRegLaneSizeB | kPrintRegAsScalar,
+ kPrintReg8B = kPrintRegLaneSizeB | kPrintRegAsDVector,
+ kPrintReg16B = kPrintRegLaneSizeB | kPrintRegAsQVector,
+ kPrintReg1H = kPrintRegLaneSizeH | kPrintRegAsScalar,
+ kPrintReg4H = kPrintRegLaneSizeH | kPrintRegAsDVector,
+ kPrintReg8H = kPrintRegLaneSizeH | kPrintRegAsQVector,
+ kPrintReg1S = kPrintRegLaneSizeS | kPrintRegAsScalar,
+ kPrintReg2S = kPrintRegLaneSizeS | kPrintRegAsDVector,
+ kPrintReg4S = kPrintRegLaneSizeS | kPrintRegAsQVector,
+ kPrintReg1SFP = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
+ kPrintReg2SFP = kPrintRegLaneSizeS | kPrintRegAsDVector | kPrintRegAsFP,
+ kPrintReg4SFP = kPrintRegLaneSizeS | kPrintRegAsQVector | kPrintRegAsFP,
+ kPrintReg1D = kPrintRegLaneSizeD | kPrintRegAsScalar,
+ kPrintReg2D = kPrintRegLaneSizeD | kPrintRegAsQVector,
+ kPrintReg1DFP = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,
+ kPrintReg2DFP = kPrintRegLaneSizeD | kPrintRegAsQVector | kPrintRegAsFP,
+ kPrintReg1Q = kPrintRegLaneSizeQ | kPrintRegAsScalar
+ };
+
+ unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format) {
+ return (format & kPrintRegLaneSizeMask) >> kPrintRegLaneSizeOffset;
+ }
+
+ unsigned GetPrintRegLaneSizeInBytes(PrintRegisterFormat format) {
+ return 1 << GetPrintRegLaneSizeInBytesLog2(format);
+ }
+
+ unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) {
+ if (format & kPrintRegAsDVector) return kDRegSizeInBytesLog2;
+ if (format & kPrintRegAsQVector) return kQRegSizeInBytesLog2;
+
+ // Scalar types.
+ return GetPrintRegLaneSizeInBytesLog2(format);
+ }
+
+ unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) {
+ return 1 << GetPrintRegSizeInBytesLog2(format);
+ }
+
+ unsigned GetPrintRegLaneCount(PrintRegisterFormat format) {
+ unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format);
+ unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format);
+ VIXL_ASSERT(reg_size_log2 >= lane_size_log2);
+ return 1 << (reg_size_log2 - lane_size_log2);
+ }
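+ // For example, kPrintReg4S combines kPrintRegLaneSizeS with
+ // kPrintRegAsQVector, so the helpers above report 4-byte lanes, a 16-byte
+ // register and 4 lanes.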
+
+ PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned reg_size,
+ unsigned lane_size);
+
+ PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned size) {
+ return GetPrintRegisterFormatForSize(size, size);
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormatForSizeFP(unsigned size) {
+ switch (size) {
+ default: VIXL_UNREACHABLE(); return kPrintDReg;
+ case kDRegSizeInBytes: return kPrintDReg;
+ case kSRegSizeInBytes: return kPrintSReg;
+ }
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormatTryFP(PrintRegisterFormat format) {
+ if ((GetPrintRegLaneSizeInBytes(format) == kSRegSizeInBytes) ||
+ (GetPrintRegLaneSizeInBytes(format) == kDRegSizeInBytes)) {
+ return static_cast<PrintRegisterFormat>(format | kPrintRegAsFP);
+ }
+ return format;
+ }
+
+ template<typename T>
+ PrintRegisterFormat GetPrintRegisterFormat(T value) {
+ return GetPrintRegisterFormatForSize(sizeof(value));
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormat(double value) {
+ VIXL_STATIC_ASSERT(sizeof(value) == kDRegSizeInBytes);
+ return GetPrintRegisterFormatForSizeFP(sizeof(value));
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormat(float value) {
+ VIXL_STATIC_ASSERT(sizeof(value) == kSRegSizeInBytes);
+ return GetPrintRegisterFormatForSizeFP(sizeof(value));
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormat(VectorFormat vform);
+
+ // Print all registers of the specified types.
+ void PrintRegisters();
+ void PrintVRegisters();
+ void PrintSystemRegisters();
+
+ // As above, but only print the registers that have been updated.
+ void PrintWrittenRegisters();
+ void PrintWrittenVRegisters();
+
+ // As above, but respect LOG_REGS and LOG_VREGS.
+ inline void LogWrittenRegisters() {
+ if (trace_parameters() & LOG_REGS) PrintWrittenRegisters();
+ }
+ inline void LogWrittenVRegisters() {
+ if (trace_parameters() & LOG_VREGS) PrintWrittenVRegisters();
+ }
+ inline void LogAllWrittenRegisters() {
+ LogWrittenRegisters();
+ LogWrittenVRegisters();
+ }
+
+ // Print individual register values (after update).
+ void PrintRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer);
+ void PrintVRegister(unsigned code, PrintRegisterFormat format);
+ void PrintSystemRegister(SystemRegister id);
+
+ // Like Print* (above), but respect trace_parameters().
+ void LogRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer) {
+ if (trace_parameters() & LOG_REGS) PrintRegister(code, r31mode);
+ }
+ void LogVRegister(unsigned code, PrintRegisterFormat format) {
+ if (trace_parameters() & LOG_VREGS) PrintVRegister(code, format);
+ }
+ void LogSystemRegister(SystemRegister id) {
+ if (trace_parameters() & LOG_SYSREGS) PrintSystemRegister(id);
+ }
+
+ // Print memory accesses.
+ void PrintRead(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format);
+ void PrintWrite(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format);
+ void PrintVRead(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format, unsigned lane);
+ void PrintVWrite(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format, unsigned lane);
+
+ // Like Print* (above), but respect trace_parameters().
+ void LogRead(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format) {
+ if (trace_parameters() & LOG_REGS) PrintRead(address, reg_code, format);
+ }
+ void LogWrite(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format) {
+ if (trace_parameters() & LOG_WRITE) PrintWrite(address, reg_code, format);
+ }
+ void LogVRead(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format, unsigned lane = 0) {
+ if (trace_parameters() & LOG_VREGS) {
+ PrintVRead(address, reg_code, format, lane);
+ }
+ }
+ void LogVWrite(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format, unsigned lane = 0) {
+ if (trace_parameters() & LOG_WRITE) {
+ PrintVWrite(address, reg_code, format, lane);
+ }
+ }
+
+ // Helper functions for register tracing.
+ void PrintRegisterRawHelper(unsigned code, Reg31Mode r31mode,
+ int size_in_bytes = kXRegSizeInBytes);
+ void PrintVRegisterRawHelper(unsigned code, int bytes = kQRegSizeInBytes,
+ int lsb = 0);
+ void PrintVRegisterFPHelper(unsigned code, unsigned lane_size_in_bytes,
+ int lane_count = 1, int rightmost_lane = 0);
+
+ void DoUnreachable(const Instruction* instr);
+ void DoTrace(const Instruction* instr);
+ void DoLog(const Instruction* instr);
+
+ static const char* WRegNameForCode(unsigned code,
+ Reg31Mode mode = Reg31IsZeroRegister);
+ static const char* XRegNameForCode(unsigned code,
+ Reg31Mode mode = Reg31IsZeroRegister);
+ static const char* SRegNameForCode(unsigned code);
+ static const char* DRegNameForCode(unsigned code);
+ static const char* VRegNameForCode(unsigned code);
+
+ bool coloured_trace() const { return coloured_trace_; }
+ void set_coloured_trace(bool value);
+
+ int trace_parameters() const { return trace_parameters_; }
+ void set_trace_parameters(int parameters);
+
+ void set_instruction_stats(bool value);
+
+ // Clear the simulated local monitor to force the next store-exclusive
+ // instruction to fail.
+ void ClearLocalMonitor() {
+ local_monitor_.Clear();
+ }
+
+ void SilenceExclusiveAccessWarning() {
+ print_exclusive_access_warning_ = false;
+ }
+
+ protected:
+ const char* clr_normal;
+ const char* clr_flag_name;
+ const char* clr_flag_value;
+ const char* clr_reg_name;
+ const char* clr_reg_value;
+ const char* clr_vreg_name;
+ const char* clr_vreg_value;
+ const char* clr_memory_address;
+ const char* clr_warning;
+ const char* clr_warning_message;
+ const char* clr_printf;
+
+ // Simulation helpers ------------------------------------
+ bool ConditionPassed(Condition cond) {
+ switch (cond) {
+ case eq:
+ return Z();
+ case ne:
+ return !Z();
+ case hs:
+ return C();
+ case lo:
+ return !C();
+ case mi:
+ return N();
+ case pl:
+ return !N();
+ case vs:
+ return V();
+ case vc:
+ return !V();
+ case hi:
+ return C() && !Z();
+ case ls:
+ return !(C() && !Z());
+ case ge:
+ return N() == V();
+ case lt:
+ return N() != V();
+ case gt:
+ return !Z() && (N() == V());
+ case le:
+ return !(!Z() && (N() == V()));
+ case nv:
+ VIXL_FALLTHROUGH();
+ case al:
+ return true;
+ default:
+ VIXL_UNREACHABLE();
+ return false;
+ }
+ }
+
+ bool ConditionPassed(Instr cond) {
+ return ConditionPassed(static_cast<Condition>(cond));
+ }
+
+ bool ConditionFailed(Condition cond) {
+ return !ConditionPassed(cond);
+ }
+
+ void AddSubHelper(const Instruction* instr, int64_t op2);
+ uint64_t AddWithCarry(unsigned reg_size,
+ bool set_flags,
+ uint64_t left,
+ uint64_t right,
+ int carry_in = 0);
+ void LogicalHelper(const Instruction* instr, int64_t op2);
+ void ConditionalCompareHelper(const Instruction* instr, int64_t op2);
+ void LoadStoreHelper(const Instruction* instr,
+ int64_t offset,
+ AddrMode addrmode);
+ void LoadStorePairHelper(const Instruction* instr, AddrMode addrmode);
+ template <typename T>
+ void CompareAndSwapHelper(const Instruction* instr);
+ template <typename T>
+ void CompareAndSwapPairHelper(const Instruction* instr);
+ template <typename T>
+ void AtomicMemorySimpleHelper(const Instruction* instr);
+ template <typename T>
+ void AtomicMemorySwapHelper(const Instruction* instr);
+ template <typename T>
+ void LoadAcquireRCpcHelper(const Instruction* instr);
+ uintptr_t AddressModeHelper(unsigned addr_reg,
+ int64_t offset,
+ AddrMode addrmode);
+ void NEONLoadStoreMultiStructHelper(const Instruction* instr,
+ AddrMode addr_mode);
+ void NEONLoadStoreSingleStructHelper(const Instruction* instr,
+ AddrMode addr_mode);
+
+ uint64_t AddressUntag(uint64_t address) {
+ return address & ~kAddressTagMask;
+ }
+
+ template <typename T>
+ T* AddressUntag(T* address) {
+ uintptr_t address_raw = reinterpret_cast<uintptr_t>(address);
+ return reinterpret_cast<T*>(AddressUntag(address_raw));
+ }
+
+ int64_t ShiftOperand(unsigned reg_size,
+ int64_t value,
+ Shift shift_type,
+ unsigned amount);
+ int64_t Rotate(unsigned reg_width,
+ int64_t value,
+ Shift shift_type,
+ unsigned amount);
+ int64_t ExtendValue(unsigned reg_width,
+ int64_t value,
+ Extend extend_type,
+ unsigned left_shift = 0);
+ uint16_t PolynomialMult(uint8_t op1, uint8_t op2);
+
+ void ld1(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t addr);
+ void ld1(VectorFormat vform,
+ LogicVRegister dst,
+ int index,
+ uint64_t addr);
+ void ld1r(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t addr);
+ void ld2(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ uint64_t addr);
+ void ld2(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ int index,
+ uint64_t addr);
+ void ld2r(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ uint64_t addr);
+ void ld3(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ uint64_t addr);
+ void ld3(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ int index,
+ uint64_t addr);
+ void ld3r(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ uint64_t addr);
+ void ld4(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ uint64_t addr);
+ void ld4(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ int index,
+ uint64_t addr);
+ void ld4r(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ uint64_t addr);
+ void st1(VectorFormat vform,
+ LogicVRegister src,
+ uint64_t addr);
+ void st1(VectorFormat vform,
+ LogicVRegister src,
+ int index,
+ uint64_t addr);
+ void st2(VectorFormat vform,
+ LogicVRegister src,
+ LogicVRegister src2,
+ uint64_t addr);
+ void st2(VectorFormat vform,
+ LogicVRegister src,
+ LogicVRegister src2,
+ int index,
+ uint64_t addr);
+ void st3(VectorFormat vform,
+ LogicVRegister src,
+ LogicVRegister src2,
+ LogicVRegister src3,
+ uint64_t addr);
+ void st3(VectorFormat vform,
+ LogicVRegister src,
+ LogicVRegister src2,
+ LogicVRegister src3,
+ int index,
+ uint64_t addr);
+ void st4(VectorFormat vform,
+ LogicVRegister src,
+ LogicVRegister src2,
+ LogicVRegister src3,
+ LogicVRegister src4,
+ uint64_t addr);
+ void st4(VectorFormat vform,
+ LogicVRegister src,
+ LogicVRegister src2,
+ LogicVRegister src3,
+ LogicVRegister src4,
+ int index,
+ uint64_t addr);
+ LogicVRegister cmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond);
+ LogicVRegister cmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ int imm,
+ Condition cond);
+ LogicVRegister cmptst(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister add(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister addp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister mla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister mls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister mul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister mul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister mla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister mls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister pmul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+
+ typedef LogicVRegister (Simulator::*ByElementOp)(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister fmul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister fmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister fmls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister fmulx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister smull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister smull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister umull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister umull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister smlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister smlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister umlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister umlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister smlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister smlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister umlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister umlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqrdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sub(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister and_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister orr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister orn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister eor(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister bic(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister bic(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ uint64_t imm);
+ LogicVRegister bif(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister bit(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister bsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister cls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister clz(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister cnt(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister not_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister rbit(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister rev(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int revSize);
+ LogicVRegister rev16(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister rev32(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister rev64(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister addlp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ bool is_signed,
+ bool do_accumulate);
+ LogicVRegister saddlp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uaddlp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sadalp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uadalp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister ext(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister ins_element(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ const LogicVRegister& src,
+ int src_index);
+ LogicVRegister ins_immediate(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ uint64_t imm);
+ LogicVRegister dup_element(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int src_index);
+ LogicVRegister dup_immediate(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t imm);
+ LogicVRegister mov(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister movi(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t imm);
+ LogicVRegister mvni(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t imm);
+ LogicVRegister orr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ uint64_t imm);
+ LogicVRegister sshl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister ushl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister sminmax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool max);
+ LogicVRegister smax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister smin(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister sminmaxp(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ const LogicVRegister& src,
+ bool max);
+ LogicVRegister smaxp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister sminp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister addp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister addv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uaddlv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister saddlv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sminmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ bool max);
+ LogicVRegister smaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uxtl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uxtl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sxtl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sxtl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& ind);
+ LogicVRegister tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& ind);
+ LogicVRegister tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& ind);
+ LogicVRegister tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& tab4,
+ const LogicVRegister& ind);
+ LogicVRegister tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& ind);
+ LogicVRegister tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& ind);
+ LogicVRegister tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& ind);
+ LogicVRegister tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& tab4,
+ const LogicVRegister& ind);
+ LogicVRegister uaddl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uaddl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uaddw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uaddw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister saddl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister saddl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister saddw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister saddw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister usubl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister usubl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister usubw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister usubw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister ssubl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister ssubl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister ssubw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister ssubw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uminmax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool max);
+ LogicVRegister umax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister umin(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uminmaxp(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ const LogicVRegister& src,
+ bool max);
+ LogicVRegister umaxp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uminp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uminmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ bool max);
+ LogicVRegister umaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister trn1(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister trn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister zip1(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister zip2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uzp1(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uzp2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister shl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister scvtf(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int fbits,
+ FPRounding rounding_mode);
+ LogicVRegister ucvtf(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int fbits,
+ FPRounding rounding_mode);
+ LogicVRegister sshll(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sshll2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister shll(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister shll2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister ushll(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister ushll2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sli(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sri(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sshr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister ushr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister ssra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister usra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister srsra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister ursra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister suqadd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister usqadd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sqshl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister uqshl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqshlu(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister abs(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister neg(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister extractnarrow(VectorFormat vform,
+ LogicVRegister dst,
+ bool dstIsSigned,
+ const LogicVRegister& src,
+ bool srcIsSigned);
+ LogicVRegister xtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sqxtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uqxtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sqxtun(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister absdiff(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool issigned);
+ LogicVRegister saba(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uaba(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister shrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister shrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister rshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister rshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister uqshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister uqshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister uqrshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister uqrshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqrshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqrshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqshrun(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqshrun2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqrshrun(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqrshrun2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqrdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool round = true);
+ LogicVRegister sqdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ #define NEON_3VREG_LOGIC_LIST(V) \
+ V(addhn) \
+ V(addhn2) \
+ V(raddhn) \
+ V(raddhn2) \
+ V(subhn) \
+ V(subhn2) \
+ V(rsubhn) \
+ V(rsubhn2) \
+ V(pmull) \
+ V(pmull2) \
+ V(sabal) \
+ V(sabal2) \
+ V(uabal) \
+ V(uabal2) \
+ V(sabdl) \
+ V(sabdl2) \
+ V(uabdl) \
+ V(uabdl2) \
+ V(smull) \
+ V(smull2) \
+ V(umull) \
+ V(umull2) \
+ V(smlal) \
+ V(smlal2) \
+ V(umlal) \
+ V(umlal2) \
+ V(smlsl) \
+ V(smlsl2) \
+ V(umlsl) \
+ V(umlsl2) \
+ V(sqdmlal) \
+ V(sqdmlal2) \
+ V(sqdmlsl) \
+ V(sqdmlsl2) \
+ V(sqdmull) \
+ V(sqdmull2)
+
+ #define DEFINE_LOGIC_FUNC(FXN) \
+ LogicVRegister FXN(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2);
+ NEON_3VREG_LOGIC_LIST(DEFINE_LOGIC_FUNC)
+ #undef DEFINE_LOGIC_FUNC
+
+ #define NEON_FP3SAME_LIST(V) \
+ V(fadd, FPAdd, false) \
+ V(fsub, FPSub, true) \
+ V(fmul, FPMul, true) \
+ V(fmulx, FPMulx, true) \
+ V(fdiv, FPDiv, true) \
+ V(fmax, FPMax, false) \
+ V(fmin, FPMin, false) \
+ V(fmaxnm, FPMaxNM, false) \
+ V(fminnm, FPMinNM, false)
+
+ #define DECLARE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
+ template <typename T> \
+ LogicVRegister FN(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2); \
+ LogicVRegister FN(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2);
+ NEON_FP3SAME_LIST(DECLARE_NEON_FP_VECTOR_OP)
+ #undef DECLARE_NEON_FP_VECTOR_OP
+
+ #define NEON_FPPAIRWISE_LIST(V) \
+ V(faddp, fadd, FPAdd) \
+ V(fmaxp, fmax, FPMax) \
+ V(fmaxnmp, fmaxnm, FPMaxNM) \
+ V(fminp, fmin, FPMin) \
+ V(fminnmp, fminnm, FPMinNM)
+
+ #define DECLARE_NEON_FP_PAIR_OP(FNP, FN, OP) \
+ LogicVRegister FNP(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2); \
+ LogicVRegister FNP(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src);
+ NEON_FPPAIRWISE_LIST(DECLARE_NEON_FP_PAIR_OP)
+ #undef DECLARE_NEON_FP_PAIR_OP
+
+ template <typename T>
+ LogicVRegister frecps(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister frecps(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ template <typename T>
+ LogicVRegister frsqrts(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister frsqrts(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ template <typename T>
+ LogicVRegister fmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister fmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ template <typename T>
+ LogicVRegister fmls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister fmls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister fnmul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+
+ template <typename T>
+ LogicVRegister fcmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond);
+ LogicVRegister fcmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond);
+ LogicVRegister fabscmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond);
+ LogicVRegister fcmp_zero(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ Condition cond);
+
+ template <typename T>
+ LogicVRegister fneg(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fneg(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ template <typename T>
+ LogicVRegister frecpx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister frecpx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ template <typename T>
+ LogicVRegister fabs_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fabs_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fabd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister frint(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding rounding_mode,
+ bool inexact_exception = false);
+ LogicVRegister fcvts(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding rounding_mode,
+ int fbits = 0);
+ LogicVRegister fcvtu(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding rounding_mode,
+ int fbits = 0);
+ LogicVRegister fcvtl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fcvtl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fcvtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fcvtn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fcvtxn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fcvtxn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fsqrt(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister frsqrte(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister frecpe(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding rounding);
+ LogicVRegister ursqrte(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister urecpe(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+
+ typedef float (Simulator::*FPMinMaxOp)(float a, float b);
+
+ LogicVRegister fminmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPMinMaxOp Op);
+
+ LogicVRegister fminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fminnmv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fmaxnmv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+
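+ // Generator polynomials (normal, MSB-first form) used by the CRC32 and
+ // CRC32C instruction families.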
+ static const uint32_t CRC32_POLY = 0x04C11DB7;
+ static const uint32_t CRC32C_POLY = 0x1EDC6F41;
+ uint32_t Poly32Mod2(unsigned n, uint64_t data, uint32_t poly);
+ template <typename T>
+ uint32_t Crc32Checksum(uint32_t acc, T val, uint32_t poly);
+ uint32_t Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly);
+
+ void SysOp_W(int op, int64_t val);
+
+ template <typename T>
+ T FPRecipSqrtEstimate(T op);
+ template <typename T>
+ T FPRecipEstimate(T op, FPRounding rounding);
+ template <typename T, typename R>
+ R FPToFixed(T op, int fbits, bool is_signed, FPRounding rounding);
+
+ void FPCompare(double val0, double val1, FPTrapFlags trap);
+ double FPRoundInt(double value, FPRounding round_mode);
+ double recip_sqrt_estimate(double a);
+ double recip_estimate(double a);
+ double FPRecipSqrtEstimate(double a);
+ double FPRecipEstimate(double a);
+ double FixedToDouble(int64_t src, int fbits, FPRounding round_mode);
+ double UFixedToDouble(uint64_t src, int fbits, FPRounding round_mode);
+ float FixedToFloat(int64_t src, int fbits, FPRounding round_mode);
+ float UFixedToFloat(uint64_t src, int fbits, FPRounding round_mode);
+ int32_t FPToInt32(double value, FPRounding rmode);
+ int64_t FPToInt64(double value, FPRounding rmode);
+ uint32_t FPToUInt32(double value, FPRounding rmode);
+ uint64_t FPToUInt64(double value, FPRounding rmode);
+ int32_t FPToFixedJS(double value);
+
+ template <typename T>
+ T FPAdd(T op1, T op2);
+
+ template <typename T>
+ T FPDiv(T op1, T op2);
+
+ template <typename T>
+ T FPMax(T a, T b);
+
+ template <typename T>
+ T FPMaxNM(T a, T b);
+
+ template <typename T>
+ T FPMin(T a, T b);
+
+ template <typename T>
+ T FPMinNM(T a, T b);
+
+ template <typename T>
+ T FPMul(T op1, T op2);
+
+ template <typename T>
+ T FPMulx(T op1, T op2);
+
+ template <typename T>
+ T FPMulAdd(T a, T op1, T op2);
+
+ template <typename T>
+ T FPSqrt(T op);
+
+ template <typename T>
+ T FPSub(T op1, T op2);
+
+ template <typename T>
+ T FPRecipStepFused(T op1, T op2);
+
+ template <typename T>
+ T FPRSqrtStepFused(T op1, T op2);
+
+  // This doesn't do anything at the moment. We'll need it if we want to
+  // support cumulative exception bits or floating-point exceptions.
+ void FPProcessException() { }
+
+ bool FPProcessNaNs(const Instruction* instr);
+
+ // Pseudo Printf instruction
+ void DoPrintf(const Instruction* instr);
+
+ // Processor state ---------------------------------------
+
+ // Simulated monitors for exclusive access instructions.
+ SimExclusiveLocalMonitor local_monitor_;
+ SimExclusiveGlobalMonitor global_monitor_;
+
+ // Output stream.
+ FILE* stream_;
+ PrintDisassembler* print_disasm_;
+
+ // Instruction statistics instrumentation.
+ Instrument* instrumentation_;
+
+ // General purpose registers. Register 31 is the stack pointer.
+ SimRegister registers_[kNumberOfRegisters];
+
+ // Vector registers
+ SimVRegister vregisters_[kNumberOfVRegisters];
+
+ // Program Status Register.
+  // bits[31..28]: Condition flags N, Z, C, and V.
+ // (Negative, Zero, Carry, Overflow)
+ SimSystemRegister nzcv_;
+
+ // Floating-Point Control Register
+ SimSystemRegister fpcr_;
+
+  // Only a subset of FPCR features is supported by the simulator. This helper
+ // checks that the FPCR settings are supported.
+ //
+ // This is checked when floating-point instructions are executed, not when
+ // FPCR is set. This allows generated code to modify FPCR for external
+ // functions, or to save and restore it when entering and leaving generated
+ // code.
+ void AssertSupportedFPCR() {
+ VIXL_ASSERT(fpcr().FZ() == 0); // No flush-to-zero support.
+ VIXL_ASSERT(fpcr().RMode() == FPTieEven); // Ties-to-even rounding only.
+
+ // The simulator does not support half-precision operations so fpcr().AHP()
+ // is irrelevant, and is not checked here.
+ }
+
+ static int CalcNFlag(uint64_t result, unsigned reg_size) {
+ return (result >> (reg_size - 1)) & 1;
+ }
+
+ static int CalcZFlag(uint64_t result) {
+ return (result == 0) ? 1 : 0;
+ }
+
+ static const uint32_t kConditionFlagsMask = 0xf0000000;
+
+ // Stack
+ byte* stack_;
+ static const int stack_protection_size_ = 512 * KBytes;
+ static const int stack_size_ = (2 * MBytes) + (2 * stack_protection_size_);
+ byte* stack_limit_;
+
+ Decoder* decoder_;
+ // Indicates if the pc has been modified by the instruction and should not be
+ // automatically incremented.
+ bool pc_modified_;
+ const Instruction* pc_;
+
+ static const char* xreg_names[];
+ static const char* wreg_names[];
+ static const char* sreg_names[];
+ static const char* dreg_names[];
+ static const char* vreg_names[];
+
+ static const Instruction* kEndOfSimAddress;
+
+ private:
+ template <typename T>
+ static T FPDefaultNaN();
+
+ // Standard NaN processing.
+ template <typename T>
+ T FPProcessNaN(T op) {
+ VIXL_ASSERT(std::isnan(op));
+ if (IsSignallingNaN(op)) {
+ FPProcessException();
+ }
+ return DN() ? FPDefaultNaN<T>() : ToQuietNaN(op);
+ }
+
+ template <typename T>
+ T FPProcessNaNs(T op1, T op2) {
+ if (IsSignallingNaN(op1)) {
+ return FPProcessNaN(op1);
+ } else if (IsSignallingNaN(op2)) {
+ return FPProcessNaN(op2);
+ } else if (std::isnan(op1)) {
+ VIXL_ASSERT(IsQuietNaN(op1));
+ return FPProcessNaN(op1);
+ } else if (std::isnan(op2)) {
+ VIXL_ASSERT(IsQuietNaN(op2));
+ return FPProcessNaN(op2);
+ } else {
+ return 0.0;
+ }
+ }
+
+ template <typename T>
+ T FPProcessNaNs3(T op1, T op2, T op3) {
+ if (IsSignallingNaN(op1)) {
+ return FPProcessNaN(op1);
+ } else if (IsSignallingNaN(op2)) {
+ return FPProcessNaN(op2);
+ } else if (IsSignallingNaN(op3)) {
+ return FPProcessNaN(op3);
+ } else if (std::isnan(op1)) {
+ VIXL_ASSERT(IsQuietNaN(op1));
+ return FPProcessNaN(op1);
+ } else if (std::isnan(op2)) {
+ VIXL_ASSERT(IsQuietNaN(op2));
+ return FPProcessNaN(op2);
+ } else if (std::isnan(op3)) {
+ VIXL_ASSERT(IsQuietNaN(op3));
+ return FPProcessNaN(op3);
+ } else {
+ return 0.0;
+ }
+ }
+
+ bool coloured_trace_;
+
+ // A set of TraceParameters flags.
+ int trace_parameters_;
+
+ // Indicates whether the instruction instrumentation is active.
+ bool instruction_stats_;
+
+ // Indicates whether the exclusive-access warning has been printed.
+ bool print_exclusive_access_warning_;
+ void PrintExclusiveAccessWarning();
+
+ // Indicates that the simulator ran out of memory at some point.
+ // Data structures may not be fully allocated.
+ bool oom_;
+
+ public:
+ // True if the simulator ran out of memory during or after construction.
+ bool oom() const { return oom_; }
+
+ protected:
+ mozilla::Vector<int64_t, 0, js::SystemAllocPolicy> spStack_;
+};
+
+} // namespace vixl
+
+namespace js {
+namespace jit {
+
+class SimulatorProcess
+{
+ public:
+ static SimulatorProcess* singleton_;
+
+ SimulatorProcess()
+ : lock_(mutexid::Arm64SimulatorLock)
+ , redirection_(nullptr)
+ {}
+
+ // Synchronizes access between main thread and compilation threads.
+ js::Mutex lock_ MOZ_UNANNOTATED;
+ vixl::Redirection* redirection_;
+
+#ifdef JS_CACHE_SIMULATOR_ARM64
+  // For each simulator, record the instruction ranges that other threads have
+  // registered as invalidated.
+ struct ICacheFlush {
+ void* start;
+ size_t length;
+ };
+ using ICacheFlushes = mozilla::Vector<ICacheFlush, 2>;
+ struct SimFlushes {
+ vixl::Simulator* thread;
+ ICacheFlushes records;
+ };
+ mozilla::Vector<SimFlushes, 1> pendingFlushes_;
+
+ static void recordICacheFlush(void* start, size_t length);
+ static void membarrier();
+ static ICacheFlushes& getICacheFlushes(vixl::Simulator* sim);
+ [[nodiscard]] static bool registerSimulator(vixl::Simulator* sim);
+ static void unregisterSimulator(vixl::Simulator* sim);
+#endif
+
+ static void setRedirection(vixl::Redirection* redirection) {
+ singleton_->lock_.assertOwnedByCurrentThread();
+ singleton_->redirection_ = redirection;
+ }
+
+ static vixl::Redirection* redirection() {
+ singleton_->lock_.assertOwnedByCurrentThread();
+ return singleton_->redirection_;
+ }
+
+ static bool initialize() {
+ singleton_ = js_new<SimulatorProcess>();
+ return !!singleton_;
+ }
+ static void destroy() {
+ js_delete(singleton_);
+ singleton_ = nullptr;
+ }
+};
+
+// Protects the icache and redirection properties of the simulator.
+class AutoLockSimulatorCache : public js::LockGuard<js::Mutex>
+{
+ using Base = js::LockGuard<js::Mutex>;
+
+ public:
+ explicit AutoLockSimulatorCache()
+ : Base(SimulatorProcess::singleton_->lock_)
+ {
+ }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif // JS_SIMULATOR_ARM64
+#endif // VIXL_A64_SIMULATOR_A64_H_
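The NaN-propagation helpers declared in the simulator header above (FPProcessNaN, FPProcessNaNs, FPProcessNaNs3) follow a fixed priority: signalling NaNs are quietened and returned before quiet NaNs, earlier operands win over later ones, and 0.0 means "no NaN involved" so the caller performs the ordinary arithmetic. A minimal standalone sketch of that selection order, ignoring the FPCR.DN default-NaN case and using hypothetical helper names (IsSignallingNaNF, Quiet, ProcessNaNs), not the VIXL API:

```cpp
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// Stand-ins for VIXL's IsSignallingNaN/ToQuietNaN (single precision only).
static bool IsSignallingNaNF(float f) {
  uint32_t b; memcpy(&b, &f, 4);
  return std::isnan(f) && ((b & 0x00400000) == 0);  // quiet bit (bit 22) clear
}
static float Quiet(float f) {
  uint32_t b; memcpy(&b, &f, 4);
  b |= 0x00400000;                                   // force the quiet bit
  float q; memcpy(&q, &b, 4); return q;
}

// Mirrors the selection order of FPProcessNaNs: sNaN(op1), sNaN(op2),
// qNaN(op1), qNaN(op2), otherwise 0.0 meaning "no NaN to propagate".
static float ProcessNaNs(float op1, float op2) {
  if (IsSignallingNaNF(op1)) return Quiet(op1);
  if (IsSignallingNaNF(op2)) return Quiet(op2);
  if (std::isnan(op1)) return op1;
  if (std::isnan(op2)) return op2;
  return 0.0f;
}

int main() {
  float snan, qnan;
  uint32_t sbits = 0x7f800001, qbits = 0x7fc00000;
  memcpy(&snan, &sbits, 4);
  memcpy(&qnan, &qbits, 4);
  assert(std::isnan(ProcessNaNs(qnan, snan)));  // the signalling op2 wins over the quiet op1
  assert(ProcessNaNs(1.0f, 2.0f) == 0.0f);      // 0.0 signals "no NaN involved"
  return 0;
}
```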
diff --git a/js/src/jit/arm64/vixl/Utils-vixl.cpp b/js/src/jit/arm64/vixl/Utils-vixl.cpp
new file mode 100644
index 0000000000..381c3501d1
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Utils-vixl.cpp
@@ -0,0 +1,555 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+#include <cstdio>
+
+namespace vixl {
+
+// The default NaN values (for FPCR.DN=1).
+const double kFP64DefaultNaN = RawbitsToDouble(UINT64_C(0x7ff8000000000000));
+const float kFP32DefaultNaN = RawbitsToFloat(0x7fc00000);
+const Float16 kFP16DefaultNaN = RawbitsToFloat16(0x7e00);
+
+// Floating-point zero values.
+const Float16 kFP16PositiveZero = RawbitsToFloat16(0x0);
+const Float16 kFP16NegativeZero = RawbitsToFloat16(0x8000);
+
+// Floating-point infinity values.
+const Float16 kFP16PositiveInfinity = RawbitsToFloat16(0x7c00);
+const Float16 kFP16NegativeInfinity = RawbitsToFloat16(0xfc00);
+const float kFP32PositiveInfinity = RawbitsToFloat(0x7f800000);
+const float kFP32NegativeInfinity = RawbitsToFloat(0xff800000);
+const double kFP64PositiveInfinity =
+ RawbitsToDouble(UINT64_C(0x7ff0000000000000));
+const double kFP64NegativeInfinity =
+ RawbitsToDouble(UINT64_C(0xfff0000000000000));
+
+bool IsZero(Float16 value) {
+ uint16_t bits = Float16ToRawbits(value);
+ return (bits == Float16ToRawbits(kFP16PositiveZero) ||
+ bits == Float16ToRawbits(kFP16NegativeZero));
+}
+
+uint16_t Float16ToRawbits(Float16 value) { return value.rawbits_; }
+
+uint32_t FloatToRawbits(float value) {
+ uint32_t bits = 0;
+ memcpy(&bits, &value, 4);
+ return bits;
+}
+
+
+uint64_t DoubleToRawbits(double value) {
+ uint64_t bits = 0;
+ memcpy(&bits, &value, 8);
+ return bits;
+}
+
+
+Float16 RawbitsToFloat16(uint16_t bits) {
+ Float16 f;
+ f.rawbits_ = bits;
+ return f;
+}
+
+
+float RawbitsToFloat(uint32_t bits) {
+ float value = 0.0;
+ memcpy(&value, &bits, 4);
+ return value;
+}
+
+
+double RawbitsToDouble(uint64_t bits) {
+ double value = 0.0;
+ memcpy(&value, &bits, 8);
+ return value;
+}
+
+
+uint32_t Float16Sign(internal::SimFloat16 val) {
+ uint16_t rawbits = Float16ToRawbits(val);
+ return ExtractUnsignedBitfield32(15, 15, rawbits);
+}
+
+
+uint32_t Float16Exp(internal::SimFloat16 val) {
+ uint16_t rawbits = Float16ToRawbits(val);
+ return ExtractUnsignedBitfield32(14, 10, rawbits);
+}
+
+uint32_t Float16Mantissa(internal::SimFloat16 val) {
+ uint16_t rawbits = Float16ToRawbits(val);
+ return ExtractUnsignedBitfield32(9, 0, rawbits);
+}
+
+
+uint32_t FloatSign(float val) {
+ uint32_t rawbits = FloatToRawbits(val);
+ return ExtractUnsignedBitfield32(31, 31, rawbits);
+}
+
+
+uint32_t FloatExp(float val) {
+ uint32_t rawbits = FloatToRawbits(val);
+ return ExtractUnsignedBitfield32(30, 23, rawbits);
+}
+
+
+uint32_t FloatMantissa(float val) {
+ uint32_t rawbits = FloatToRawbits(val);
+ return ExtractUnsignedBitfield32(22, 0, rawbits);
+}
+
+
+uint32_t DoubleSign(double val) {
+ uint64_t rawbits = DoubleToRawbits(val);
+ return static_cast<uint32_t>(ExtractUnsignedBitfield64(63, 63, rawbits));
+}
+
+
+uint32_t DoubleExp(double val) {
+ uint64_t rawbits = DoubleToRawbits(val);
+ return static_cast<uint32_t>(ExtractUnsignedBitfield64(62, 52, rawbits));
+}
+
+
+uint64_t DoubleMantissa(double val) {
+ uint64_t rawbits = DoubleToRawbits(val);
+ return ExtractUnsignedBitfield64(51, 0, rawbits);
+}
+
+
+internal::SimFloat16 Float16Pack(uint16_t sign,
+ uint16_t exp,
+ uint16_t mantissa) {
+ uint16_t bits = (sign << 15) | (exp << 10) | mantissa;
+ return RawbitsToFloat16(bits);
+}
+
+
+float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa) {
+ uint32_t bits = (sign << 31) | (exp << 23) | mantissa;
+ return RawbitsToFloat(bits);
+}
+
+
+double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa) {
+ uint64_t bits = (sign << 63) | (exp << 52) | mantissa;
+ return RawbitsToDouble(bits);
+}
+
+
+int Float16Classify(Float16 value) {
+ uint16_t bits = Float16ToRawbits(value);
+ uint16_t exponent_max = (1 << 5) - 1;
+ uint16_t exponent_mask = exponent_max << 10;
+ uint16_t mantissa_mask = (1 << 10) - 1;
+
+ uint16_t exponent = (bits & exponent_mask) >> 10;
+ uint16_t mantissa = bits & mantissa_mask;
+ if (exponent == 0) {
+ if (mantissa == 0) {
+ return FP_ZERO;
+ }
+ return FP_SUBNORMAL;
+ } else if (exponent == exponent_max) {
+ if (mantissa == 0) {
+ return FP_INFINITE;
+ }
+ return FP_NAN;
+ }
+ return FP_NORMAL;
+}
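Float16Classify mirrors std::fpclassify for the 16-bit format: a zero exponent means zero or subnormal, an all-ones exponent means infinity or NaN, and everything else is normal. A self-contained sketch that re-derives the same classification from raw binary16 bits (a local helper, not a call into the function above):

```cpp
#include <cassert>
#include <cmath>
#include <cstdint>

// Classify raw IEEE-754 binary16 bits: 1 sign, 5 exponent, 10 mantissa bits.
static int ClassifyHalfBits(uint16_t bits) {
  uint16_t exponent = (bits >> 10) & 0x1f;
  uint16_t mantissa = bits & 0x3ff;
  if (exponent == 0) return (mantissa == 0) ? FP_ZERO : FP_SUBNORMAL;
  if (exponent == 0x1f) return (mantissa == 0) ? FP_INFINITE : FP_NAN;
  return FP_NORMAL;
}

int main() {
  assert(ClassifyHalfBits(0x0000) == FP_ZERO);       // +0.0
  assert(ClassifyHalfBits(0x0001) == FP_SUBNORMAL);  // smallest positive half
  assert(ClassifyHalfBits(0x3c00) == FP_NORMAL);     // 1.0
  assert(ClassifyHalfBits(0x7c00) == FP_INFINITE);   // +infinity
  assert(ClassifyHalfBits(0x7e00) == FP_NAN);        // default quiet NaN
  return 0;
}
```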
+
+
+unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size) {
+ VIXL_ASSERT((reg_size % 8) == 0);
+ int count = 0;
+ for (unsigned i = 0; i < (reg_size / 16); i++) {
+ if ((imm & 0xffff) == 0) {
+ count++;
+ }
+ imm >>= 16;
+ }
+ return count;
+}
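CountClearHalfWords walks the value 16 bits at a time and counts the halfwords that are zero; this kind of statistic is typically useful when picking between MOVZ- and MOVN-style immediate sequences (stated as general background, not as a claim about a specific caller here). A small standalone sketch of the same loop:

```cpp
#include <cassert>
#include <cstdint>

// Local re-implementation of the counting loop above, for illustration only.
static unsigned CountClearHalfWordsSketch(uint64_t imm, unsigned reg_size) {
  unsigned count = 0;
  for (unsigned i = 0; i < (reg_size / 16); i++) {
    if ((imm & 0xffff) == 0) count++;
    imm >>= 16;
  }
  return count;
}

int main() {
  assert(CountClearHalfWordsSketch(UINT64_C(0x0000ffff00000000), 64) == 3);
  assert(CountClearHalfWordsSketch(UINT64_C(0x12340000), 32) == 1);
  assert(CountClearHalfWordsSketch(0, 64) == 4);
  return 0;
}
```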
+
+
+int BitCount(uint64_t value) { return CountSetBits(value); }
+
+// Float16 definitions.
+
+Float16::Float16(double dvalue) {
+ rawbits_ =
+ Float16ToRawbits(FPToFloat16(dvalue, FPTieEven, kIgnoreDefaultNaN));
+}
+
+namespace internal {
+
+SimFloat16 SimFloat16::operator-() const {
+ return RawbitsToFloat16(rawbits_ ^ 0x8000);
+}
+
+// SimFloat16 definitions.
+SimFloat16 SimFloat16::operator+(SimFloat16 rhs) const {
+ return static_cast<double>(*this) + static_cast<double>(rhs);
+}
+
+SimFloat16 SimFloat16::operator-(SimFloat16 rhs) const {
+ return static_cast<double>(*this) - static_cast<double>(rhs);
+}
+
+SimFloat16 SimFloat16::operator*(SimFloat16 rhs) const {
+ return static_cast<double>(*this) * static_cast<double>(rhs);
+}
+
+SimFloat16 SimFloat16::operator/(SimFloat16 rhs) const {
+ return static_cast<double>(*this) / static_cast<double>(rhs);
+}
+
+bool SimFloat16::operator<(SimFloat16 rhs) const {
+ return static_cast<double>(*this) < static_cast<double>(rhs);
+}
+
+bool SimFloat16::operator>(SimFloat16 rhs) const {
+ return static_cast<double>(*this) > static_cast<double>(rhs);
+}
+
+bool SimFloat16::operator==(SimFloat16 rhs) const {
+ if (IsNaN(*this) || IsNaN(rhs)) {
+ return false;
+ } else if (IsZero(rhs) && IsZero(*this)) {
+ // +0 and -0 should be treated as equal.
+ return true;
+ }
+ return this->rawbits_ == rhs.rawbits_;
+}
+
+bool SimFloat16::operator!=(SimFloat16 rhs) const { return !(*this == rhs); }
+
+bool SimFloat16::operator==(double rhs) const {
+ return static_cast<double>(*this) == static_cast<double>(rhs);
+}
+
+SimFloat16::operator double() const {
+ return FPToDouble(*this, kIgnoreDefaultNaN);
+}
+
+Int64 BitCount(Uint32 value) { return CountSetBits(value.Get()); }
+
+} // namespace internal
+
+float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception) {
+ uint16_t bits = Float16ToRawbits(value);
+ uint32_t sign = bits >> 15;
+ uint32_t exponent =
+ ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
+ kFloat16MantissaBits,
+ bits);
+ uint32_t mantissa =
+ ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, bits);
+
+ switch (Float16Classify(value)) {
+ case FP_ZERO:
+ return (sign == 0) ? 0.0f : -0.0f;
+
+ case FP_INFINITE:
+ return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
+
+ case FP_SUBNORMAL: {
+ // Calculate shift required to put mantissa into the most-significant bits
+ // of the destination mantissa.
+ int shift = CountLeadingZeros(mantissa << (32 - 10));
+
+ // Shift mantissa and discard implicit '1'.
+ mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
+ mantissa &= (1 << kFloatMantissaBits) - 1;
+
+ // Adjust the exponent for the shift applied, and rebias.
+ exponent = exponent - shift + (-15 + 127);
+ break;
+ }
+
+ case FP_NAN:
+ if (IsSignallingNaN(value)) {
+ if (exception != NULL) {
+ *exception = true;
+ }
+ }
+ if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred entirely, except that the top
+ // bit is forced to '1', making the result a quiet NaN. The unused
+ // (low-order) payload bits are set to 0.
+ exponent = (1 << kFloatExponentBits) - 1;
+
+ // Increase bits in mantissa, making low-order bits 0.
+ mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
+ mantissa |= 1 << 22; // Force a quiet NaN.
+ break;
+
+ case FP_NORMAL:
+ // Increase bits in mantissa, making low-order bits 0.
+ mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
+
+ // Change exponent bias.
+ exponent += (-15 + 127);
+ break;
+
+ default:
+ VIXL_UNREACHABLE();
+ }
+ return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
+ mantissa);
+}
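The FP_SUBNORMAL branch above renormalizes the half-precision mantissa because every subnormal binary16 value is representable as a *normal* binary32 value, so no precision is lost. A worked example for the smallest positive half (raw bits 0x0001, value 2^-24), written as an independent check rather than a call into the function above:

```cpp
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

int main() {
  // Half 0x0001: sign 0, exponent 0, mantissa 1 -> value 2^-24.
  double value = std::ldexp(1.0, -24);
  float as_float = static_cast<float>(value);
  uint32_t bits;
  memcpy(&bits, &as_float, sizeof(bits));
  // As a float: exponent field 103 (= -24 + 127), mantissa 0 -> a normal value.
  assert(bits == 0x33800000u);
  return 0;
}
```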
+
+
+float FPToFloat(double value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception) {
+  // Only the FPTieEven and FPRoundOdd rounding modes are implemented.
+ VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
+ USE(round_mode);
+
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ if (exception != NULL) {
+ *exception = true;
+ }
+ }
+ if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred as much as possible, except
+ // that the top bit is forced to '1', making the result a quiet NaN.
+ uint64_t raw = DoubleToRawbits(value);
+
+ uint32_t sign = raw >> 63;
+ uint32_t exponent = (1 << 8) - 1;
+ uint32_t payload =
+ static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
+ payload |= (1 << 22); // Force a quiet NaN.
+
+ return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
+ }
+
+ case FP_ZERO:
+ case FP_INFINITE: {
+ // In a C++ cast, any value representable in the target type will be
+ // unchanged. This is always the case for +/-0.0 and infinities.
+ return static_cast<float>(value);
+ }
+
+ case FP_NORMAL:
+ case FP_SUBNORMAL: {
+ // Convert double-to-float as the processor would, assuming that FPCR.FZ
+ // (flush-to-zero) is not set.
+ uint64_t raw = DoubleToRawbits(value);
+ // Extract the IEEE-754 double components.
+ uint32_t sign = raw >> 63;
+ // Extract the exponent and remove the IEEE-754 encoding bias.
+ int32_t exponent =
+ static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
+ // Extract the mantissa and add the implicit '1' bit.
+ uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
+ if (std::fpclassify(value) == FP_NORMAL) {
+ mantissa |= (UINT64_C(1) << 52);
+ }
+ return FPRoundToFloat(sign, exponent, mantissa, round_mode);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return value;
+}
+
+// TODO: We should consider implementing a full FPToDouble(Float16)
+// conversion function (for performance reasons).
+double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception) {
+ // We can rely on implicit float to double conversion here.
+ return FPToFloat(value, DN, exception);
+}
+
+
+double FPToDouble(float value, UseDefaultNaN DN, bool* exception) {
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ if (exception != NULL) {
+ *exception = true;
+ }
+ }
+ if (DN == kUseDefaultNaN) return kFP64DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred entirely, except that the top
+ // bit is forced to '1', making the result a quiet NaN. The unused
+ // (low-order) payload bits are set to 0.
+ uint32_t raw = FloatToRawbits(value);
+
+ uint64_t sign = raw >> 31;
+ uint64_t exponent = (1 << 11) - 1;
+ uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
+ payload <<= (52 - 23); // The unused low-order bits should be 0.
+ payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
+
+ return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
+ }
+
+ case FP_ZERO:
+ case FP_NORMAL:
+ case FP_SUBNORMAL:
+ case FP_INFINITE: {
+ // All other inputs are preserved in a standard cast, because every value
+ // representable using an IEEE-754 float is also representable using an
+ // IEEE-754 double.
+ return static_cast<double>(value);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return static_cast<double>(value);
+}
+
+
+Float16 FPToFloat16(float value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception) {
+ // Only the FPTieEven rounding mode is implemented.
+ VIXL_ASSERT(round_mode == FPTieEven);
+ USE(round_mode);
+
+ uint32_t raw = FloatToRawbits(value);
+ int32_t sign = raw >> 31;
+ int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
+ uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
+
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ if (exception != NULL) {
+ *exception = true;
+ }
+ }
+ if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred as much as possible, except
+ // that the top bit is forced to '1', making the result a quiet NaN.
+ uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
+ : Float16ToRawbits(kFP16NegativeInfinity);
+ result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
+ result |= (1 << 9); // Force a quiet NaN;
+ return RawbitsToFloat16(result);
+ }
+
+ case FP_ZERO:
+ return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
+
+ case FP_INFINITE:
+ return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
+
+ case FP_NORMAL:
+ case FP_SUBNORMAL: {
+ // Convert float-to-half as the processor would, assuming that FPCR.FZ
+ // (flush-to-zero) is not set.
+
+ // Add the implicit '1' bit to the mantissa.
+ mantissa += (1 << 23);
+ return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return kFP16PositiveZero;
+}
+
+
+Float16 FPToFloat16(double value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception) {
+ // Only the FPTieEven rounding mode is implemented.
+ VIXL_ASSERT(round_mode == FPTieEven);
+ USE(round_mode);
+
+ uint64_t raw = DoubleToRawbits(value);
+ int32_t sign = raw >> 63;
+ int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
+ uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
+
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ if (exception != NULL) {
+ *exception = true;
+ }
+ }
+ if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred as much as possible, except
+ // that the top bit is forced to '1', making the result a quiet NaN.
+ uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
+ : Float16ToRawbits(kFP16NegativeInfinity);
+ result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
+ result |= (1 << 9); // Force a quiet NaN;
+ return RawbitsToFloat16(result);
+ }
+
+ case FP_ZERO:
+ return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
+
+ case FP_INFINITE:
+ return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
+ case FP_NORMAL:
+ case FP_SUBNORMAL: {
+ // Convert double-to-half as the processor would, assuming that FPCR.FZ
+ // (flush-to-zero) is not set.
+
+ // Add the implicit '1' bit to the mantissa.
+ mantissa += (UINT64_C(1) << 52);
+ return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return kFP16PositiveZero;
+}
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Utils-vixl.h b/js/src/jit/arm64/vixl/Utils-vixl.h
new file mode 100644
index 0000000000..d1f6a835f8
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Utils-vixl.h
@@ -0,0 +1,1283 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_UTILS_H
+#define VIXL_UTILS_H
+
+#include "mozilla/FloatingPoint.h"
+
+#include <cmath>
+#include <cstring>
+#include <limits>
+#include <vector>
+
+#include "jit/arm64/vixl/CompilerIntrinsics-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+
+namespace vixl {
+
+// Macros for compile-time format checking.
+#if GCC_VERSION_OR_NEWER(4, 4, 0)
+#define PRINTF_CHECK(format_index, varargs_index) \
+ __attribute__((format(gnu_printf, format_index, varargs_index)))
+#else
+#define PRINTF_CHECK(format_index, varargs_index)
+#endif
+
+#ifdef __GNUC__
+#define VIXL_HAS_DEPRECATED_WITH_MSG
+#elif defined(__clang__)
+#ifdef __has_extension
+#define VIXL_HAS_DEPRECATED_WITH_MSG
+#endif
+#endif
+
+#ifdef VIXL_HAS_DEPRECATED_WITH_MSG
+#define VIXL_DEPRECATED(replaced_by, declarator) \
+ __attribute__((deprecated("Use \"" replaced_by "\" instead"))) declarator
+#else
+#define VIXL_DEPRECATED(replaced_by, declarator) declarator
+#endif
+
+#ifdef VIXL_DEBUG
+#define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_UNREACHABLE()
+#else
+#define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_FALLTHROUGH()
+#endif
+
+template <typename T, size_t n>
+size_t ArrayLength(const T (&)[n]) {
+ return n;
+}
+
+// Check number width.
+// TODO: Refactor these using templates.
+inline bool IsIntN(unsigned n, uint32_t x) {
+ VIXL_ASSERT((0 < n) && (n < 32));
+ uint32_t limit = UINT32_C(1) << (n - 1);
+ return x < limit;
+}
+inline bool IsIntN(unsigned n, int32_t x) {
+ VIXL_ASSERT((0 < n) && (n < 32));
+ int32_t limit = INT32_C(1) << (n - 1);
+ return (-limit <= x) && (x < limit);
+}
+inline bool IsIntN(unsigned n, uint64_t x) {
+ VIXL_ASSERT((0 < n) && (n < 64));
+ uint64_t limit = UINT64_C(1) << (n - 1);
+ return x < limit;
+}
+inline bool IsIntN(unsigned n, int64_t x) {
+ VIXL_ASSERT((0 < n) && (n < 64));
+ int64_t limit = INT64_C(1) << (n - 1);
+ return (-limit <= x) && (x < limit);
+}
+VIXL_DEPRECATED("IsIntN", inline bool is_intn(unsigned n, int64_t x)) {
+ return IsIntN(n, x);
+}
+
+inline bool IsUintN(unsigned n, uint32_t x) {
+ VIXL_ASSERT((0 < n) && (n < 32));
+ return !(x >> n);
+}
+inline bool IsUintN(unsigned n, int32_t x) {
+ VIXL_ASSERT((0 < n) && (n < 32));
+ // Convert to an unsigned integer to avoid implementation-defined behavior.
+ return !(static_cast<uint32_t>(x) >> n);
+}
+inline bool IsUintN(unsigned n, uint64_t x) {
+ VIXL_ASSERT((0 < n) && (n < 64));
+ return !(x >> n);
+}
+inline bool IsUintN(unsigned n, int64_t x) {
+ VIXL_ASSERT((0 < n) && (n < 64));
+ // Convert to an unsigned integer to avoid implementation-defined behavior.
+ return !(static_cast<uint64_t>(x) >> n);
+}
+VIXL_DEPRECATED("IsUintN", inline bool is_uintn(unsigned n, int64_t x)) {
+ return IsUintN(n, x);
+}
+
+inline uint64_t TruncateToUintN(unsigned n, uint64_t x) {
+ VIXL_ASSERT((0 < n) && (n < 64));
+ return static_cast<uint64_t>(x) & ((UINT64_C(1) << n) - 1);
+}
+VIXL_DEPRECATED("TruncateToUintN",
+ inline uint64_t truncate_to_intn(unsigned n, int64_t x)) {
+ return TruncateToUintN(n, x);
+}
+
+// clang-format off
+#define INT_1_TO_32_LIST(V) \
+V(1) V(2) V(3) V(4) V(5) V(6) V(7) V(8) \
+V(9) V(10) V(11) V(12) V(13) V(14) V(15) V(16) \
+V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24) \
+V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32)
+
+#define INT_33_TO_63_LIST(V) \
+V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40) \
+V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48) \
+V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56) \
+V(57) V(58) V(59) V(60) V(61) V(62) V(63)
+
+#define INT_1_TO_63_LIST(V) INT_1_TO_32_LIST(V) INT_33_TO_63_LIST(V)
+
+// clang-format on
+
+#define DECLARE_IS_INT_N(N) \
+ inline bool IsInt##N(int64_t x) { return IsIntN(N, x); } \
+ VIXL_DEPRECATED("IsInt" #N, inline bool is_int##N(int64_t x)) { \
+ return IsIntN(N, x); \
+ }
+
+#define DECLARE_IS_UINT_N(N) \
+ inline bool IsUint##N(int64_t x) { return IsUintN(N, x); } \
+ VIXL_DEPRECATED("IsUint" #N, inline bool is_uint##N(int64_t x)) { \
+ return IsUintN(N, x); \
+ }
+
+#define DECLARE_TRUNCATE_TO_UINT_32(N) \
+ inline uint32_t TruncateToUint##N(uint64_t x) { \
+ return static_cast<uint32_t>(TruncateToUintN(N, x)); \
+ } \
+ VIXL_DEPRECATED("TruncateToUint" #N, \
+ inline uint32_t truncate_to_int##N(int64_t x)) { \
+ return TruncateToUint##N(x); \
+ }
+
+INT_1_TO_63_LIST(DECLARE_IS_INT_N)
+INT_1_TO_63_LIST(DECLARE_IS_UINT_N)
+INT_1_TO_32_LIST(DECLARE_TRUNCATE_TO_UINT_32)
+
+#undef DECLARE_IS_INT_N
+#undef DECLARE_IS_UINT_N
+#undef DECLARE_TRUNCATE_TO_UINT_32
+
+// Bit field extraction.
+inline uint64_t ExtractUnsignedBitfield64(int msb, int lsb, uint64_t x) {
+ VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
+ (msb >= lsb));
+ if ((msb == 63) && (lsb == 0)) return x;
+ return (x >> lsb) & ((static_cast<uint64_t>(1) << (1 + msb - lsb)) - 1);
+}
+
+
+inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint32_t x) {
+ VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
+ (msb >= lsb));
+ return TruncateToUint32(ExtractUnsignedBitfield64(msb, lsb, x));
+}
+
+
+inline int64_t ExtractSignedBitfield64(int msb, int lsb, int64_t x) {
+ VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
+ (msb >= lsb));
+ uint64_t temp = ExtractUnsignedBitfield64(msb, lsb, x);
+ // If the highest extracted bit is set, sign extend.
+ if ((temp >> (msb - lsb)) == 1) {
+ temp |= ~UINT64_C(0) << (msb - lsb);
+ }
+ int64_t result;
+ memcpy(&result, &temp, sizeof(result));
+ return result;
+}
+
+
+inline int32_t ExtractSignedBitfield32(int msb, int lsb, int32_t x) {
+ VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
+ (msb >= lsb));
+ uint32_t temp = TruncateToUint32(ExtractSignedBitfield64(msb, lsb, x));
+ int32_t result;
+ memcpy(&result, &temp, sizeof(result));
+ return result;
+}
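A short usage sketch of the bitfield helpers above: the unsigned variant pulls bits [msb:lsb] out of a value, and the signed variant additionally sign-extends from bit msb. The IEEE-754 double layout makes convenient test data; ExtractU64 below is a local re-implementation for illustration, not the VIXL function:

```cpp
#include <cassert>
#include <cstdint>

static uint64_t ExtractU64(int msb, int lsb, uint64_t x) {
  if (msb == 63 && lsb == 0) return x;
  return (x >> lsb) & ((UINT64_C(1) << (1 + msb - lsb)) - 1);
}

int main() {
  // The double exponent field occupies bits [62:52]; for 1.0 it holds the bias, 1023.
  assert(ExtractU64(62, 52, UINT64_C(0x3ff0000000000000)) == 1023);
  // Bits [3:0] of 0x8e are 0b1110; read as a signed 4-bit field that is 14 - 16 = -2.
  uint64_t field = ExtractU64(3, 0, 0x8e);
  int64_t as_signed = static_cast<int64_t>(field) - ((field >> 3) ? 16 : 0);
  assert(as_signed == -2);
  return 0;
}
```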
+
+
+inline uint64_t RotateRight(uint64_t value,
+ unsigned int rotate,
+ unsigned int width) {
+ VIXL_ASSERT((width > 0) && (width <= 64));
+ uint64_t width_mask = ~UINT64_C(0) >> (64 - width);
+ rotate &= 63;
+ if (rotate > 0) {
+ value &= width_mask;
+ value = (value << (width - rotate)) | (value >> rotate);
+ }
+ return value & width_mask;
+}
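RotateRight is a width-limited rotate: the value is masked to `width` bits and the rotation wraps within that field. A tiny sketch with its own helper, which (as an assumption of this example) normalizes the rotate amount with a modulo instead of relying on the caller:

```cpp
#include <cassert>
#include <cstdint>

// Width-limited rotate right; `width` must be in [1, 64].
static uint64_t Ror(uint64_t value, unsigned rotate, unsigned width) {
  uint64_t mask = (width == 64) ? ~UINT64_C(0) : ((UINT64_C(1) << width) - 1);
  value &= mask;
  rotate %= width;  // normalize; the header's version expects rotate < width
  if (rotate == 0) return value;
  return ((value >> rotate) | (value << (width - rotate))) & mask;
}

int main() {
  assert(Ror(0x1, 1, 32) == 0x80000000u);  // bit 0 wraps around to bit 31
  assert(Ror(0xabcd, 8, 16) == 0xcdab);    // byte swap via rotate within 16 bits
  assert(Ror(0xf0, 4, 8) == 0x0f);
  return 0;
}
```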
+
+
+// Wrapper class for passing FP16 values through the assembler.
+// This is purely to aid with type checking/casting.
+class Float16 {
+ public:
+ explicit Float16(double dvalue);
+ Float16() : rawbits_(0x0) {}
+ friend uint16_t Float16ToRawbits(Float16 value);
+ friend Float16 RawbitsToFloat16(uint16_t bits);
+
+ protected:
+ uint16_t rawbits_;
+};
+
+// Floating point representation.
+uint16_t Float16ToRawbits(Float16 value);
+
+
+uint32_t FloatToRawbits(float value);
+VIXL_DEPRECATED("FloatToRawbits",
+ inline uint32_t float_to_rawbits(float value)) {
+ return FloatToRawbits(value);
+}
+
+uint64_t DoubleToRawbits(double value);
+VIXL_DEPRECATED("DoubleToRawbits",
+ inline uint64_t double_to_rawbits(double value)) {
+ return DoubleToRawbits(value);
+}
+
+Float16 RawbitsToFloat16(uint16_t bits);
+
+float RawbitsToFloat(uint32_t bits);
+VIXL_DEPRECATED("RawbitsToFloat",
+ inline float rawbits_to_float(uint32_t bits)) {
+ return RawbitsToFloat(bits);
+}
+
+double RawbitsToDouble(uint64_t bits);
+VIXL_DEPRECATED("RawbitsToDouble",
+ inline double rawbits_to_double(uint64_t bits)) {
+ return RawbitsToDouble(bits);
+}
+
+namespace internal {
+
+// Internal simulation class used solely by the simulator to
+// provide an abstraction layer for any half-precision arithmetic.
+class SimFloat16 : public Float16 {
+ public:
+ // TODO: We should investigate making this constructor explicit.
+ // This is currently difficult to do due to a number of templated
+ // functions in the simulator which rely on returning double values.
+ SimFloat16(double dvalue) : Float16(dvalue) {} // NOLINT(runtime/explicit)
+ SimFloat16(Float16 f) { // NOLINT(runtime/explicit)
+ this->rawbits_ = Float16ToRawbits(f);
+ }
+ SimFloat16() : Float16() {}
+ SimFloat16 operator-() const;
+ SimFloat16 operator+(SimFloat16 rhs) const;
+ SimFloat16 operator-(SimFloat16 rhs) const;
+ SimFloat16 operator*(SimFloat16 rhs) const;
+ SimFloat16 operator/(SimFloat16 rhs) const;
+ bool operator<(SimFloat16 rhs) const;
+ bool operator>(SimFloat16 rhs) const;
+ bool operator==(SimFloat16 rhs) const;
+ bool operator!=(SimFloat16 rhs) const;
+  // This is necessary for conversions performed in (macro asm) Fmov.
+ bool operator==(double rhs) const;
+ operator double() const;
+};
+} // namespace internal
+
+uint32_t Float16Sign(internal::SimFloat16 value);
+
+uint32_t Float16Exp(internal::SimFloat16 value);
+
+uint32_t Float16Mantissa(internal::SimFloat16 value);
+
+uint32_t FloatSign(float value);
+VIXL_DEPRECATED("FloatSign", inline uint32_t float_sign(float value)) {
+ return FloatSign(value);
+}
+
+uint32_t FloatExp(float value);
+VIXL_DEPRECATED("FloatExp", inline uint32_t float_exp(float value)) {
+ return FloatExp(value);
+}
+
+uint32_t FloatMantissa(float value);
+VIXL_DEPRECATED("FloatMantissa", inline uint32_t float_mantissa(float value)) {
+ return FloatMantissa(value);
+}
+
+uint32_t DoubleSign(double value);
+VIXL_DEPRECATED("DoubleSign", inline uint32_t double_sign(double value)) {
+ return DoubleSign(value);
+}
+
+uint32_t DoubleExp(double value);
+VIXL_DEPRECATED("DoubleExp", inline uint32_t double_exp(double value)) {
+ return DoubleExp(value);
+}
+
+uint64_t DoubleMantissa(double value);
+VIXL_DEPRECATED("DoubleMantissa",
+ inline uint64_t double_mantissa(double value)) {
+ return DoubleMantissa(value);
+}
+
+internal::SimFloat16 Float16Pack(uint16_t sign,
+ uint16_t exp,
+ uint16_t mantissa);
+
+float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa);
+VIXL_DEPRECATED("FloatPack",
+ inline float float_pack(uint32_t sign,
+ uint32_t exp,
+ uint32_t mantissa)) {
+ return FloatPack(sign, exp, mantissa);
+}
+
+double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa);
+VIXL_DEPRECATED("DoublePack",
+ inline double double_pack(uint32_t sign,
+ uint32_t exp,
+ uint64_t mantissa)) {
+ return DoublePack(sign, exp, mantissa);
+}
+
+// An fpclassify() function for 16-bit half-precision floats.
+int Float16Classify(Float16 value);
+VIXL_DEPRECATED("Float16Classify", inline int float16classify(uint16_t value)) {
+ return Float16Classify(RawbitsToFloat16(value));
+}
+
+bool IsZero(Float16 value);
+
+inline bool IsNaN(float value) { return std::isnan(value); }
+
+inline bool IsNaN(double value) { return std::isnan(value); }
+
+inline bool IsNaN(Float16 value) { return Float16Classify(value) == FP_NAN; }
+
+inline bool IsInf(float value) { return std::isinf(value); }
+
+inline bool IsInf(double value) { return std::isinf(value); }
+
+inline bool IsInf(Float16 value) {
+ return Float16Classify(value) == FP_INFINITE;
+}
+
+
+// NaN tests.
+inline bool IsSignallingNaN(double num) {
+ const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
+ uint64_t raw = DoubleToRawbits(num);
+ if (IsNaN(num) && ((raw & kFP64QuietNaNMask) == 0)) {
+ return true;
+ }
+ return false;
+}
+
+
+inline bool IsSignallingNaN(float num) {
+ const uint32_t kFP32QuietNaNMask = 0x00400000;
+ uint32_t raw = FloatToRawbits(num);
+ if (IsNaN(num) && ((raw & kFP32QuietNaNMask) == 0)) {
+ return true;
+ }
+ return false;
+}
+
+
+inline bool IsSignallingNaN(Float16 num) {
+ const uint16_t kFP16QuietNaNMask = 0x0200;
+ return IsNaN(num) && ((Float16ToRawbits(num) & kFP16QuietNaNMask) == 0);
+}
+
+
+template <typename T>
+inline bool IsQuietNaN(T num) {
+ return IsNaN(num) && !IsSignallingNaN(num);
+}
+
+
+// Convert the NaN in 'num' to a quiet NaN.
+inline double ToQuietNaN(double num) {
+ const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
+ VIXL_ASSERT(IsNaN(num));
+ return RawbitsToDouble(DoubleToRawbits(num) | kFP64QuietNaNMask);
+}
+
+
+inline float ToQuietNaN(float num) {
+ const uint32_t kFP32QuietNaNMask = 0x00400000;
+ VIXL_ASSERT(IsNaN(num));
+ return RawbitsToFloat(FloatToRawbits(num) | kFP32QuietNaNMask);
+}
+
+
+inline internal::SimFloat16 ToQuietNaN(internal::SimFloat16 num) {
+ const uint16_t kFP16QuietNaNMask = 0x0200;
+ VIXL_ASSERT(IsNaN(num));
+ return internal::SimFloat16(
+ RawbitsToFloat16(Float16ToRawbits(num) | kFP16QuietNaNMask));
+}
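The kFP64/kFP32/kFP16 quiet-NaN masks used above all target the most significant mantissa bit, which is what separates quiet from signalling NaNs in the IEEE-754 binary formats as used here. A small single-precision demonstration of that bit:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // float layout: 1 sign, 8 exponent, 23 mantissa bits; bit 22 is the quiet bit.
  uint32_t signalling_bits = 0x7f800001;               // exponent all ones, payload 1
  uint32_t quiet_bits = signalling_bits | 0x00400000;  // same payload, quiet bit set
  float s, q;
  memcpy(&s, &signalling_bits, sizeof(s));
  memcpy(&q, &quiet_bits, sizeof(q));
  assert(s != s);  // both patterns compare unequal to themselves, i.e. both are NaNs
  assert(q != q);
  return 0;
}
```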
+
+
+// Fused multiply-add.
+inline double FusedMultiplyAdd(double op1, double op2, double a) {
+ return fma(op1, op2, a);
+}
+
+
+inline float FusedMultiplyAdd(float op1, float op2, float a) {
+ return fmaf(op1, op2, a);
+}
+
+
+inline uint64_t LowestSetBit(uint64_t value) { return value & -value; }
+
+
+template <typename T>
+inline int HighestSetBitPosition(T value) {
+ VIXL_ASSERT(value != 0);
+ return (sizeof(value) * 8 - 1) - CountLeadingZeros(value);
+}
+
+
+template <typename V>
+inline int WhichPowerOf2(V value) {
+ VIXL_ASSERT(IsPowerOf2(value));
+ return CountTrailingZeros(value);
+}
+
+
+unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size);
+
+
+int BitCount(uint64_t value);
+
+
+template <typename T>
+T ReverseBits(T value) {
+ VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
+ (sizeof(value) == 4) || (sizeof(value) == 8));
+ T result = 0;
+ for (unsigned i = 0; i < (sizeof(value) * 8); i++) {
+ result = (result << 1) | (value & 1);
+ value >>= 1;
+ }
+ return result;
+}
+
+
+template <typename T>
+inline T SignExtend(T val, int bitSize) {
+ VIXL_ASSERT(bitSize > 0);
+ T mask = (T(2) << (bitSize - 1)) - T(1);
+ val &= mask;
+ T sign_bits = -((val >> (bitSize - 1)) << bitSize);
+ val |= sign_bits;
+ return val;
+}
+
+
+template <typename T>
+T ReverseBytes(T value, int block_bytes_log2) {
+ VIXL_ASSERT((sizeof(value) == 4) || (sizeof(value) == 8));
+ VIXL_ASSERT((1U << block_bytes_log2) <= sizeof(value));
+  // Split the 64-bit value into an array of 8 bytes, where bytes[0] is the
+  // least significant byte and bytes[7] is the most significant.
+ uint8_t bytes[8];
+ uint64_t mask = UINT64_C(0xff00000000000000);
+ for (int i = 7; i >= 0; i--) {
+ bytes[i] = (static_cast<uint64_t>(value) & mask) >> (i * 8);
+ mask >>= 8;
+ }
+
+ // Permutation tables for REV instructions.
+ // permute_table[0] is used by REV16_x, REV16_w
+ // permute_table[1] is used by REV32_x, REV_w
+ // permute_table[2] is used by REV_x
+ VIXL_ASSERT((0 < block_bytes_log2) && (block_bytes_log2 < 4));
+ static const uint8_t permute_table[3][8] = {{6, 7, 4, 5, 2, 3, 0, 1},
+ {4, 5, 6, 7, 0, 1, 2, 3},
+ {0, 1, 2, 3, 4, 5, 6, 7}};
+ uint64_t temp = 0;
+ for (int i = 0; i < 8; i++) {
+ temp <<= 8;
+ temp |= bytes[permute_table[block_bytes_log2 - 1][i]];
+ }
+
+ T result;
+ VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(temp));
+ memcpy(&result, &temp, sizeof(result));
+ return result;
+}
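The permutation tables above encode the three AArch64 byte-reversal granularities: swapping bytes within 16-bit, 32-bit, or 64-bit blocks (REV16, REV32/REV_w, and REV_x respectively). A standalone sketch of the same effect, written as a plain loop rather than a permute table:

```cpp
#include <cassert>
#include <cstdint>

// Reverse the bytes inside each block of `block_bytes` bytes (2, 4, or 8).
static uint64_t SwapWithin(uint64_t v, int block_bytes) {
  uint64_t out = 0;
  for (int block = 0; block < 8; block += block_bytes) {
    for (int i = 0; i < block_bytes; i++) {
      uint64_t byte = (v >> ((block + i) * 8)) & 0xff;
      out |= byte << ((block + block_bytes - 1 - i) * 8);
    }
  }
  return out;
}

int main() {
  uint64_t x = UINT64_C(0x0102030405060708);
  assert(SwapWithin(x, 2) == UINT64_C(0x0201040306050807));  // REV16-style
  assert(SwapWithin(x, 4) == UINT64_C(0x0403020108070605));  // REV32-style
  assert(SwapWithin(x, 8) == UINT64_C(0x0807060504030201));  // REV-style
  return 0;
}
```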
+
+template <unsigned MULTIPLE, typename T>
+inline bool IsMultiple(T value) {
+ VIXL_ASSERT(IsPowerOf2(MULTIPLE));
+ return (value & (MULTIPLE - 1)) == 0;
+}
+
+template <typename T>
+inline bool IsMultiple(T value, unsigned multiple) {
+ VIXL_ASSERT(IsPowerOf2(multiple));
+ return (value & (multiple - 1)) == 0;
+}
+
+template <typename T>
+inline bool IsAligned(T pointer, int alignment) {
+ VIXL_ASSERT(IsPowerOf2(alignment));
+ return (pointer & (alignment - 1)) == 0;
+}
+
+// Pointer alignment
+// TODO: rename/refactor to make it specific to instructions.
+template <unsigned ALIGN, typename T>
+inline bool IsAligned(T pointer) {
+ VIXL_ASSERT(sizeof(pointer) == sizeof(intptr_t)); // NOLINT(runtime/sizeof)
+ // Use C-style casts to get static_cast behaviour for integral types (T), and
+ // reinterpret_cast behaviour for other types.
+ return IsAligned((intptr_t)(pointer), ALIGN);
+}
+
+template <typename T>
+bool IsWordAligned(T pointer) {
+ return IsAligned<4>(pointer);
+}
+
+// Increment a pointer until it has the specified alignment. The alignment must
+// be a power of two.
+template <class T>
+T AlignUp(T pointer,
+ typename Unsigned<sizeof(T) * kBitsPerByte>::type alignment) {
+ VIXL_ASSERT(IsPowerOf2(alignment));
+ // Use C-style casts to get static_cast behaviour for integral types (T), and
+ // reinterpret_cast behaviour for other types.
+
+ typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw =
+ (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
+ VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
+
+ size_t mask = alignment - 1;
+ T result = (T)((pointer_raw + mask) & ~mask);
+ VIXL_ASSERT(result >= pointer);
+
+ return result;
+}
+
+// Decrement a pointer until it has the specified alignment. The alignment must
+// be a power of two.
+template <class T>
+T AlignDown(T pointer,
+ typename Unsigned<sizeof(T) * kBitsPerByte>::type alignment) {
+ VIXL_ASSERT(IsPowerOf2(alignment));
+ // Use C-style casts to get static_cast behaviour for integral types (T), and
+ // reinterpret_cast behaviour for other types.
+
+ typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw =
+ (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
+ VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
+
+ size_t mask = alignment - 1;
+ return (T)(pointer_raw & ~mask);
+}
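Both helpers rely on the usual power-of-two mask arithmetic: rounding up adds `alignment - 1` before masking, rounding down simply masks. A minimal sketch over plain uintptr_t values:

```cpp
#include <cassert>
#include <cstdint>

static uintptr_t AlignUpTo(uintptr_t p, uintptr_t alignment) {
  uintptr_t mask = alignment - 1;  // alignment must be a power of two
  return (p + mask) & ~mask;
}

static uintptr_t AlignDownTo(uintptr_t p, uintptr_t alignment) {
  return p & ~(alignment - 1);
}

int main() {
  assert(AlignUpTo(0x1001, 16) == 0x1010);
  assert(AlignUpTo(0x1010, 16) == 0x1010);  // already-aligned values are unchanged
  assert(AlignDownTo(0x100f, 16) == 0x1000);
  return 0;
}
```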
+
+
+template <typename T>
+inline T ExtractBit(T value, unsigned bit) {
+ return (value >> bit) & T(1);
+}
+
+template <typename Ts, typename Td>
+inline Td ExtractBits(Ts value, int least_significant_bit, Td mask) {
+ return Td((value >> least_significant_bit) & Ts(mask));
+}
+
+template <typename Ts, typename Td>
+inline void AssignBit(Td& dst, // NOLINT(runtime/references)
+ int bit,
+ Ts value) {
+ VIXL_ASSERT((value == Ts(0)) || (value == Ts(1)));
+ VIXL_ASSERT(bit >= 0);
+ VIXL_ASSERT(bit < static_cast<int>(sizeof(Td) * 8));
+ Td mask(1);
+ dst &= ~(mask << bit);
+ dst |= Td(value) << bit;
+}
+
+template <typename Td, typename Ts>
+inline void AssignBits(Td& dst, // NOLINT(runtime/references)
+ int least_significant_bit,
+ Ts mask,
+ Ts value) {
+ VIXL_ASSERT(least_significant_bit >= 0);
+ VIXL_ASSERT(least_significant_bit < static_cast<int>(sizeof(Td) * 8));
+ VIXL_ASSERT(((Td(mask) << least_significant_bit) >> least_significant_bit) ==
+ Td(mask));
+ VIXL_ASSERT((value & mask) == value);
+ dst &= ~(Td(mask) << least_significant_bit);
+ dst |= Td(value) << least_significant_bit;
+}
+
+class VFP {
+ public:
+ static uint32_t FP32ToImm8(float imm) {
+ // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
+ uint32_t bits = FloatToRawbits(imm);
+ // bit7: a000.0000
+ uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
+ // bit6: 0b00.0000
+ uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
+ // bit5_to_0: 00cd.efgh
+ uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
+ return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
+ }
+ static uint32_t FP64ToImm8(double imm) {
+ // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+ // 0000.0000.0000.0000.0000.0000.0000.0000
+ uint64_t bits = DoubleToRawbits(imm);
+ // bit7: a000.0000
+ uint64_t bit7 = ((bits >> 63) & 0x1) << 7;
+ // bit6: 0b00.0000
+ uint64_t bit6 = ((bits >> 61) & 0x1) << 6;
+ // bit5_to_0: 00cd.efgh
+ uint64_t bit5_to_0 = (bits >> 48) & 0x3f;
+
+ return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
+ }
+ static float Imm8ToFP32(uint32_t imm8) {
+ // Imm8: abcdefgh (8 bits)
+ // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits)
+ // where B is b ^ 1
+ uint32_t bits = imm8;
+ uint32_t bit7 = (bits >> 7) & 0x1;
+ uint32_t bit6 = (bits >> 6) & 0x1;
+ uint32_t bit5_to_0 = bits & 0x3f;
+ uint32_t result = (bit7 << 31) | ((32 - bit6) << 25) | (bit5_to_0 << 19);
+
+ return RawbitsToFloat(result);
+ }
+ static double Imm8ToFP64(uint32_t imm8) {
+ // Imm8: abcdefgh (8 bits)
+ // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+ // 0000.0000.0000.0000.0000.0000.0000.0000 (64 bits)
+ // where B is b ^ 1
+ uint32_t bits = imm8;
+ uint64_t bit7 = (bits >> 7) & 0x1;
+ uint64_t bit6 = (bits >> 6) & 0x1;
+ uint64_t bit5_to_0 = bits & 0x3f;
+ uint64_t result = (bit7 << 63) | ((256 - bit6) << 54) | (bit5_to_0 << 48);
+ return RawbitsToDouble(result);
+ }
+ static bool IsImmFP32(float imm) {
+ // Valid values will have the form:
+ // aBbb.bbbc.defg.h000.0000.0000.0000.0000
+ uint32_t bits = FloatToRawbits(imm);
+    // bits[18..0] are cleared.
+ if ((bits & 0x7ffff) != 0) {
+ return false;
+ }
+
+ // bits[29..25] are all set or all cleared.
+ uint32_t b_pattern = (bits >> 16) & 0x3e00;
+ if (b_pattern != 0 && b_pattern != 0x3e00) {
+ return false;
+ }
+ // bit[30] and bit[29] are opposite.
+ if (((bits ^ (bits << 1)) & 0x40000000) == 0) {
+ return false;
+ }
+ return true;
+ }
+ static bool IsImmFP64(double imm) {
+ // Valid values will have the form:
+ // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+ // 0000.0000.0000.0000.0000.0000.0000.0000
+ uint64_t bits = DoubleToRawbits(imm);
+ // bits[47..0] are cleared.
+ if ((bits & 0x0000ffffffffffff) != 0) {
+ return false;
+ }
+ // bits[61..54] are all set or all cleared.
+ uint32_t b_pattern = (bits >> 48) & 0x3fc0;
+ if ((b_pattern != 0) && (b_pattern != 0x3fc0)) {
+ return false;
+ }
+ // bit[62] and bit[61] are opposite.
+ if (((bits ^ (bits << 1)) & (UINT64_C(1) << 62)) == 0) {
+ return false;
+ }
+ return true;
+ }
+};
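The VFP helpers above implement the 8-bit floating-point (FMOV) immediate encoding: the 8 immediate bits expand into the sign, an inverted-and-replicated exponent bit, and the leading fraction bits, with everything below bit 19 forced to zero for single precision. A round-trip sketch using the same bit positions, with local Encode/Decode helpers rather than the class above:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t FloatBits(float f) { uint32_t b; memcpy(&b, &f, 4); return b; }
static float BitsToFloat(uint32_t b) { float f; memcpy(&f, &b, 4); return f; }

// Mirror of FP32ToImm8: pick out bits a, b and cdefgh from the float.
static uint32_t Encode(float imm) {
  uint32_t bits = FloatBits(imm);
  return (((bits >> 31) & 0x1) << 7) | (((bits >> 29) & 0x1) << 6) |
         ((bits >> 19) & 0x3f);
}

// Mirror of Imm8ToFP32: rebuild the float, using the (32 - bit6) trick to
// produce the inverted-then-replicated exponent bit pattern.
static float Decode(uint32_t imm8) {
  uint32_t bit7 = (imm8 >> 7) & 0x1;
  uint32_t bit6 = (imm8 >> 6) & 0x1;
  uint32_t bit5_to_0 = imm8 & 0x3f;
  return BitsToFloat((bit7 << 31) | ((32 - bit6) << 25) | (bit5_to_0 << 19));
}

int main() {
  assert(Encode(1.0f) == 0x70 && Decode(0x70) == 1.0f);
  assert(Encode(-2.0f) == 0x80 && Decode(0x80) == -2.0f);
  assert(Decode(Encode(0.5f)) == 0.5f);  // encodable values round-trip exactly
  return 0;
}
```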
+
+class BitField {
+  // ForEachBitHelper is a functor that provides
+  //   bool ForEachBitHelper::execute(ElementType id) const
+  // which is called for each selected bit and returns whether to continue
+  // (if true) or stop (if false).
+  // check_set selects whether the visited bits are on (true) or off (false).
+ template <typename ForEachBitHelper, bool check_set>
+ bool ForEachBit(const ForEachBitHelper& helper) {
+ for (int i = 0; static_cast<size_t>(i) < bitfield_.size(); i++) {
+ if (bitfield_[i] == check_set)
+ if (!helper.execute(i)) return false;
+ }
+ return true;
+ }
+
+ public:
+ explicit BitField(unsigned size) : bitfield_(size, 0) {}
+
+ void Set(int i) {
+ VIXL_ASSERT((i >= 0) && (static_cast<size_t>(i) < bitfield_.size()));
+ bitfield_[i] = true;
+ }
+
+ void Unset(int i) {
+ VIXL_ASSERT((i >= 0) && (static_cast<size_t>(i) < bitfield_.size()));
+    bitfield_[i] = false;
+ }
+
+ bool IsSet(int i) const { return bitfield_[i]; }
+
+  // For each bit not set in the bitfield, call the helper's execute
+  // method.
+ // ForEachBitSetHelper::execute returns true if the iteration through
+ // the bits can continue, otherwise it will stop.
+ // struct ForEachBitSetHelper {
+ // bool execute(int /*id*/) { return false; }
+ // };
+ template <typename ForEachBitNotSetHelper>
+ bool ForEachBitNotSet(const ForEachBitNotSetHelper& helper) {
+ return ForEachBit<ForEachBitNotSetHelper, false>(helper);
+ }
+
+  // For each bit set in the bitfield, call the helper's execute
+  // method.
+ template <typename ForEachBitSetHelper>
+ bool ForEachBitSet(const ForEachBitSetHelper& helper) {
+ return ForEachBit<ForEachBitSetHelper, true>(helper);
+ }
+
+ private:
+ std::vector<bool> bitfield_;
+};
+
+namespace internal {
+
+typedef int64_t Int64;
+class Uint64;
+class Uint128;
+
+class Uint32 {
+ uint32_t data_;
+
+ public:
+ // Unlike uint32_t, Uint32 has a default constructor.
+ Uint32() { data_ = 0; }
+ explicit Uint32(uint32_t data) : data_(data) {}
+ inline explicit Uint32(Uint64 data);
+ uint32_t Get() const { return data_; }
+ template <int N>
+ int32_t GetSigned() const {
+ return ExtractSignedBitfield32(N - 1, 0, data_);
+ }
+ int32_t GetSigned() const { return data_; }
+ Uint32 operator~() const { return Uint32(~data_); }
+ Uint32 operator-() const { return Uint32(-data_); }
+ bool operator==(Uint32 value) const { return data_ == value.data_; }
+ bool operator!=(Uint32 value) const { return data_ != value.data_; }
+ bool operator>(Uint32 value) const { return data_ > value.data_; }
+ Uint32 operator+(Uint32 value) const { return Uint32(data_ + value.data_); }
+ Uint32 operator-(Uint32 value) const { return Uint32(data_ - value.data_); }
+ Uint32 operator&(Uint32 value) const { return Uint32(data_ & value.data_); }
+ Uint32 operator&=(Uint32 value) {
+ data_ &= value.data_;
+ return *this;
+ }
+ Uint32 operator^(Uint32 value) const { return Uint32(data_ ^ value.data_); }
+ Uint32 operator^=(Uint32 value) {
+ data_ ^= value.data_;
+ return *this;
+ }
+ Uint32 operator|(Uint32 value) const { return Uint32(data_ | value.data_); }
+ Uint32 operator|=(Uint32 value) {
+ data_ |= value.data_;
+ return *this;
+ }
+  // Unlike uint32_t, the shift functions can accept a negative shift and
+ // return 0 when the shift is too big.
+ Uint32 operator>>(int shift) const {
+ if (shift == 0) return *this;
+ if (shift < 0) {
+ int tmp = -shift;
+ if (tmp >= 32) return Uint32(0);
+ return Uint32(data_ << tmp);
+ }
+ int tmp = shift;
+ if (tmp >= 32) return Uint32(0);
+ return Uint32(data_ >> tmp);
+ }
+ Uint32 operator<<(int shift) const {
+ if (shift == 0) return *this;
+ if (shift < 0) {
+ int tmp = -shift;
+ if (tmp >= 32) return Uint32(0);
+ return Uint32(data_ >> tmp);
+ }
+ int tmp = shift;
+ if (tmp >= 32) return Uint32(0);
+ return Uint32(data_ << tmp);
+ }
+};
+
+class Uint64 {
+ uint64_t data_;
+
+ public:
+ // Unlike uint64_t, Uint64 has a default constructor.
+ Uint64() { data_ = 0; }
+ explicit Uint64(uint64_t data) : data_(data) {}
+ explicit Uint64(Uint32 data) : data_(data.Get()) {}
+ inline explicit Uint64(Uint128 data);
+ uint64_t Get() const { return data_; }
+ int64_t GetSigned(int N) const {
+ return ExtractSignedBitfield64(N - 1, 0, data_);
+ }
+ int64_t GetSigned() const { return data_; }
+ Uint32 ToUint32() const {
+ VIXL_ASSERT((data_ >> 32) == 0);
+ return Uint32(static_cast<uint32_t>(data_));
+ }
+ Uint32 GetHigh32() const { return Uint32(data_ >> 32); }
+ Uint32 GetLow32() const { return Uint32(data_ & 0xffffffff); }
+ Uint64 operator~() const { return Uint64(~data_); }
+ Uint64 operator-() const { return Uint64(-data_); }
+ bool operator==(Uint64 value) const { return data_ == value.data_; }
+ bool operator!=(Uint64 value) const { return data_ != value.data_; }
+ Uint64 operator+(Uint64 value) const { return Uint64(data_ + value.data_); }
+ Uint64 operator-(Uint64 value) const { return Uint64(data_ - value.data_); }
+ Uint64 operator&(Uint64 value) const { return Uint64(data_ & value.data_); }
+ Uint64 operator&=(Uint64 value) {
+ data_ &= value.data_;
+ return *this;
+ }
+ Uint64 operator^(Uint64 value) const { return Uint64(data_ ^ value.data_); }
+ Uint64 operator^=(Uint64 value) {
+ data_ ^= value.data_;
+ return *this;
+ }
+ Uint64 operator|(Uint64 value) const { return Uint64(data_ | value.data_); }
+ Uint64 operator|=(Uint64 value) {
+ data_ |= value.data_;
+ return *this;
+ }
+  // Unlike uint64_t, the shift functions can accept a negative shift and
+ // return 0 when the shift is too big.
+ Uint64 operator>>(int shift) const {
+ if (shift == 0) return *this;
+ if (shift < 0) {
+ int tmp = -shift;
+ if (tmp >= 64) return Uint64(0);
+ return Uint64(data_ << tmp);
+ }
+ int tmp = shift;
+ if (tmp >= 64) return Uint64(0);
+ return Uint64(data_ >> tmp);
+ }
+ Uint64 operator<<(int shift) const {
+ if (shift == 0) return *this;
+ if (shift < 0) {
+ int tmp = -shift;
+ if (tmp >= 64) return Uint64(0);
+ return Uint64(data_ >> tmp);
+ }
+ int tmp = shift;
+ if (tmp >= 64) return Uint64(0);
+ return Uint64(data_ << tmp);
+ }
+};
+
+class Uint128 {
+ uint64_t data_high_;
+ uint64_t data_low_;
+
+ public:
+ Uint128() : data_high_(0), data_low_(0) {}
+ explicit Uint128(uint64_t data_low) : data_high_(0), data_low_(data_low) {}
+ explicit Uint128(Uint64 data_low)
+ : data_high_(0), data_low_(data_low.Get()) {}
+ Uint128(uint64_t data_high, uint64_t data_low)
+ : data_high_(data_high), data_low_(data_low) {}
+ Uint64 ToUint64() const {
+ VIXL_ASSERT(data_high_ == 0);
+ return Uint64(data_low_);
+ }
+ Uint64 GetHigh64() const { return Uint64(data_high_); }
+ Uint64 GetLow64() const { return Uint64(data_low_); }
+ Uint128 operator~() const { return Uint128(~data_high_, ~data_low_); }
+ bool operator==(Uint128 value) const {
+ return (data_high_ == value.data_high_) && (data_low_ == value.data_low_);
+ }
+ Uint128 operator&(Uint128 value) const {
+ return Uint128(data_high_ & value.data_high_, data_low_ & value.data_low_);
+ }
+ Uint128 operator&=(Uint128 value) {
+ data_high_ &= value.data_high_;
+ data_low_ &= value.data_low_;
+ return *this;
+ }
+ Uint128 operator|=(Uint128 value) {
+ data_high_ |= value.data_high_;
+ data_low_ |= value.data_low_;
+ return *this;
+ }
+ Uint128 operator>>(int shift) const {
+ VIXL_ASSERT((shift >= 0) && (shift < 128));
+ if (shift == 0) return *this;
+ if (shift >= 64) {
+ return Uint128(0, data_high_ >> (shift - 64));
+ }
+ uint64_t tmp = (data_high_ << (64 - shift)) | (data_low_ >> shift);
+ return Uint128(data_high_ >> shift, tmp);
+ }
+ Uint128 operator<<(int shift) const {
+ VIXL_ASSERT((shift >= 0) && (shift < 128));
+ if (shift == 0) return *this;
+ if (shift >= 64) {
+ return Uint128(data_low_ << (shift - 64), 0);
+ }
+ uint64_t tmp = (data_high_ << shift) | (data_low_ >> (64 - shift));
+ return Uint128(tmp, data_low_ << shift);
+ }
+};
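Unlike the built-in integer types, Uint32/Uint64 above give shifts a defined meaning in the awkward cases: a negative amount shifts the opposite way, and anything at or beyond the register width yields zero. A sketch of that convention for the 32-bit right shift, as a free function over plain uint32_t:

```cpp
#include <cassert>
#include <cstdint>

// "Safe" right shift: negative amounts shift left, out-of-range amounts give 0.
static uint32_t SafeShr32(uint32_t v, int shift) {
  if (shift == 0) return v;
  if (shift < 0) {
    int left = -shift;
    return (left >= 32) ? 0 : (v << left);
  }
  return (shift >= 32) ? 0 : (v >> shift);
}

int main() {
  assert(SafeShr32(0x80000000u, 4) == 0x08000000u);
  assert(SafeShr32(0x1u, -4) == 0x10u);       // negative shift goes left
  assert(SafeShr32(0xffffffffu, 32) == 0u);   // out-of-range shift saturates to 0
  return 0;
}
```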
+
+Uint32::Uint32(Uint64 data) : data_(data.ToUint32().Get()) {}
+Uint64::Uint64(Uint128 data) : data_(data.ToUint64().Get()) {}
+
+Int64 BitCount(Uint32 value);
+
+} // namespace internal
+
+// The default NaN values (for FPCR.DN=1).
+extern const double kFP64DefaultNaN;
+extern const float kFP32DefaultNaN;
+extern const Float16 kFP16DefaultNaN;
+
+// Floating-point infinity values.
+extern const Float16 kFP16PositiveInfinity;
+extern const Float16 kFP16NegativeInfinity;
+extern const float kFP32PositiveInfinity;
+extern const float kFP32NegativeInfinity;
+extern const double kFP64PositiveInfinity;
+extern const double kFP64NegativeInfinity;
+
+// Floating-point zero values.
+extern const Float16 kFP16PositiveZero;
+extern const Float16 kFP16NegativeZero;
+
+// AArch64 floating-point specifics. These match IEEE-754.
+const unsigned kDoubleMantissaBits = 52;
+const unsigned kDoubleExponentBits = 11;
+const unsigned kFloatMantissaBits = 23;
+const unsigned kFloatExponentBits = 8;
+const unsigned kFloat16MantissaBits = 10;
+const unsigned kFloat16ExponentBits = 5;
+
+enum FPRounding {
+ // The first four values are encodable directly by FPCR<RMode>.
+ FPTieEven = 0x0,
+ FPPositiveInfinity = 0x1,
+ FPNegativeInfinity = 0x2,
+ FPZero = 0x3,
+
+ // The final rounding modes are only available when explicitly specified by
+  // the instruction (such as with fcvta). They cannot be set in FPCR.
+ FPTieAway,
+ FPRoundOdd
+};
+
+enum UseDefaultNaN { kUseDefaultNaN, kIgnoreDefaultNaN };
+
+// Assemble the specified IEEE-754 components into the target type and apply
+// appropriate rounding.
+// sign: 0 = positive, 1 = negative
+// exponent: Unbiased IEEE-754 exponent.
+// mantissa: The mantissa of the input. The top bit (which is not encoded for
+// normal IEEE-754 values) must not be omitted. This bit has the
+// value 'pow(2, exponent)'.
+//
+// The input value is assumed to be a normalized value. That is, the input may
+// not be infinity or NaN. If the source value is subnormal, it must be
+// normalized before calling this function such that the highest set bit in the
+// mantissa has the value 'pow(2, exponent)'.
+//
+// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than
+// calling a templated FPRound.
+template <class T, int ebits, int mbits>
+T FPRound(int64_t sign,
+ int64_t exponent,
+ uint64_t mantissa,
+ FPRounding round_mode) {
+ VIXL_ASSERT((sign == 0) || (sign == 1));
+
+ // Only FPTieEven and FPRoundOdd rounding modes are implemented.
+ VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
+
+  // Rounding can promote subnormals to normals, and normals to infinities. For
+  // example, a double with unbiased exponent 127 (FLT_MAX_EXP - 1) would
+  // appear to be encodable as a float, but rounding based on the low-order
+  // mantissa bits could make it overflow. With ties-to-even rounding, this
+  // value would become an infinity.
+
+ // ---- Rounding Method ----
+ //
+ // The exponent is irrelevant in the rounding operation, so we treat the
+ // lowest-order bit that will fit into the result ('onebit') as having
+ // the value '1'. Similarly, the highest-order bit that won't fit into
+ // the result ('halfbit') has the value '0.5'. The 'point' sits between
+ // 'onebit' and 'halfbit':
+ //
+ // These bits fit into the result.
+ // |---------------------|
+ // mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ // ||
+ // / |
+ // / halfbit
+ // onebit
+ //
+ // For subnormal outputs, the range of representable bits is smaller and
+ // the position of onebit and halfbit depends on the exponent of the
+ // input, but the method is otherwise similar.
+ //
+ // onebit(frac)
+ // |
+ // | halfbit(frac) halfbit(adjusted)
+ // | / /
+ // | | |
+ // 0b00.0 (exact) -> 0b00.0 (exact) -> 0b00
+ // 0b00.0... -> 0b00.0... -> 0b00
+ // 0b00.1 (exact) -> 0b00.0111..111 -> 0b00
+ // 0b00.1... -> 0b00.1... -> 0b01
+ // 0b01.0 (exact) -> 0b01.0 (exact) -> 0b01
+ // 0b01.0... -> 0b01.0... -> 0b01
+ // 0b01.1 (exact) -> 0b01.1 (exact) -> 0b10
+ // 0b01.1... -> 0b01.1... -> 0b10
+ // 0b10.0 (exact) -> 0b10.0 (exact) -> 0b10
+ // 0b10.0... -> 0b10.0... -> 0b10
+ // 0b10.1 (exact) -> 0b10.0111..111 -> 0b10
+ // 0b10.1... -> 0b10.1... -> 0b11
+ // 0b11.0 (exact) -> 0b11.0 (exact) -> 0b11
+ // ... / | / |
+ // / | / |
+ // / |
+ // adjusted = frac - (halfbit(mantissa) & ~onebit(frac)); / |
+ //
+ // mantissa = (mantissa >> shift) + halfbit(adjusted);
+
+ static const int mantissa_offset = 0;
+ static const int exponent_offset = mantissa_offset + mbits;
+ static const int sign_offset = exponent_offset + ebits;
+ VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1));
+
+ // Bail out early for zero inputs.
+ if (mantissa == 0) {
+ return static_cast<T>(sign << sign_offset);
+ }
+
+ // If all bits in the exponent are set, the value is infinite or NaN.
+ // This is true for all binary IEEE-754 formats.
+ static const int infinite_exponent = (1 << ebits) - 1;
+ static const int max_normal_exponent = infinite_exponent - 1;
+
+ // Apply the exponent bias to encode it for the result. Doing this early makes
+ // it easy to detect values that will be infinite or subnormal.
+ exponent += max_normal_exponent >> 1;
+
+ if (exponent > max_normal_exponent) {
+ // Overflow: the input is too large for the result type to represent.
+ if (round_mode == FPTieEven) {
+ // FPTieEven rounding mode handles overflows using infinities.
+ exponent = infinite_exponent;
+ mantissa = 0;
+ } else {
+ VIXL_ASSERT(round_mode == FPRoundOdd);
+ // FPRoundOdd rounding mode handles overflows using the largest magnitude
+ // normal number.
+ exponent = max_normal_exponent;
+ mantissa = (UINT64_C(1) << exponent_offset) - 1;
+ }
+ return static_cast<T>((sign << sign_offset) |
+ (exponent << exponent_offset) |
+ (mantissa << mantissa_offset));
+ }
+
+ // Calculate the shift required to move the top mantissa bit to the proper
+ // place in the destination type.
+ const int highest_significant_bit = 63 - CountLeadingZeros(mantissa);
+ int shift = highest_significant_bit - mbits;
+
+ if (exponent <= 0) {
+ // The output will be subnormal (before rounding).
+ // For subnormal outputs, the shift must be adjusted by the exponent. The +1
+ // is necessary because the exponent of a subnormal value (encoded as 0) is
+ // the same as the exponent of the smallest normal value (encoded as 1).
+ shift += -exponent + 1;
+
+ // Handle inputs that would produce a zero output.
+ //
+ // Shifts higher than highest_significant_bit+1 will always produce a zero
+ // result. A shift of exactly highest_significant_bit+1 might produce a
+ // non-zero result after rounding.
+ if (shift > (highest_significant_bit + 1)) {
+ if (round_mode == FPTieEven) {
+ // The result will always be +/-0.0.
+ return static_cast<T>(sign << sign_offset);
+ } else {
+ VIXL_ASSERT(round_mode == FPRoundOdd);
+ VIXL_ASSERT(mantissa != 0);
+        // For FPRoundOdd, if the mantissa is too small to represent and
+        // non-zero, return the next "odd" value.
+ return static_cast<T>((sign << sign_offset) | 1);
+ }
+ }
+
+ // Properly encode the exponent for a subnormal output.
+ exponent = 0;
+ } else {
+ // Clear the topmost mantissa bit, since this is not encoded in IEEE-754
+ // normal values.
+ mantissa &= ~(UINT64_C(1) << highest_significant_bit);
+ }
+
+ // The casts below are only well-defined for unsigned integers.
+ VIXL_STATIC_ASSERT(std::numeric_limits<T>::is_integer);
+ VIXL_STATIC_ASSERT(!std::numeric_limits<T>::is_signed);
+
+ if (shift > 0) {
+ if (round_mode == FPTieEven) {
+ // We have to shift the mantissa to the right. Some precision is lost, so
+ // we need to apply rounding.
+ uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
+ uint64_t halfbit_mantissa = (mantissa >> (shift - 1)) & 1;
+ uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);
+ uint64_t adjusted = mantissa - adjustment;
+ T halfbit_adjusted = (adjusted >> (shift - 1)) & 1;
+
+ T result =
+ static_cast<T>((sign << sign_offset) | (exponent << exponent_offset) |
+ ((mantissa >> shift) << mantissa_offset));
+
+ // A very large mantissa can overflow during rounding. If this happens,
+ // the exponent should be incremented and the mantissa set to 1.0
+ // (encoded as 0). Applying halfbit_adjusted after assembling the float
+ // has the nice side-effect that this case is handled for free.
+ //
+ // This also handles cases where a very large finite value overflows to
+ // infinity, or where a very large subnormal value overflows to become
+ // normal.
+ return result + halfbit_adjusted;
+ } else {
+ VIXL_ASSERT(round_mode == FPRoundOdd);
+      // If any bits at position halfbit or below are set, onebit (i.e. the
+ // bottom bit of the resulting mantissa) must be set.
+ uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);
+ if (fractional_bits != 0) {
+ mantissa |= UINT64_C(1) << shift;
+ }
+
+ return static_cast<T>((sign << sign_offset) |
+ (exponent << exponent_offset) |
+ ((mantissa >> shift) << mantissa_offset));
+ }
+ } else {
+ // We have to shift the mantissa to the left (or not at all). The input
+ // mantissa is exactly representable in the output mantissa, so apply no
+ // rounding correction.
+ return static_cast<T>((sign << sign_offset) |
+ (exponent << exponent_offset) |
+ ((mantissa << -shift) << mantissa_offset));
+ }
+}
+
+
+// See FPRound for a description of this function.
+inline double FPRoundToDouble(int64_t sign,
+ int64_t exponent,
+ uint64_t mantissa,
+ FPRounding round_mode) {
+ uint64_t bits =
+ FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
+ exponent,
+ mantissa,
+ round_mode);
+ return RawbitsToDouble(bits);
+}
+
+
+// See FPRound for a description of this function.
+inline Float16 FPRoundToFloat16(int64_t sign,
+ int64_t exponent,
+ uint64_t mantissa,
+ FPRounding round_mode) {
+ return RawbitsToFloat16(
+ FPRound<uint16_t,
+ kFloat16ExponentBits,
+ kFloat16MantissaBits>(sign, exponent, mantissa, round_mode));
+}
+
+
+// See FPRound for a description of this function.
+static inline float FPRoundToFloat(int64_t sign,
+ int64_t exponent,
+ uint64_t mantissa,
+ FPRounding round_mode) {
+ uint32_t bits =
+ FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
+ exponent,
+ mantissa,
+ round_mode);
+ return RawbitsToFloat(bits);
+}
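+
+
+// Illustrative sketch (not part of the upstream VIXL sources; the helper
+// name is hypothetical): per the FPRound description above, 1.5 is
+// 0b1.1 * 2^0, so sign = 0, exponent = 0 and mantissa = 0b11 (the implicit
+// leading one followed by the 0.5 fraction bit).
+static inline float FPRoundExampleOneAndAHalf() {
+  return FPRoundToFloat(0, 0, 0x3, FPTieEven);  // Assembles and returns 1.5f.
+}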
+
+
+float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
+float FPToFloat(double value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception = NULL);
+
+double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
+double FPToDouble(float value, UseDefaultNaN DN, bool* exception = NULL);
+
+Float16 FPToFloat16(float value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception = NULL);
+
+Float16 FPToFloat16(double value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception = NULL);
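+
+// Illustrative usage of the conversions above (not part of the upstream VIXL
+// sources; the variable names are hypothetical):
+//
+//   bool exception = false;
+//   Float16 half = FPToFloat16(1.0, FPTieEven, kUseDefaultNaN, &exception);
+//   float back = FPToFloat(half, kUseDefaultNaN);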
+} // namespace vixl
+
+#endif // VIXL_UTILS_H