author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-19 01:47:29 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-19 01:47:29 +0000
commit     0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d
tree       a31f07c9bcca9d56ce61e9a1ffd30ef350d513aa  /js/src/jit/arm
parent     Initial commit.
download   firefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.tar.xz
           firefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.zip

Adding upstream version 115.8.0esr. (tag: upstream/115.8.0esr)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/jit/arm')
-rw-r--r--  js/src/jit/arm/Architecture-arm.cpp                    540
-rw-r--r--  js/src/jit/arm/Architecture-arm.h                      733
-rw-r--r--  js/src/jit/arm/Assembler-arm.cpp                      2832
-rw-r--r--  js/src/jit/arm/Assembler-arm.h                        2296
-rw-r--r--  js/src/jit/arm/CodeGenerator-arm.cpp                  3154
-rw-r--r--  js/src/jit/arm/CodeGenerator-arm.h                     172
-rw-r--r--  js/src/jit/arm/DoubleEntryTable.tbl                    257
-rw-r--r--  js/src/jit/arm/LIR-arm.h                               511
-rw-r--r--  js/src/jit/arm/Lowering-arm.cpp                       1223
-rw-r--r--  js/src/jit/arm/Lowering-arm.h                          118
-rw-r--r--  js/src/jit/arm/MacroAssembler-arm-inl.h               2582
-rw-r--r--  js/src/jit/arm/MacroAssembler-arm.cpp                 6382
-rw-r--r--  js/src/jit/arm/MacroAssembler-arm.h                   1392
-rw-r--r--  js/src/jit/arm/MoveEmitter-arm.cpp                     413
-rw-r--r--  js/src/jit/arm/MoveEmitter-arm.h                        70
-rw-r--r--  js/src/jit/arm/SharedICHelpers-arm-inl.h                79
-rw-r--r--  js/src/jit/arm/SharedICHelpers-arm.h                    80
-rw-r--r--  js/src/jit/arm/SharedICRegisters-arm.h                  52
-rw-r--r--  js/src/jit/arm/Simulator-arm.cpp                      5472
-rw-r--r--  js/src/jit/arm/Simulator-arm.h                         632
-rw-r--r--  js/src/jit/arm/Trampoline-arm.cpp                      831
-rw-r--r--  js/src/jit/arm/disasm/Constants-arm.cpp                117
-rw-r--r--  js/src/jit/arm/disasm/Constants-arm.h                  684
-rw-r--r--  js/src/jit/arm/disasm/Disasm-arm.cpp                  2031
-rw-r--r--  js/src/jit/arm/disasm/Disasm-arm.h                     141
-rw-r--r--  js/src/jit/arm/gen-double-encoder-table.py              35
-rw-r--r--  js/src/jit/arm/llvm-compiler-rt/arm/aeabi_idivmod.S     27
-rw-r--r--  js/src/jit/arm/llvm-compiler-rt/arm/aeabi_uidivmod.S    28
-rw-r--r--  js/src/jit/arm/llvm-compiler-rt/assembly.h              67
29 files changed, 32951 insertions, 0 deletions
diff --git a/js/src/jit/arm/Architecture-arm.cpp b/js/src/jit/arm/Architecture-arm.cpp
new file mode 100644
index 0000000000..d4c5026705
--- /dev/null
+++ b/js/src/jit/arm/Architecture-arm.cpp
@@ -0,0 +1,540 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm/Architecture-arm.h"
+
+#if !defined(JS_SIMULATOR_ARM) && !defined(__APPLE__)
+# include <elf.h>
+#endif
+
+#include <fcntl.h>
+#ifdef XP_UNIX
+# include <unistd.h>
+#endif
+
+#if defined(XP_IOS)
+# include <libkern/OSCacheControl.h>
+#endif
+
+#include "jit/arm/Assembler-arm.h"
+#include "jit/arm/Simulator-arm.h"
+#include "jit/FlushICache.h" // js::jit::FlushICache
+#include "jit/RegisterSets.h"
+
+#if !defined(__linux__) || defined(ANDROID) || defined(JS_SIMULATOR_ARM)
+// The Android NDK and B2G do not include the hwcap.h kernel header, and it is
+// not defined when building the simulator, so inline the header defines we
+// need.
+# define HWCAP_VFP (1 << 6)
+# define HWCAP_NEON (1 << 12)
+# define HWCAP_VFPv3 (1 << 13)
+# define HWCAP_VFPv3D16 (1 << 14) /* also set for VFPv4-D16 */
+# define HWCAP_VFPv4 (1 << 16)
+# define HWCAP_IDIVA (1 << 17)
+# define HWCAP_IDIVT (1 << 18)
+# define HWCAP_VFPD32 (1 << 19) /* set if VFP has 32 regs (not 16) */
+# define AT_HWCAP 16
+#else
+# include <asm/hwcap.h>
+# if !defined(HWCAP_IDIVA)
+# define HWCAP_IDIVA (1 << 17)
+# endif
+# if !defined(HWCAP_VFPD32)
+# define HWCAP_VFPD32 (1 << 19) /* set if VFP has 32 regs (not 16) */
+# endif
+#endif
+
+namespace js {
+namespace jit {
+
+// Parse the Linux kernel cpuinfo features. This is also used to parse the
+// override features, which have some extensions: 'armv7', 'align' and
+// 'hardfp'.
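+// For example (illustrative), ParseARMCpuFeatures("vfpv3 neon idiva") yields
+// HWCAP_VFPv3 | HWCAP_NEON | HWCAP_IDIVA; callers then pass the result
+// through CanonicalizeARMHwCapFlags (below).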
+static uint32_t ParseARMCpuFeatures(const char* features,
+ bool override = false) {
+ uint32_t flags = 0;
+
+  // For ease of running tests, we default to fixing up alignment faults.
+ bool fixupAlignmentFault = true;
+
+ for (;;) {
+ char ch = *features;
+ if (!ch) {
+ // End of string.
+ break;
+ }
+ if (ch == ' ' || ch == ',') {
+ // Skip separator characters.
+ features++;
+ continue;
+ }
+ // Find the end of the token.
+ const char* end = features + 1;
+ for (;; end++) {
+ ch = *end;
+ if (!ch || ch == ' ' || ch == ',') {
+ break;
+ }
+ }
+ size_t count = end - features;
+ if (count == 3 && strncmp(features, "vfp", 3) == 0) {
+ flags |= HWCAP_VFP;
+ } else if (count == 5 && strncmp(features, "vfpv2", 5) == 0) {
+ flags |= HWCAP_VFP; // vfpv2 is the same as vfp
+ } else if (count == 4 && strncmp(features, "neon", 4) == 0) {
+ flags |= HWCAP_NEON;
+ } else if (count == 5 && strncmp(features, "vfpv3", 5) == 0) {
+ flags |= HWCAP_VFPv3;
+ } else if (count == 8 && strncmp(features, "vfpv3d16", 8) == 0) {
+ flags |= HWCAP_VFPv3D16;
+ } else if (count == 5 && strncmp(features, "vfpv4", 5) == 0) {
+ flags |= HWCAP_VFPv4;
+ } else if (count == 5 && strncmp(features, "idiva", 5) == 0) {
+ flags |= HWCAP_IDIVA;
+ } else if (count == 5 && strncmp(features, "idivt", 5) == 0) {
+ flags |= HWCAP_IDIVT;
+ } else if (count == 6 && strncmp(features, "vfpd32", 6) == 0) {
+ flags |= HWCAP_VFPD32;
+ } else if (count == 5 && strncmp(features, "armv7", 5) == 0) {
+ flags |= HWCAP_ARMv7;
+ } else if (count == 5 && strncmp(features, "align", 5) == 0) {
+ flags |= HWCAP_ALIGNMENT_FAULT | HWCAP_FIXUP_FAULT;
+#if defined(JS_SIMULATOR_ARM)
+ } else if (count == 7 && strncmp(features, "nofixup", 7) == 0) {
+ fixupAlignmentFault = false;
+ } else if (count == 6 && strncmp(features, "hardfp", 6) == 0) {
+ flags |= HWCAP_USE_HARDFP_ABI;
+#endif
+ } else if (override) {
+ fprintf(stderr, "Warning: unexpected ARM feature at: %s\n", features);
+ }
+ features = end;
+ }
+
+ if (!fixupAlignmentFault) {
+ flags &= ~HWCAP_FIXUP_FAULT;
+ }
+
+ return flags;
+}
+
+static uint32_t CanonicalizeARMHwCapFlags(uint32_t flags) {
+ // Canonicalize the flags. These rules are also applied to the features
+ // supplied for simulation.
+
+ // VFPv3 is a subset of VFPv4, force this if the input string omits it.
+ if (flags & HWCAP_VFPv4) {
+ flags |= HWCAP_VFPv3;
+ }
+
+  // The VFPv3 feature is expected when VFPv3D16 is reported, but add it
+  // just in case of a kernel difference in feature reporting.
+ if (flags & HWCAP_VFPv3D16) {
+ flags |= HWCAP_VFPv3;
+ }
+
+ // VFPv2 is a subset of VFPv3, force this if the input string omits it. VFPv2
+ // is just an alias for VFP.
+ if (flags & HWCAP_VFPv3) {
+ flags |= HWCAP_VFP;
+ }
+
+ // If we have Neon we have floating point.
+ if (flags & HWCAP_NEON) {
+ flags |= HWCAP_VFP;
+ }
+
+ // If VFPv3 or Neon is supported then this must be an ARMv7.
+ if (flags & (HWCAP_VFPv3 | HWCAP_NEON)) {
+ flags |= HWCAP_ARMv7;
+ }
+
+ // Some old kernels report VFP and not VFPv3, but if ARMv7 then it must be
+ // VFPv3.
+ if ((flags & HWCAP_VFP) && (flags & HWCAP_ARMv7)) {
+ flags |= HWCAP_VFPv3;
+ }
+
+ // Older kernels do not implement the HWCAP_VFPD32 flag.
+ if ((flags & HWCAP_VFPv3) && !(flags & HWCAP_VFPv3D16)) {
+ flags |= HWCAP_VFPD32;
+ }
+
+ return flags;
+}
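+// For example, an input of just HWCAP_VFPv4 canonicalizes to
+// HWCAP_VFPv4 | HWCAP_VFPv3 | HWCAP_VFP | HWCAP_ARMv7 | HWCAP_VFPD32.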
+
+#if !defined(JS_SIMULATOR_ARM) && (defined(__linux__) || defined(ANDROID))
+static bool forceDoubleCacheFlush = false;
+#endif
+
+// The override flags parsed from the ARMHWCAP environment variable or from the
+// --arm-hwcap js shell argument. They are stable after startup: there is no
+// longer a programmatic way of setting these from JS.
+volatile uint32_t armHwCapFlags = HWCAP_UNINITIALIZED;
+
+bool CPUFlagsHaveBeenComputed() { return armHwCapFlags != HWCAP_UNINITIALIZED; }
+
+static const char* gArmHwCapString = nullptr;
+
+void SetARMHwCapFlagsString(const char* armHwCap) {
+ MOZ_ASSERT(!CPUFlagsHaveBeenComputed());
+ gArmHwCapString = armHwCap;
+}
+
+static void ParseARMHwCapFlags(const char* armHwCap) {
+ MOZ_ASSERT(armHwCap);
+
+ if (strstr(armHwCap, "help")) {
+ fflush(NULL);
+ printf(
+ "\n"
+ "usage: ARMHWCAP=option,option,option,... where options can be:\n"
+ "\n"
+ " vfp \n"
+ " neon \n"
+ " vfpv3 \n"
+ " vfpv3d16 \n"
+ " vfpv4 \n"
+ " idiva \n"
+ " idivt \n"
+ " vfpd32 \n"
+ " armv7 \n"
+ " align - unaligned accesses will trap and be emulated\n"
+#ifdef JS_SIMULATOR_ARM
+ " nofixup - disable emulation of unaligned accesses\n"
+ " hardfp \n"
+#endif
+ "\n");
+ exit(0);
+ /*NOTREACHED*/
+ }
+
+ uint32_t flags = ParseARMCpuFeatures(armHwCap, /* override = */ true);
+
+#ifdef JS_CODEGEN_ARM_HARDFP
+ flags |= HWCAP_USE_HARDFP_ABI;
+#endif
+
+ armHwCapFlags = CanonicalizeARMHwCapFlags(flags);
+ JitSpew(JitSpew_Codegen, "ARM HWCAP: 0x%x\n", armHwCapFlags);
+}
+
+void InitARMFlags() {
+ MOZ_RELEASE_ASSERT(armHwCapFlags == HWCAP_UNINITIALIZED);
+
+ if (const char* env = getenv("ARMHWCAP")) {
+ ParseARMHwCapFlags(env);
+ return;
+ }
+
+ if (gArmHwCapString) {
+ ParseARMHwCapFlags(gArmHwCapString);
+ return;
+ }
+
+ uint32_t flags = 0;
+#ifdef JS_SIMULATOR_ARM
+ // HWCAP_FIXUP_FAULT is on by default even if HWCAP_ALIGNMENT_FAULT is
+ // not on by default, because some memory access instructions always fault.
+ // Notably, this is true for floating point accesses.
+ flags = HWCAP_ARMv7 | HWCAP_VFP | HWCAP_VFPv3 | HWCAP_VFPv4 | HWCAP_NEON |
+ HWCAP_IDIVA | HWCAP_FIXUP_FAULT;
+#else
+
+# if defined(__linux__) || defined(ANDROID)
+ // This includes Android and B2G.
+ bool readAuxv = false;
+ int fd = open("/proc/self/auxv", O_RDONLY);
+ if (fd > 0) {
+ struct {
+ uint32_t a_type;
+ uint32_t a_val;
+ } aux;
+ while (read(fd, &aux, sizeof(aux))) {
+ if (aux.a_type == AT_HWCAP) {
+ flags = aux.a_val;
+ readAuxv = true;
+ break;
+ }
+ }
+ close(fd);
+ }
+
+ FILE* fp = fopen("/proc/cpuinfo", "r");
+ if (fp) {
+ char buf[1024] = {};
+ size_t len = fread(buf, sizeof(char), sizeof(buf) - 1, fp);
+ fclose(fp);
+ buf[len] = '\0';
+
+ // Read the cpuinfo Features if the auxv is not available.
+ if (!readAuxv) {
+ char* featureList = strstr(buf, "Features");
+ if (featureList) {
+ if (char* featuresEnd = strstr(featureList, "\n")) {
+ *featuresEnd = '\0';
+ }
+ flags = ParseARMCpuFeatures(featureList + 8);
+ }
+ if (strstr(buf, "ARMv7")) {
+ flags |= HWCAP_ARMv7;
+ }
+ }
+
+    // The Exynos 7420 CPU (EU Galaxy S6 (Note)) has a bug where flushing
+    // sometimes doesn't invalidate the instruction cache. As a result we
+    // force a second flush by calling cacheFlush twice with different start
+    // addresses.
+ char* exynos7420 = strstr(buf, "Exynos7420");
+ if (exynos7420) {
+ forceDoubleCacheFlush = true;
+ }
+ }
+# endif
+
+  // If compiled to use specialized features, then these features can be
+  // assumed to be present; otherwise the compiled code would fail to run.
+
+# ifdef JS_CODEGEN_ARM_HARDFP
+ // Compiled to use the hardfp ABI.
+ flags |= HWCAP_USE_HARDFP_ABI;
+# endif
+
+# if defined(__VFP_FP__) && !defined(__SOFTFP__)
+ // Compiled to use VFP instructions so assume VFP support.
+ flags |= HWCAP_VFP;
+# endif
+
+# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
+ // Compiled to use ARMv7 instructions so assume the ARMv7 arch.
+ flags |= HWCAP_ARMv7;
+# endif
+
+# if defined(__APPLE__)
+# if defined(__ARM_NEON__)
+ flags |= HWCAP_NEON;
+# endif
+# if defined(__ARMVFPV3__)
+  flags |= HWCAP_VFPv3 | HWCAP_VFPD32;
+# endif
+# endif
+
+#endif // JS_SIMULATOR_ARM
+
+ armHwCapFlags = CanonicalizeARMHwCapFlags(flags);
+
+ JitSpew(JitSpew_Codegen, "ARM HWCAP: 0x%x\n", armHwCapFlags);
+ return;
+}
+
+uint32_t GetARMFlags() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags;
+}
+
+bool HasNEON() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_NEON;
+}
+
+bool HasARMv7() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_ARMv7;
+}
+
+bool HasMOVWT() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_ARMv7;
+}
+
+bool HasLDSTREXBHD() {
+ // These are really available from ARMv6K and later, but why bother?
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_ARMv7;
+}
+
+bool HasDMBDSBISB() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_ARMv7;
+}
+
+bool HasVFPv3() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_VFPv3;
+}
+
+bool HasVFP() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_VFP;
+}
+
+bool Has32DP() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_VFPD32;
+}
+
+bool HasIDIV() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_IDIVA;
+}
+
+// This is defined in the header and inlined when not using the simulator.
+#ifdef JS_SIMULATOR_ARM
+bool UseHardFpABI() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_USE_HARDFP_ABI;
+}
+#endif
+
+Registers::Code Registers::FromName(const char* name) {
+ // Check for some register aliases first.
+ if (strcmp(name, "ip") == 0) {
+ return ip;
+ }
+ if (strcmp(name, "r13") == 0) {
+ return r13;
+ }
+ if (strcmp(name, "lr") == 0) {
+ return lr;
+ }
+ if (strcmp(name, "r15") == 0) {
+ return r15;
+ }
+
+ for (size_t i = 0; i < Total; i++) {
+ if (strcmp(GetName(i), name) == 0) {
+ return Code(i);
+ }
+ }
+
+ return Invalid;
+}
+
+FloatRegisters::Code FloatRegisters::FromName(const char* name) {
+ for (size_t i = 0; i < TotalSingle; ++i) {
+ if (strcmp(GetSingleName(Encoding(i)), name) == 0) {
+ return VFPRegister(i, VFPRegister::Single).code();
+ }
+ }
+ for (size_t i = 0; i < TotalDouble; ++i) {
+ if (strcmp(GetDoubleName(Encoding(i)), name) == 0) {
+ return VFPRegister(i, VFPRegister::Double).code();
+ }
+ }
+
+ return Invalid;
+}
+
+FloatRegisterSet VFPRegister::ReduceSetForPush(const FloatRegisterSet& s) {
+#ifdef ENABLE_WASM_SIMD
+# error "Needs more careful logic if SIMD is enabled"
+#endif
+
+ LiveFloatRegisterSet mod;
+ for (FloatRegisterIterator iter(s); iter.more(); ++iter) {
+ if ((*iter).isSingle()) {
+ // Add in just this float.
+ mod.addUnchecked(*iter);
+ } else if ((*iter).id() < 16) {
+ // A double with an overlay, add in both floats.
+ mod.addUnchecked((*iter).singleOverlay(0));
+ mod.addUnchecked((*iter).singleOverlay(1));
+ } else {
+ // Add in the lone double in the range 16-31.
+ mod.addUnchecked(*iter);
+ }
+ }
+ return mod.set();
+}
+
+uint32_t VFPRegister::GetPushSizeInBytes(const FloatRegisterSet& s) {
+#ifdef ENABLE_WASM_SIMD
+# error "Needs more careful logic if SIMD is enabled"
+#endif
+
+ FloatRegisterSet ss = s.reduceSetForPush();
+ uint64_t bits = ss.bits();
+ uint32_t ret = mozilla::CountPopulation32(bits & 0xffffffff) * sizeof(float);
+ ret += mozilla::CountPopulation32(bits >> 32) * sizeof(double);
+ return ret;
+}
+uint32_t VFPRegister::getRegisterDumpOffsetInBytes() {
+#ifdef ENABLE_WASM_SIMD
+# error "Needs more careful logic if SIMD is enabled"
+#endif
+
+ if (isSingle()) {
+ return id() * sizeof(float);
+ }
+ if (isDouble()) {
+ return id() * sizeof(double);
+ }
+ MOZ_CRASH("not Single or Double");
+}
+
+uint32_t FloatRegisters::ActualTotalPhys() {
+ if (Has32DP()) {
+ return 32;
+ }
+ return 16;
+}
+
+void FlushICache(void* code, size_t size) {
+#if defined(JS_SIMULATOR_ARM)
+ js::jit::SimulatorProcess::FlushICache(code, size);
+
+#elif (defined(__linux__) || defined(ANDROID)) && defined(__GNUC__)
+ void* end = (void*)(reinterpret_cast<char*>(code) + size);
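+  // The inline assembly below invokes the private ARM Linux cacheflush
+  // syscall (__ARM_NR_cacheflush, number 0x0f0002, built into r7 as
+  // 0xf0000 + 2), with r0/r1 holding the start/end addresses and r2 flags.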
+ asm volatile(
+ "push {r7}\n"
+ "mov r0, %0\n"
+ "mov r1, %1\n"
+ "mov r7, #0xf0000\n"
+ "add r7, r7, #0x2\n"
+ "mov r2, #0x0\n"
+ "svc 0x0\n"
+ "pop {r7}\n"
+ :
+ : "r"(code), "r"(end)
+ : "r0", "r1", "r2");
+
+ if (forceDoubleCacheFlush) {
+ void* start = (void*)((uintptr_t)code + 1);
+ asm volatile(
+ "push {r7}\n"
+ "mov r0, %0\n"
+ "mov r1, %1\n"
+ "mov r7, #0xf0000\n"
+ "add r7, r7, #0x2\n"
+ "mov r2, #0x0\n"
+ "svc 0x0\n"
+ "pop {r7}\n"
+ :
+ : "r"(start), "r"(end)
+ : "r0", "r1", "r2");
+ }
+
+#elif defined(__FreeBSD__) || defined(__NetBSD__)
+ __clear_cache(code, reinterpret_cast<char*>(code) + size);
+
+#elif defined(XP_IOS)
+ sys_icache_invalidate(code, size);
+
+#else
+# error "Unexpected platform"
+#endif
+}
+
+void FlushExecutionContext() {
+#ifndef JS_SIMULATOR_ARM
+ // Ensure that any instructions already in the pipeline are discarded and
+ // reloaded from the icache.
+ asm volatile("isb\n" : : : "memory");
+#else
+  // We assume the icache flushing routines on other platforms take care of
+  // this.
+#endif
+}
+
+} // namespace jit
+} // namespace js
diff --git a/js/src/jit/arm/Architecture-arm.h b/js/src/jit/arm/Architecture-arm.h
new file mode 100644
index 0000000000..fa2ae8e0ed
--- /dev/null
+++ b/js/src/jit/arm/Architecture-arm.h
@@ -0,0 +1,733 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_Architecture_arm_h
+#define jit_arm_Architecture_arm_h
+
+#include "mozilla/MathAlgorithms.h"
+
+#include <algorithm>
+#include <limits.h>
+#include <stdint.h>
+
+#include "jit/shared/Architecture-shared.h"
+
+#include "js/Utility.h"
+
+// GCC versions 4.6 and above define __ARM_PCS_VFP to denote a hard-float
+// ABI target. The iOS toolchain doesn't define anything specific here,
+// but iOS always supports VFP.
+#if defined(__ARM_PCS_VFP) || defined(XP_IOS)
+# define JS_CODEGEN_ARM_HARDFP
+#endif
+
+namespace js {
+namespace jit {
+
+// These offsets are specific to nunboxing, and capture offsets into the
+// components of a js::Value.
+static const int32_t NUNBOX32_TYPE_OFFSET = 4;
+static const int32_t NUNBOX32_PAYLOAD_OFFSET = 0;
+
+static const uint32_t ShadowStackSpace = 0;
+
+// How far forward/back can a jump go? Provide a generous buffer for thunks.
+static const uint32_t JumpImmediateRange = 20 * 1024 * 1024;
+
+class Registers {
+ public:
+ enum RegisterID {
+ r0 = 0,
+ r1,
+ r2,
+ r3,
+ r4,
+ r5,
+ r6,
+ r7,
+ r8,
+ r9,
+ r10,
+ r11,
+ fp = r11,
+ r12,
+ ip = r12,
+ r13,
+ sp = r13,
+ r14,
+ lr = r14,
+ r15,
+ pc = r15,
+ invalid_reg
+ };
+ typedef uint8_t Code;
+ typedef RegisterID Encoding;
+
+ // Content spilled during bailouts.
+ union RegisterContent {
+ uintptr_t r;
+ };
+
+ static const char* GetName(Code code) {
+ MOZ_ASSERT(code < Total);
+ static const char* const Names[] = {"r0", "r1", "r2", "r3", "r4", "r5",
+ "r6", "r7", "r8", "r9", "r10", "r11",
+ "r12", "sp", "r14", "pc"};
+ return Names[code];
+ }
+ static const char* GetName(Encoding i) { return GetName(Code(i)); }
+
+ static Code FromName(const char* name);
+
+ static const Encoding StackPointer = sp;
+ static const Encoding Invalid = invalid_reg;
+
+ static const uint32_t Total = 16;
+ static const uint32_t Allocatable = 13;
+
+ typedef uint32_t SetType;
+
+ static const SetType AllMask = (1 << Total) - 1;
+ static const SetType ArgRegMask =
+ (1 << r0) | (1 << r1) | (1 << r2) | (1 << r3);
+
+ static const SetType VolatileMask =
+ (1 << r0) | (1 << r1) | (1 << Registers::r2) |
+ (1 << Registers::r3)
+#if defined(XP_IOS)
+ // per
+ // https://developer.apple.com/library/ios/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARMv6FunctionCallingConventions.html#//apple_ref/doc/uid/TP40009021-SW4
+ | (1 << Registers::r9)
+#endif
+ ;
+
+ static const SetType NonVolatileMask =
+ (1 << Registers::r4) | (1 << Registers::r5) | (1 << Registers::r6) |
+ (1 << Registers::r7) | (1 << Registers::r8) |
+#if !defined(XP_IOS)
+ (1 << Registers::r9) |
+#endif
+ (1 << Registers::r10) | (1 << Registers::r11) | (1 << Registers::r12) |
+ (1 << Registers::r14);
+
+ static const SetType WrapperMask = VolatileMask | // = arguments
+ (1 << Registers::r4) | // = outReg
+ (1 << Registers::r5); // = argBase
+
+ static const SetType NonAllocatableMask =
+ (1 << Registers::sp) | (1 << Registers::r12) | // r12 = ip = scratch
+ (1 << Registers::lr) | (1 << Registers::pc) | (1 << Registers::fp);
+
+ // Registers returned from a JS -> JS call.
+ static const SetType JSCallMask = (1 << Registers::r2) | (1 << Registers::r3);
+
+ // Registers returned from a JS -> C call.
+ static const SetType CallMask =
+ (1 << Registers::r0) |
+ (1 << Registers::r1); // Used for double-size returns.
+
+ static const SetType AllocatableMask = AllMask & ~NonAllocatableMask;
+
+ static uint32_t SetSize(SetType x) {
+ static_assert(sizeof(SetType) == 4, "SetType must be 32 bits");
+ return mozilla::CountPopulation32(x);
+ }
+ static uint32_t FirstBit(SetType x) {
+ return mozilla::CountTrailingZeroes32(x);
+ }
+ static uint32_t LastBit(SetType x) {
+ return 31 - mozilla::CountLeadingZeroes32(x);
+ }
+};
+
+// Smallest integer type that can hold a register bitmask.
+typedef uint16_t PackedRegisterMask;
+
+class FloatRegisters {
+ public:
+ enum FPRegisterID {
+ s0,
+ s1,
+ s2,
+ s3,
+ s4,
+ s5,
+ s6,
+ s7,
+ s8,
+ s9,
+ s10,
+ s11,
+ s12,
+ s13,
+ s14,
+ s15,
+ s16,
+ s17,
+ s18,
+ s19,
+ s20,
+ s21,
+ s22,
+ s23,
+ s24,
+ s25,
+ s26,
+ s27,
+ s28,
+ s29,
+ s30,
+ s31,
+ d0,
+ d1,
+ d2,
+ d3,
+ d4,
+ d5,
+ d6,
+ d7,
+ d8,
+ d9,
+ d10,
+ d11,
+ d12,
+ d13,
+ d14,
+ d15,
+ d16,
+ d17,
+ d18,
+ d19,
+ d20,
+ d21,
+ d22,
+ d23,
+ d24,
+ d25,
+ d26,
+ d27,
+ d28,
+ d29,
+ d30,
+ d31,
+ invalid_freg
+ };
+
+ typedef uint32_t Code;
+ typedef FPRegisterID Encoding;
+
+ // Content spilled during bailouts.
+ union RegisterContent {
+ double d;
+ };
+
+ static const char* GetDoubleName(Encoding code) {
+ static const char* const Names[] = {
+ "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+ "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
+ "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+ "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"};
+ return Names[code];
+ }
+ static const char* GetSingleName(Encoding code) {
+ static const char* const Names[] = {
+ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+ "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
+ "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
+ "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31"};
+ return Names[code];
+ }
+
+ static Code FromName(const char* name);
+
+ static const Encoding Invalid = invalid_freg;
+ static const uint32_t Total = 48;
+ static const uint32_t TotalDouble = 16;
+ static const uint32_t TotalSingle = 32;
+ static const uint32_t Allocatable = 45;
+ // There are only 32 places that we can put values.
+ static const uint32_t TotalPhys = 32;
+ static uint32_t ActualTotalPhys();
+
+ /* clang-format off */
+  // ARM float registers overlap in such a way that for 1 double register, in
+  // the range d0-d15, we have 2 single registers in the range s0-s31; d16-d31
+  // have no single-register aliases. The aliasing rules state that d{n}
+  // aliases s{2n} and s{2n+1}, for n in [0 .. 15].
+  //
+  // The register set is used to represent either allocatable registers or
+  // live registers. The set maps d0-d15 and s0-s31 to a single bit each. The
+  // registers d16-d31 are not used at the moment.
+ //
+ // uuuu uuuu uuuu uuuu dddd dddd dddd dddd ssss ssss ssss ssss ssss ssss ssss ssss
+ // ^ ^ ^ ^
+ // '-- d15 d0 --' '-- s31 s0 --'
+ //
+  // LiveSets are handled by adding the bit of each register without
+  // considering the aliases.
+  //
+  // AllocatableSets are handled by adding and removing the bit of each
+  // aligned-or-dominated-aliased register.
+ //
+ // ...0...00... : s{2n}, s{2n+1} and d{n} are not available
+ // ...1...01... : s{2n} is available (*)
+ // ...0...10... : s{2n+1} is available
+ // ...1...11... : s{2n}, s{2n+1} and d{n} are available
+ //
+ // (*) Note that d{n} bit is set, but is not available because s{2n+1} bit
+ // is not set, which is required as d{n} dominates s{2n+1}. The d{n} bit is
+ // set, because s{2n} is aligned.
+ //
+ // | d{n} |
+ // | s{2n+1} | s{2n} |
+ //
+ /* clang-format on */
+ typedef uint64_t SetType;
+ static const SetType AllSingleMask = (1ull << TotalSingle) - 1;
+ static const SetType AllDoubleMask = ((1ull << TotalDouble) - 1)
+ << TotalSingle;
+ static const SetType AllMask = AllDoubleMask | AllSingleMask;
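+  // (With TotalSingle == 32 and TotalDouble == 16, AllSingleMask is
+  // 0x00000000ffffffff and AllDoubleMask is 0x0000ffff00000000.)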
+
+ // d15 is the ScratchFloatReg.
+ static const SetType NonVolatileDoubleMask =
+ ((1ULL << d8) | (1ULL << d9) | (1ULL << d10) | (1ULL << d11) |
+ (1ULL << d12) | (1ULL << d13) | (1ULL << d14));
+ // s30 and s31 alias d15.
+ static const SetType NonVolatileMask =
+ (NonVolatileDoubleMask |
+ ((1 << s16) | (1 << s17) | (1 << s18) | (1 << s19) | (1 << s20) |
+ (1 << s21) | (1 << s22) | (1 << s23) | (1 << s24) | (1 << s25) |
+ (1 << s26) | (1 << s27) | (1 << s28) | (1 << s29) | (1 << s30)));
+
+ static const SetType VolatileMask = AllMask & ~NonVolatileMask;
+ static const SetType VolatileDoubleMask =
+ AllDoubleMask & ~NonVolatileDoubleMask;
+
+ static const SetType WrapperMask = VolatileMask;
+
+ // d15 is the ARM scratch float register.
+ // s30 and s31 alias d15.
+ static const SetType NonAllocatableMask =
+ ((1ULL << d15)) | (1ULL << s30) | (1ULL << s31);
+
+ static const SetType AllocatableMask = AllMask & ~NonAllocatableMask;
+};
+
+static const uint32_t SpillSlotSize =
+ std::max(sizeof(Registers::RegisterContent),
+ sizeof(FloatRegisters::RegisterContent));
+
+template <typename T>
+class TypedRegisterSet;
+
+class VFPRegister {
+ public:
+ // What type of data is being stored in this register? UInt / Int are
+ // specifically for vcvt, where we need to know how the data is supposed to
+ // be converted.
+ enum RegType : uint8_t { Single = 0x0, Double = 0x1, UInt = 0x2, Int = 0x3 };
+
+ typedef FloatRegisters Codes;
+ typedef Codes::Code Code;
+ typedef Codes::Encoding Encoding;
+
+ // Bitfields below are all uint32_t to make sure MSVC packs them correctly.
+ public:
+ // ARM doesn't have more than 32 registers of each type, so 5 bits should
+ // suffice.
+ uint32_t code_ : 5;
+
+ protected:
+ uint32_t kind : 2;
+ uint32_t _isInvalid : 1;
+ uint32_t _isMissing : 1;
+
+ public:
+ constexpr VFPRegister(uint32_t r, RegType k)
+ : code_(Code(r)), kind(k), _isInvalid(false), _isMissing(false) {}
+ constexpr VFPRegister()
+ : code_(Code(0)), kind(Double), _isInvalid(true), _isMissing(false) {}
+
+ constexpr VFPRegister(RegType k, uint32_t id, bool invalid, bool missing)
+ : code_(Code(id)), kind(k), _isInvalid(invalid), _isMissing(missing) {}
+
+ explicit constexpr VFPRegister(Code id)
+ : code_(id), kind(Double), _isInvalid(false), _isMissing(false) {}
+ bool operator==(const VFPRegister& other) const {
+ return kind == other.kind && code_ == other.code_ &&
+ isInvalid() == other.isInvalid();
+ }
+ bool operator!=(const VFPRegister& other) const { return !operator==(other); }
+
+ bool isSingle() const { return kind == Single; }
+ bool isDouble() const { return kind == Double; }
+ bool isSimd128() const { return false; }
+ bool isFloat() const { return (kind == Double) || (kind == Single); }
+ bool isInt() const { return (kind == UInt) || (kind == Int); }
+ bool isSInt() const { return kind == Int; }
+ bool isUInt() const { return kind == UInt; }
+ bool equiv(const VFPRegister& other) const { return other.kind == kind; }
+ size_t size() const { return (kind == Double) ? 8 : 4; }
+ bool isInvalid() const { return _isInvalid; }
+ bool isMissing() const {
+ MOZ_ASSERT(!_isInvalid);
+ return _isMissing;
+ }
+
+ VFPRegister doubleOverlay(unsigned int which = 0) const;
+ VFPRegister singleOverlay(unsigned int which = 0) const;
+ VFPRegister sintOverlay(unsigned int which = 0) const;
+ VFPRegister uintOverlay(unsigned int which = 0) const;
+
+ VFPRegister asSingle() const { return singleOverlay(); }
+ VFPRegister asDouble() const { return doubleOverlay(); }
+ VFPRegister asSimd128() const { MOZ_CRASH("NYI"); }
+
+ struct VFPRegIndexSplit;
+ VFPRegIndexSplit encode();
+
+ // For serializing values.
+ struct VFPRegIndexSplit {
+ const uint32_t block : 4;
+ const uint32_t bit : 1;
+
+ private:
+ friend VFPRegIndexSplit js::jit::VFPRegister::encode();
+
+ VFPRegIndexSplit(uint32_t block_, uint32_t bit_)
+ : block(block_), bit(bit_) {
+ MOZ_ASSERT(block == block_);
+ MOZ_ASSERT(bit == bit_);
+ }
+ };
+
+ Code code() const {
+ MOZ_ASSERT(!_isInvalid && !_isMissing);
+ // This should only be used in areas where we only have doubles and
+ // singles.
+ MOZ_ASSERT(isFloat());
+ return Code(code_ | (kind << 5));
+ }
+ Encoding encoding() const {
+ MOZ_ASSERT(!_isInvalid && !_isMissing);
+ return Encoding(code_);
+ }
+ uint32_t id() const { return code_; }
+ static VFPRegister FromCode(uint32_t i) {
+ uint32_t code = i & 31;
+ uint32_t kind = i >> 5;
+ return VFPRegister(code, RegType(kind));
+ }
+ bool volatile_() const {
+ if (isDouble()) {
+ return !!((1ULL << (code_ >> 1)) & FloatRegisters::VolatileMask);
+ }
+ return !!((1ULL << code_) & FloatRegisters::VolatileMask);
+ }
+ const char* name() const {
+ if (isDouble()) {
+ return FloatRegisters::GetDoubleName(Encoding(code_));
+ }
+ return FloatRegisters::GetSingleName(Encoding(code_));
+ }
+ bool aliases(const VFPRegister& other) {
+ if (kind == other.kind) {
+ return code_ == other.code_;
+ }
+ return doubleOverlay() == other.doubleOverlay();
+ }
+ static const int NumAliasedDoubles = 16;
+ uint32_t numAliased() const {
+ if (isDouble()) {
+ if (code_ < NumAliasedDoubles) {
+ return 3;
+ }
+ return 1;
+ }
+ return 2;
+ }
+
+ VFPRegister aliased(uint32_t aliasIdx) {
+ if (aliasIdx == 0) {
+ return *this;
+ }
+ if (isDouble()) {
+ MOZ_ASSERT(code_ < NumAliasedDoubles);
+ MOZ_ASSERT(aliasIdx <= 2);
+ return singleOverlay(aliasIdx - 1);
+ }
+ MOZ_ASSERT(aliasIdx == 1);
+ return doubleOverlay(aliasIdx - 1);
+ }
+ uint32_t numAlignedAliased() const {
+ if (isDouble()) {
+ if (code_ < NumAliasedDoubles) {
+ return 2;
+ }
+ return 1;
+ }
+ // s1 has 0 other aligned aliases, 1 total.
+    // s0 has 1 other aligned alias, 2 total.
+ return 2 - (code_ & 1);
+ }
+ // | d0 |
+ // | s0 | s1 |
+ // If we've stored s0 and s1 in memory, we also want to say that d0 is
+ // stored there, but it is only stored at the location where it is aligned
+ // e.g. at s0, not s1.
+ VFPRegister alignedAliased(uint32_t aliasIdx) {
+ if (aliasIdx == 0) {
+ return *this;
+ }
+ MOZ_ASSERT(aliasIdx == 1);
+ if (isDouble()) {
+ MOZ_ASSERT(code_ < NumAliasedDoubles);
+ return singleOverlay(aliasIdx - 1);
+ }
+ MOZ_ASSERT((code_ & 1) == 0);
+ return doubleOverlay(aliasIdx - 1);
+ }
+
+ typedef FloatRegisters::SetType SetType;
+
+  // This function is used to ensure that a register set can hold all single
+  // registers, even if we are taking a mix of double and single registers.
+ //
+ // s0.alignedOrDominatedAliasedSet() == s0 | d0.
+ // s1.alignedOrDominatedAliasedSet() == s1.
+ // d0.alignedOrDominatedAliasedSet() == s0 | s1 | d0.
+ //
+  // This way the Allocatable register set does not have to do any arithmetic
+ // to know if a register is available or not, as we have the following
+ // relations:
+ //
+ // d0.alignedOrDominatedAliasedSet() ==
+ // s0.alignedOrDominatedAliasedSet() | s1.alignedOrDominatedAliasedSet()
+ //
+ // s0.alignedOrDominatedAliasedSet() & s1.alignedOrDominatedAliasedSet() == 0
+ //
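+  // For example, for d1 (code_ == 1) this returns (0b11 << 2) | (1ull << 33),
+  // i.e. the bits for s2, s3 and d1; for s4 (code_ == 4) it returns
+  // (1 << 4) | (1ull << 34), i.e. the bits for s4 and d2.
+  //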
+ SetType alignedOrDominatedAliasedSet() const {
+ if (isSingle()) {
+ if (code_ % 2 != 0) {
+ return SetType(1) << code_;
+ }
+ return (SetType(1) << code_) | (SetType(1) << (32 + code_ / 2));
+ }
+
+ MOZ_ASSERT(isDouble());
+ return (SetType(0b11) << (code_ * 2)) | (SetType(1) << (32 + code_));
+ }
+
+ static constexpr RegTypeName DefaultType = RegTypeName::Float64;
+
+ template <RegTypeName = DefaultType>
+ static SetType LiveAsIndexableSet(SetType s) {
+ return SetType(0);
+ }
+
+ template <RegTypeName Name = DefaultType>
+ static SetType AllocatableAsIndexableSet(SetType s) {
+ static_assert(Name != RegTypeName::Any, "Allocatable set are not iterable");
+ return SetType(0);
+ }
+
+ static uint32_t SetSize(SetType x) {
+ static_assert(sizeof(SetType) == 8, "SetType must be 64 bits");
+    return mozilla::CountPopulation64(x);
+ }
+ static Code FromName(const char* name) {
+ return FloatRegisters::FromName(name);
+ }
+ static TypedRegisterSet<VFPRegister> ReduceSetForPush(
+ const TypedRegisterSet<VFPRegister>& s);
+ static uint32_t GetPushSizeInBytes(const TypedRegisterSet<VFPRegister>& s);
+ uint32_t getRegisterDumpOffsetInBytes();
+ static uint32_t FirstBit(SetType x) {
+ return mozilla::CountTrailingZeroes64(x);
+ }
+ static uint32_t LastBit(SetType x) {
+ return 63 - mozilla::CountLeadingZeroes64(x);
+ }
+};
+
+template <>
+inline VFPRegister::SetType
+VFPRegister::LiveAsIndexableSet<RegTypeName::Float32>(SetType set) {
+ return set & FloatRegisters::AllSingleMask;
+}
+
+template <>
+inline VFPRegister::SetType
+VFPRegister::LiveAsIndexableSet<RegTypeName::Float64>(SetType set) {
+ return set & FloatRegisters::AllDoubleMask;
+}
+
+template <>
+inline VFPRegister::SetType VFPRegister::LiveAsIndexableSet<RegTypeName::Any>(
+ SetType set) {
+ return set;
+}
+
+template <>
+inline VFPRegister::SetType
+VFPRegister::AllocatableAsIndexableSet<RegTypeName::Float32>(SetType set) {
+  // Single registers do not dominate any smaller registers, thus masking is
+  // enough to convert an allocatable set into an indexable set of all the
+  // available single registers.
+ return set & FloatRegisters::AllSingleMask;
+}
+
+template <>
+inline VFPRegister::SetType
+VFPRegister::AllocatableAsIndexableSet<RegTypeName::Float64>(SetType set) {
+ /* clang-format off */
+  // An allocatable float register set is represented as follows:
+ //
+ // uuuu uuuu uuuu uuuu dddd dddd dddd dddd ssss ssss ssss ssss ssss ssss ssss ssss
+ // ^ ^ ^ ^
+ // '-- d15 d0 --' '-- s31 s0 --'
+ //
+ // ...0...00... : s{2n}, s{2n+1} and d{n} are not available
+ // ...1...01... : s{2n} is available
+ // ...0...10... : s{2n+1} is available
+ // ...1...11... : s{2n}, s{2n+1} and d{n} are available
+ //
+  // The goal of this function is to return the set of double registers which
+  // are available, as an indexable bit set: a double's bit is set in the
+  // returned set iff the register is available.
+  //
+  // To do so, this function converts the 32-bit set of single registers
+  // into a 16-bit set of equivalent double registers. Then, we mask out
+  // double registers which do not have all the single registers that compose
+  // them. As the d{n} bit is set when s{2n} is available, we only need to
+  // take s{2n+1} into account.
+ /* clang-format on */
+
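+  // Worked example (illustrative): if only s0, s1 and d0 are available, the
+  // input set is 0b11 | (1ull << 32). The s{2n+1} filter reduces the single
+  // bits 0b11 to 0b01, the compaction below leaves bit 0 set, and the final
+  // shift moves it to bit 32, so `set & s2d` keeps d0's bit as expected.
+  //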
+ // Convert s7s6s5s4 s3s2s1s0 into s7s5s3s1, for all s0-s31.
+ SetType s2d = AllocatableAsIndexableSet<RegTypeName::Float32>(set);
+ static_assert(FloatRegisters::TotalSingle == 32, "Wrong mask");
+ s2d = (0xaaaaaaaa & s2d) >> 1; // Filter s{2n+1} registers.
+ // Group adjacent bits as follow:
+ // 0.0.s3.s1 == ((0.s3.0.s1) >> 1 | (0.s3.0.s1)) & 0b0011;
+ s2d = ((s2d >> 1) | s2d) & 0x33333333; // 0a0b --> 00ab
+ s2d = ((s2d >> 2) | s2d) & 0x0f0f0f0f; // 00ab00cd --> 0000abcd
+ s2d = ((s2d >> 4) | s2d) & 0x00ff00ff;
+ s2d = ((s2d >> 8) | s2d) & 0x0000ffff;
+ // Move the s7s5s3s1 to the aliased double positions.
+ s2d = s2d << FloatRegisters::TotalSingle;
+
+ // Note: We currently do not use any representation for d16-d31.
+ static_assert(FloatRegisters::TotalDouble == 16,
+ "d16-d31 do not have a single register mapping");
+
+ // Filter out any double register which are not allocatable due to
+ // non-aligned dominated single registers.
+ return set & s2d;
+}
+
+// The only floating-point registers that we work with are the VFP registers.
+typedef VFPRegister FloatRegister;
+
+uint32_t GetARMFlags();
+bool HasARMv7();
+bool HasMOVWT();
+bool HasLDSTREXBHD(); // {LD,ST}REX{B,H,D}
+bool HasDMBDSBISB(); // DMB, DSB, and ISB
+bool HasVFPv3();
+bool HasVFP();
+bool Has32DP();
+bool HasIDIV();
+bool HasNEON();
+
+extern volatile uint32_t armHwCapFlags;
+
+// Not part of the HWCAP flags, but we need to know these, and these bits are
+// not used. Define these here so that their use can be inlined by the
+// simulator.
+
+// A bit to flag when signaled alignment faults are to be fixed up.
+#define HWCAP_FIXUP_FAULT (1 << 24)
+
+// A bit to flag when the flags are uninitialized, so they can be atomically
+// set.
+#define HWCAP_UNINITIALIZED (1 << 25)
+
+// A bit to flag when alignment faults are enabled and signal.
+#define HWCAP_ALIGNMENT_FAULT (1 << 26)
+
+// A bit to flag the use of the hardfp ABI.
+#define HWCAP_USE_HARDFP_ABI (1 << 27)
+
+// A bit to flag the use of the ARMv7 arch, otherwise ARMv6.
+#define HWCAP_ARMv7 (1 << 28)
+
+// Top three bits are reserved, do not use them.
+
+// Returns true when cpu alignment faults are enabled and signaled, and thus we
+// should ensure loads and stores are aligned.
+inline bool HasAlignmentFault() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_ALIGNMENT_FAULT;
+}
+
+#ifdef JS_SIMULATOR_ARM
+// Returns true when cpu alignment faults will be fixed up by the
+// "operating system", which functionality we will emulate.
+inline bool FixupFault() {
+ MOZ_ASSERT(armHwCapFlags != HWCAP_UNINITIALIZED);
+ return armHwCapFlags & HWCAP_FIXUP_FAULT;
+}
+#endif
+
+// Arm/D32 has double registers that can NOT be treated as float32 and this
+// requires some dances in lowering.
+inline bool hasUnaliasedDouble() { return Has32DP(); }
+
+// On ARM, Dn aliases both S2n and S2n+1, so if you need to convert a float32 to
+// a double as a temporary, you need a temporary double register.
+inline bool hasMultiAlias() { return true; }
+
+// InitARMFlags is called from the JitContext constructor to read the hardware
+// flags. The call is a no-op after the first call, or if the JS shell has
+// already set the flags (it has a command line switch for this, see
+// ParseARMHwCapFlags).
+//
+// If the environment variable ARMHWCAP is set then the flags are read from it
+// instead; see ParseARMHwCapFlags.
+void InitARMFlags();
+
+// Register a string denoting ARM hardware flags. During engine initialization,
+// these flags will then be used instead of the actual hardware capabilities.
+// This must be called before JS_Init and the passed string's buffer must
+// outlive the JS_Init call.
+void SetARMHwCapFlagsString(const char* armHwCap);
+
+// Retrieve the ARM hardware flags as a bitmask. They must have been set.
+uint32_t GetARMFlags();
+
+// If the simulator is used then the ABI choice is dynamic. Otherwise the ABI is
+// static and useHardFpABI is inlined so that unused branches can be optimized
+// away.
+#ifdef JS_SIMULATOR_ARM
+bool UseHardFpABI();
+#else
+static inline bool UseHardFpABI() {
+# if defined(JS_CODEGEN_ARM_HARDFP)
+ return true;
+# else
+ return false;
+# endif
+}
+#endif
+
+// In order to handle SoftFp ABI calls, we need to be able to express that we
+// have ABIArgs which are represented by a pair of general-purpose registers.
+#define JS_CODEGEN_REGISTER_PAIR 1
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_Architecture_arm_h */
diff --git a/js/src/jit/arm/Assembler-arm.cpp b/js/src/jit/arm/Assembler-arm.cpp
new file mode 100644
index 0000000000..a1213b6f21
--- /dev/null
+++ b/js/src/jit/arm/Assembler-arm.cpp
@@ -0,0 +1,2832 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm/Assembler-arm.h"
+
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Sprintf.h"
+
+#include <type_traits>
+
+#include "gc/Marking.h"
+#include "jit/arm/disasm/Disasm-arm.h"
+#include "jit/arm/MacroAssembler-arm.h"
+#include "jit/AutoWritableJitCode.h"
+#include "jit/ExecutableAllocator.h"
+#include "jit/MacroAssembler.h"
+#include "vm/Realm.h"
+
+using namespace js;
+using namespace js::jit;
+
+using mozilla::CountLeadingZeroes32;
+using mozilla::DebugOnly;
+
+using LabelDoc = DisassemblerSpew::LabelDoc;
+using LiteralDoc = DisassemblerSpew::LiteralDoc;
+
+void dbg_break() {}
+
+// The ABIArgGenerator is used for making system ABI calls and for inter-wasm
+// calls. The system ABI can either be SoftFp or HardFp, and inter-wasm calls
+// are always HardFp calls. The initialization defaults to HardFp, and the ABI
+// choice is made before any system ABI calls with the method "setUseHardFp".
+ABIArgGenerator::ABIArgGenerator()
+ : intRegIndex_(0),
+ floatRegIndex_(0),
+ stackOffset_(0),
+ current_(),
+ useHardFp_(true) {}
+
+// See the "Parameter Passing" section of the "Procedure Call Standard for the
+// ARM Architecture" documentation.
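+//
+// For example (soft-fp, illustrative): for the signature (Int32, Double,
+// Int32), softNext() assigns r0 to the first argument, rounds the register
+// index up so the Double takes the (r2, r3) pair, and the final Int32 then
+// goes to the stack at offset 0.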
+ABIArg ABIArgGenerator::softNext(MIRType type) {
+ switch (type) {
+ case MIRType::Int32:
+ case MIRType::Pointer:
+ case MIRType::RefOrNull:
+ case MIRType::StackResults:
+ if (intRegIndex_ == NumIntArgRegs) {
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uint32_t);
+ break;
+ }
+ current_ = ABIArg(Register::FromCode(intRegIndex_));
+ intRegIndex_++;
+ break;
+ case MIRType::Int64:
+ // Make sure to use an even register index. Increase to next even number
+ // when odd.
+ intRegIndex_ = (intRegIndex_ + 1) & ~1;
+ if (intRegIndex_ == NumIntArgRegs) {
+ // Align the stack on 8 bytes.
+ static const uint32_t align = sizeof(uint64_t) - 1;
+ stackOffset_ = (stackOffset_ + align) & ~align;
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uint64_t);
+ break;
+ }
+ current_ = ABIArg(Register::FromCode(intRegIndex_),
+ Register::FromCode(intRegIndex_ + 1));
+ intRegIndex_ += 2;
+ break;
+ case MIRType::Float32:
+ if (intRegIndex_ == NumIntArgRegs) {
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uint32_t);
+ break;
+ }
+ current_ = ABIArg(Register::FromCode(intRegIndex_));
+ intRegIndex_++;
+ break;
+ case MIRType::Double:
+ // Make sure to use an even register index. Increase to next even number
+ // when odd.
+ intRegIndex_ = (intRegIndex_ + 1) & ~1;
+ if (intRegIndex_ == NumIntArgRegs) {
+ // Align the stack on 8 bytes.
+ static const uint32_t align = sizeof(double) - 1;
+ stackOffset_ = (stackOffset_ + align) & ~align;
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(double);
+ break;
+ }
+ current_ = ABIArg(Register::FromCode(intRegIndex_),
+ Register::FromCode(intRegIndex_ + 1));
+ intRegIndex_ += 2;
+ break;
+ default:
+ MOZ_CRASH("Unexpected argument type");
+ }
+
+ return current_;
+}
+
+ABIArg ABIArgGenerator::hardNext(MIRType type) {
+ switch (type) {
+ case MIRType::Int32:
+ case MIRType::Pointer:
+ case MIRType::RefOrNull:
+ case MIRType::StackResults:
+ if (intRegIndex_ == NumIntArgRegs) {
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uint32_t);
+ break;
+ }
+ current_ = ABIArg(Register::FromCode(intRegIndex_));
+ intRegIndex_++;
+ break;
+ case MIRType::Int64:
+ // Make sure to use an even register index. Increase to next even number
+ // when odd.
+ intRegIndex_ = (intRegIndex_ + 1) & ~1;
+ if (intRegIndex_ == NumIntArgRegs) {
+ // Align the stack on 8 bytes.
+ static const uint32_t align = sizeof(uint64_t) - 1;
+ stackOffset_ = (stackOffset_ + align) & ~align;
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uint64_t);
+ break;
+ }
+ current_ = ABIArg(Register::FromCode(intRegIndex_),
+ Register::FromCode(intRegIndex_ + 1));
+ intRegIndex_ += 2;
+ break;
+ case MIRType::Float32:
+ if (floatRegIndex_ == NumFloatArgRegs) {
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uint32_t);
+ break;
+ }
+ current_ = ABIArg(VFPRegister(floatRegIndex_, VFPRegister::Single));
+ floatRegIndex_++;
+ break;
+ case MIRType::Double:
+      // Double registers are composed of 2 float registers, thus we have to
+ // skip any float register which cannot be used in a pair of float
+ // registers in which a double value can be stored.
+ floatRegIndex_ = (floatRegIndex_ + 1) & ~1;
+ if (floatRegIndex_ == NumFloatArgRegs) {
+ static const uint32_t align = sizeof(double) - 1;
+ stackOffset_ = (stackOffset_ + align) & ~align;
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uint64_t);
+ break;
+ }
+ current_ = ABIArg(VFPRegister(floatRegIndex_ >> 1, VFPRegister::Double));
+ floatRegIndex_ += 2;
+ break;
+ default:
+ MOZ_CRASH("Unexpected argument type");
+ }
+
+ return current_;
+}
+
+ABIArg ABIArgGenerator::next(MIRType type) {
+ if (useHardFp_) {
+ return hardNext(type);
+ }
+ return softNext(type);
+}
+
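+// A wasm memory access is considered unaligned when its stated alignment is
+// smaller than the access size; as an exception, Float64 accesses aligned to
+// at least 4 bytes are treated as aligned, since VLDR/VSTR only require word
+// alignment.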
+bool js::jit::IsUnaligned(const wasm::MemoryAccessDesc& access) {
+ if (!access.align()) {
+ return false;
+ }
+
+ if (access.type() == Scalar::Float64 && access.align() >= 4) {
+ return false;
+ }
+
+ return access.align() < access.byteSize();
+}
+
+// Encode a standard register when it is being used as src1, the dest, and an
+// extra register. These should never be called with an InvalidReg.
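+//
+// For example, RT(r5) yields 5 << 12 (the Rt field, bits 12-15), and RN(r5)
+// yields 5 << 16 (the Rn field, bits 16-19).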
+uint32_t js::jit::RT(Register r) {
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return r.code() << 12;
+}
+
+uint32_t js::jit::RN(Register r) {
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return r.code() << 16;
+}
+
+uint32_t js::jit::RD(Register r) {
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return r.code() << 12;
+}
+
+uint32_t js::jit::RM(Register r) {
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return r.code() << 8;
+}
+
+// Encode a standard register when it is being used as src1, the dest, and an
+// extra register. For these, an InvalidReg is used to indicate an optional
+// register that has been omitted.
+uint32_t js::jit::maybeRT(Register r) {
+ if (r == InvalidReg) {
+ return 0;
+ }
+
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return r.code() << 12;
+}
+
+uint32_t js::jit::maybeRN(Register r) {
+ if (r == InvalidReg) {
+ return 0;
+ }
+
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return r.code() << 16;
+}
+
+uint32_t js::jit::maybeRD(Register r) {
+ if (r == InvalidReg) {
+ return 0;
+ }
+
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return r.code() << 12;
+}
+
+Register js::jit::toRD(Instruction i) {
+ return Register::FromCode((i.encode() >> 12) & 0xf);
+}
+Register js::jit::toR(Instruction i) {
+ return Register::FromCode(i.encode() & 0xf);
+}
+
+Register js::jit::toRM(Instruction i) {
+ return Register::FromCode((i.encode() >> 8) & 0xf);
+}
+
+Register js::jit::toRN(Instruction i) {
+ return Register::FromCode((i.encode() >> 16) & 0xf);
+}
+
+uint32_t js::jit::VD(VFPRegister vr) {
+ if (vr.isMissing()) {
+ return 0;
+ }
+
+ // Bits 15,14,13,12, 22.
+ VFPRegister::VFPRegIndexSplit s = vr.encode();
+ return s.bit << 22 | s.block << 12;
+}
+uint32_t js::jit::VN(VFPRegister vr) {
+ if (vr.isMissing()) {
+ return 0;
+ }
+
+ // Bits 19,18,17,16, 7.
+ VFPRegister::VFPRegIndexSplit s = vr.encode();
+ return s.bit << 7 | s.block << 16;
+}
+uint32_t js::jit::VM(VFPRegister vr) {
+ if (vr.isMissing()) {
+ return 0;
+ }
+
+ // Bits 5, 3,2,1,0.
+ VFPRegister::VFPRegIndexSplit s = vr.encode();
+ return s.bit << 5 | s.block;
+}
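+// For example (illustrative), VD(d17) splits the register code 17 into
+// block == 1 and bit == 1 (see encode() below), producing
+// (1 << 22) | (1 << 12), i.e. the D and Vd instruction fields.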
+
+VFPRegister::VFPRegIndexSplit jit::VFPRegister::encode() {
+ MOZ_ASSERT(!_isInvalid);
+
+ switch (kind) {
+ case Double:
+ return VFPRegIndexSplit(code_ & 0xf, code_ >> 4);
+ case Single:
+ return VFPRegIndexSplit(code_ >> 1, code_ & 1);
+ default:
+ // VFP register treated as an integer, NOT a gpr.
+ return VFPRegIndexSplit(code_ >> 1, code_ & 1);
+ }
+}
+
+bool InstDTR::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsDTRMask) == (uint32_t)IsDTR;
+}
+
+InstDTR* InstDTR::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstDTR*)&i;
+ }
+ return nullptr;
+}
+
+bool InstLDR::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsDTRMask) == (uint32_t)IsDTR;
+}
+
+InstLDR* InstLDR::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstLDR*)&i;
+ }
+ return nullptr;
+}
+
+InstNOP* InstNOP::AsTHIS(Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstNOP*)&i;
+ }
+ return nullptr;
+}
+
+bool InstNOP::IsTHIS(const Instruction& i) {
+ return (i.encode() & 0x0fffffff) == NopInst;
+}
+
+bool InstBranchReg::IsTHIS(const Instruction& i) {
+ return InstBXReg::IsTHIS(i) || InstBLXReg::IsTHIS(i);
+}
+
+InstBranchReg* InstBranchReg::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstBranchReg*)&i;
+ }
+ return nullptr;
+}
+void InstBranchReg::extractDest(Register* dest) { *dest = toR(*this); }
+bool InstBranchReg::checkDest(Register dest) { return dest == toR(*this); }
+
+bool InstBranchImm::IsTHIS(const Instruction& i) {
+ return InstBImm::IsTHIS(i) || InstBLImm::IsTHIS(i);
+}
+
+InstBranchImm* InstBranchImm::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstBranchImm*)&i;
+ }
+ return nullptr;
+}
+
+void InstBranchImm::extractImm(BOffImm* dest) { *dest = BOffImm(*this); }
+
+bool InstBXReg::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsBRegMask) == IsBX;
+}
+
+InstBXReg* InstBXReg::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstBXReg*)&i;
+ }
+ return nullptr;
+}
+
+bool InstBLXReg::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsBRegMask) == IsBLX;
+}
+InstBLXReg* InstBLXReg::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstBLXReg*)&i;
+ }
+ return nullptr;
+}
+
+bool InstBImm::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsBImmMask) == IsB;
+}
+InstBImm* InstBImm::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstBImm*)&i;
+ }
+ return nullptr;
+}
+
+bool InstBLImm::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsBImmMask) == IsBL;
+}
+InstBLImm* InstBLImm::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstBLImm*)&i;
+ }
+ return nullptr;
+}
+
+bool InstMovWT::IsTHIS(Instruction& i) {
+ return InstMovW::IsTHIS(i) || InstMovT::IsTHIS(i);
+}
+InstMovWT* InstMovWT::AsTHIS(Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstMovWT*)&i;
+ }
+ return nullptr;
+}
+
+void InstMovWT::extractImm(Imm16* imm) { *imm = Imm16(*this); }
+bool InstMovWT::checkImm(Imm16 imm) {
+ return imm.decode() == Imm16(*this).decode();
+}
+
+void InstMovWT::extractDest(Register* dest) { *dest = toRD(*this); }
+bool InstMovWT::checkDest(Register dest) { return dest == toRD(*this); }
+
+bool InstMovW::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsWTMask) == IsW;
+}
+
+InstMovW* InstMovW::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstMovW*)&i;
+ }
+ return nullptr;
+}
+InstMovT* InstMovT::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstMovT*)&i;
+ }
+ return nullptr;
+}
+
+bool InstMovT::IsTHIS(const Instruction& i) {
+ return (i.encode() & IsWTMask) == IsT;
+}
+
+InstALU* InstALU::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstALU*)&i;
+ }
+ return nullptr;
+}
+bool InstALU::IsTHIS(const Instruction& i) {
+ return (i.encode() & ALUMask) == 0;
+}
+void InstALU::extractOp(ALUOp* ret) { *ret = ALUOp(encode() & (0xf << 21)); }
+bool InstALU::checkOp(ALUOp op) {
+ ALUOp mine;
+ extractOp(&mine);
+ return mine == op;
+}
+void InstALU::extractDest(Register* ret) { *ret = toRD(*this); }
+bool InstALU::checkDest(Register rd) { return rd == toRD(*this); }
+void InstALU::extractOp1(Register* ret) { *ret = toRN(*this); }
+bool InstALU::checkOp1(Register rn) { return rn == toRN(*this); }
+Operand2 InstALU::extractOp2() { return Operand2(encode()); }
+
+InstCMP* InstCMP::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstCMP*)&i;
+ }
+ return nullptr;
+}
+
+bool InstCMP::IsTHIS(const Instruction& i) {
+ return InstALU::IsTHIS(i) && InstALU::AsTHIS(i)->checkDest(r0) &&
+ InstALU::AsTHIS(i)->checkOp(OpCmp);
+}
+
+InstMOV* InstMOV::AsTHIS(const Instruction& i) {
+ if (IsTHIS(i)) {
+ return (InstMOV*)&i;
+ }
+ return nullptr;
+}
+
+bool InstMOV::IsTHIS(const Instruction& i) {
+ return InstALU::IsTHIS(i) && InstALU::AsTHIS(i)->checkOp1(r0) &&
+ InstALU::AsTHIS(i)->checkOp(OpMov);
+}
+
+Op2Reg Operand2::toOp2Reg() const { return *(Op2Reg*)this; }
+
+Imm16::Imm16(Instruction& inst)
+ : lower_(inst.encode() & 0xfff),
+ upper_(inst.encode() >> 16),
+ invalid_(0xfff) {}
+
+Imm16::Imm16(uint32_t imm)
+ : lower_(imm & 0xfff), pad_(0), upper_((imm >> 12) & 0xf), invalid_(0) {
+ MOZ_ASSERT(decode() == imm);
+}
+
+Imm16::Imm16() : invalid_(0xfff) {}
+
+void Assembler::finish() {
+ flush();
+ MOZ_ASSERT(!isFinished);
+ isFinished = true;
+}
+
+bool Assembler::appendRawCode(const uint8_t* code, size_t numBytes) {
+ flush();
+ return m_buffer.appendRawCode(code, numBytes);
+}
+
+bool Assembler::reserve(size_t size) {
+ // This buffer uses fixed-size chunks so there's no point in reserving
+ // now vs. on-demand.
+ return !oom();
+}
+
+bool Assembler::swapBuffer(wasm::Bytes& bytes) {
+ // For now, specialize to the one use case. As long as wasm::Bytes is a
+ // Vector, not a linked-list of chunks, there's not much we can do other
+ // than copy.
+ MOZ_ASSERT(bytes.empty());
+ if (!bytes.resize(bytesNeeded())) {
+ return false;
+ }
+ m_buffer.executableCopy(bytes.begin());
+ return true;
+}
+
+void Assembler::executableCopy(uint8_t* buffer) {
+ MOZ_ASSERT(isFinished);
+ m_buffer.executableCopy(buffer);
+}
+
+class RelocationIterator {
+ CompactBufferReader reader_;
+ // Offset in bytes.
+ uint32_t offset_;
+
+ public:
+ explicit RelocationIterator(CompactBufferReader& reader) : reader_(reader) {}
+
+ bool read() {
+ if (!reader_.more()) {
+ return false;
+ }
+ offset_ = reader_.readUnsigned();
+ return true;
+ }
+
+ uint32_t offset() const { return offset_; }
+};
+
+template <class Iter>
+const uint32_t* Assembler::GetCF32Target(Iter* iter) {
+ Instruction* inst1 = iter->cur();
+
+ if (inst1->is<InstBranchImm>()) {
+ // See if we have a simple case, b #offset.
+ BOffImm imm;
+ InstBranchImm* jumpB = inst1->as<InstBranchImm>();
+ jumpB->extractImm(&imm);
+ return imm.getDest(inst1)->raw();
+ }
+
+ if (inst1->is<InstMovW>()) {
+ // See if we have the complex case:
+ // movw r_temp, #imm1
+ // movt r_temp, #imm2
+ // bx r_temp
+ // OR
+ // movw r_temp, #imm1
+ // movt r_temp, #imm2
+ // str pc, [sp]
+ // bx r_temp
+
+ Imm16 targ_bot;
+ Imm16 targ_top;
+ Register temp;
+
+ // Extract both the temp register and the bottom immediate.
+ InstMovW* bottom = inst1->as<InstMovW>();
+ bottom->extractImm(&targ_bot);
+ bottom->extractDest(&temp);
+
+ // Extract the top part of the immediate.
+ Instruction* inst2 = iter->next();
+ MOZ_ASSERT(inst2->is<InstMovT>());
+ InstMovT* top = inst2->as<InstMovT>();
+ top->extractImm(&targ_top);
+
+ // Make sure they are being loaded into the same register.
+ MOZ_ASSERT(top->checkDest(temp));
+
+ // Make sure we're branching to the same register.
+#ifdef DEBUG
+ // A toggled call sometimes has a NOP instead of a branch for the third
+ // instruction. No way to assert that it's valid in that situation.
+ Instruction* inst3 = iter->next();
+ if (!inst3->is<InstNOP>()) {
+ InstBranchReg* realBranch = nullptr;
+ if (inst3->is<InstBranchReg>()) {
+ realBranch = inst3->as<InstBranchReg>();
+ } else {
+ Instruction* inst4 = iter->next();
+ realBranch = inst4->as<InstBranchReg>();
+ }
+ MOZ_ASSERT(realBranch->checkDest(temp));
+ }
+#endif
+
+ uint32_t* dest = (uint32_t*)(targ_bot.decode() | (targ_top.decode() << 16));
+ return dest;
+ }
+
+ if (inst1->is<InstLDR>()) {
+ return *(uint32_t**)inst1->as<InstLDR>()->dest();
+ }
+
+ MOZ_CRASH("unsupported branch relocation");
+}
+
+uintptr_t Assembler::GetPointer(uint8_t* instPtr) {
+ InstructionIterator iter((Instruction*)instPtr);
+ uintptr_t ret = (uintptr_t)GetPtr32Target(iter, nullptr, nullptr);
+ return ret;
+}
+
+const uint32_t* Assembler::GetPtr32Target(InstructionIterator start,
+ Register* dest, RelocStyle* style) {
+ Instruction* load1 = start.cur();
+ Instruction* load2 = start.next();
+
+ if (load1->is<InstMovW>() && load2->is<InstMovT>()) {
+ if (style) {
+ *style = L_MOVWT;
+ }
+
+ // See if we have the complex case:
+ // movw r_temp, #imm1
+ // movt r_temp, #imm2
+
+ Imm16 targ_bot;
+ Imm16 targ_top;
+ Register temp;
+
+ // Extract both the temp register and the bottom immediate.
+ InstMovW* bottom = load1->as<InstMovW>();
+ bottom->extractImm(&targ_bot);
+ bottom->extractDest(&temp);
+
+ // Extract the top part of the immediate.
+ InstMovT* top = load2->as<InstMovT>();
+ top->extractImm(&targ_top);
+
+ // Make sure they are being loaded into the same register.
+ MOZ_ASSERT(top->checkDest(temp));
+
+ if (dest) {
+ *dest = temp;
+ }
+
+ uint32_t* value =
+ (uint32_t*)(targ_bot.decode() | (targ_top.decode() << 16));
+ return value;
+ }
+
+ if (load1->is<InstLDR>()) {
+ if (style) {
+ *style = L_LDR;
+ }
+ if (dest) {
+ *dest = toRD(*load1);
+ }
+ return *(uint32_t**)load1->as<InstLDR>()->dest();
+ }
+
+ MOZ_CRASH("unsupported relocation");
+}
+
+static JitCode* CodeFromJump(InstructionIterator* jump) {
+ uint8_t* target = (uint8_t*)Assembler::GetCF32Target(jump);
+ return JitCode::FromExecutable(target);
+}
+
+void Assembler::TraceJumpRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader) {
+ RelocationIterator iter(reader);
+ while (iter.read()) {
+ InstructionIterator institer((Instruction*)(code->raw() + iter.offset()));
+ JitCode* child = CodeFromJump(&institer);
+ TraceManuallyBarrieredEdge(trc, &child, "rel32");
+ }
+}
+
+static void TraceOneDataRelocation(JSTracer* trc,
+ mozilla::Maybe<AutoWritableJitCode>& awjc,
+ JitCode* code, InstructionIterator iter) {
+ Register dest;
+ Assembler::RelocStyle rs;
+ const void* prior = Assembler::GetPtr32Target(iter, &dest, &rs);
+ void* ptr = const_cast<void*>(prior);
+
+ // No barrier needed since these are constants.
+ TraceManuallyBarrieredGenericPointerEdge(
+ trc, reinterpret_cast<gc::Cell**>(&ptr), "jit-masm-ptr");
+
+ if (ptr != prior) {
+ if (awjc.isNothing()) {
+ awjc.emplace(code);
+ }
+
+ MacroAssemblerARM::ma_mov_patch(Imm32(int32_t(ptr)), dest,
+ Assembler::Always, rs, iter);
+ }
+}
+
+/* static */
+void Assembler::TraceDataRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader) {
+ mozilla::Maybe<AutoWritableJitCode> awjc;
+ while (reader.more()) {
+ size_t offset = reader.readUnsigned();
+ InstructionIterator iter((Instruction*)(code->raw() + offset));
+ TraceOneDataRelocation(trc, awjc, code, iter);
+ }
+}
+
+void Assembler::copyJumpRelocationTable(uint8_t* dest) {
+ if (jumpRelocations_.length()) {
+ memcpy(dest, jumpRelocations_.buffer(), jumpRelocations_.length());
+ }
+}
+
+void Assembler::copyDataRelocationTable(uint8_t* dest) {
+ if (dataRelocations_.length()) {
+ memcpy(dest, dataRelocations_.buffer(), dataRelocations_.length());
+ }
+}
+
+void Assembler::processCodeLabels(uint8_t* rawCode) {
+ for (const CodeLabel& label : codeLabels_) {
+ Bind(rawCode, label);
+ }
+}
+
+void Assembler::writeCodePointer(CodeLabel* label) {
+ m_buffer.assertNoPoolAndNoNops();
+ BufferOffset off = writeInst(-1);
+ label->patchAt()->bind(off.getOffset());
+}
+
+void Assembler::Bind(uint8_t* rawCode, const CodeLabel& label) {
+ size_t offset = label.patchAt().offset();
+ size_t target = label.target().offset();
+ *reinterpret_cast<const void**>(rawCode + offset) = rawCode + target;
+}
+
+Assembler::Condition Assembler::InvertCondition(Condition cond) {
+ const uint32_t ConditionInversionBit = 0x10000000;
+ return Condition(ConditionInversionBit ^ cond);
+}
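+
+// Illustrative sketch (condition encodings assumed from the standard ARM
+// scheme, where paired conditions differ only in bit 28): xoring bit 28
+// turns Equal (0x0 << 28) into NotEqual (0x1 << 28), Above (0x8 << 28) into
+// BelowOrEqual (0x9 << 28), and so on for each inverse pair.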
+
+Assembler::Condition Assembler::UnsignedCondition(Condition cond) {
+ switch (cond) {
+ case Zero:
+ case NonZero:
+ return cond;
+ case LessThan:
+ case Below:
+ return Below;
+ case LessThanOrEqual:
+ case BelowOrEqual:
+ return BelowOrEqual;
+ case GreaterThan:
+ case Above:
+ return Above;
+ case AboveOrEqual:
+ case GreaterThanOrEqual:
+ return AboveOrEqual;
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+}
+
+Assembler::Condition Assembler::ConditionWithoutEqual(Condition cond) {
+ switch (cond) {
+ case LessThan:
+ case LessThanOrEqual:
+ return LessThan;
+ case Below:
+ case BelowOrEqual:
+ return Below;
+ case GreaterThan:
+ case GreaterThanOrEqual:
+ return GreaterThan;
+ case Above:
+ case AboveOrEqual:
+ return Above;
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+}
+
+Assembler::DoubleCondition Assembler::InvertCondition(DoubleCondition cond) {
+ const uint32_t ConditionInversionBit = 0x10000000;
+ return DoubleCondition(ConditionInversionBit ^ cond);
+}
+
+Imm8::TwoImm8mData Imm8::EncodeTwoImms(uint32_t imm) {
+ // In the ideal case, we are looking for a number that (in binary) looks
+ // like:
+ // 0b((00)*)n_1((00)*)n_2((00)*)
+ // left n1 mid n2
+ // where both n_1 and n_2 fit into 8 bits.
+ // Since this is being done with rotates, we also need to handle the case
+ // that one of these numbers is in fact split between the left and right
+ // sides, in which case the constant will look like:
+ // 0bn_1a((00)*)n_2((00)*)n_1b
+ // n1a mid n2 rgh n1b
+ // Also remember, values are rotated by multiples of two, and left, mid or
+ // right can have length zero.
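+  // Worked example (illustrative, reusing the constant from the
+  // condsAreSafe comment below): 0x00ff00ff splits into
+  //   n_1 = 0x00ff0000 = 0xff rotated right by 16, and
+  //   n_2 = 0x000000ff = 0xff rotated right by 0,
+  // each of which is a valid imm8m (an 8-bit value with an even rotation).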
+ uint32_t imm1, imm2;
+ int left = CountLeadingZeroes32(imm) & 0x1E;
+ uint32_t no_n1 = imm & ~(0xff << (24 - left));
+
+ // Not technically needed: this case only happens if we can encode as a
+ // single imm8m. There is a perfectly reasonable encoding in this case, but
+ // we shouldn't encourage people to do things like this.
+ if (no_n1 == 0) {
+ return TwoImm8mData();
+ }
+
+ int mid = CountLeadingZeroes32(no_n1) & 0x1E;
+ uint32_t no_n2 =
+ no_n1 & ~((0xff << ((24 - mid) & 0x1f)) | 0xff >> ((8 + mid) & 0x1f));
+
+ if (no_n2 == 0) {
+ // We hit the easy case, no wraparound.
+ // Note: a single constant *may* look like this.
+ int imm1shift = left + 8;
+ int imm2shift = mid + 8;
+ imm1 = (imm >> (32 - imm1shift)) & 0xff;
+ if (imm2shift >= 32) {
+ imm2shift = 0;
+      // This assert does not always hold; in fact, relying on it would lead
+      // to some incredibly subtle bugs.
+      // assert((imm & 0xff) == no_n1);
+ imm2 = no_n1;
+ } else {
+ imm2 = ((imm >> (32 - imm2shift)) | (imm << imm2shift)) & 0xff;
+ MOZ_ASSERT(((no_n1 >> (32 - imm2shift)) | (no_n1 << imm2shift)) == imm2);
+ }
+ MOZ_ASSERT((imm1shift & 0x1) == 0);
+ MOZ_ASSERT((imm2shift & 0x1) == 0);
+ return TwoImm8mData(datastore::Imm8mData(imm1, imm1shift >> 1),
+ datastore::Imm8mData(imm2, imm2shift >> 1));
+ }
+
+ // Either it wraps, or it does not fit. If we initially chopped off more
+ // than 8 bits, then it won't fit.
+ if (left >= 8) {
+ return TwoImm8mData();
+ }
+
+ int right = 32 - (CountLeadingZeroes32(no_n2) & 30);
+ // All remaining set bits *must* fit into the lower 8 bits.
+ // The right == 8 case should be handled by the previous case.
+ if (right > 8) {
+ return TwoImm8mData();
+ }
+
+ // Make sure the initial bits that we removed for no_n1 fit into the
+ // 8-(32-right) leftmost bits.
+ if (((imm & (0xff << (24 - left))) << (8 - right)) != 0) {
+    // BUT we may have removed more bits than we needed to for no_n1.
+    // Take 0x04104001 as an example: we can encode 0x104 with a single op,
+    // then 0x04000001 with a second; but if we try to encode 0x04100000
+    // first, we find that we need a second op for 0x4000, and 0x1 cannot be
+    // included in the encoding of 0x04100000.
+ no_n1 = imm & ~((0xff >> (8 - right)) | (0xff << (24 + right)));
+ mid = CountLeadingZeroes32(no_n1) & 30;
+ no_n2 = no_n1 & ~((0xff << ((24 - mid) & 31)) | 0xff >> ((8 + mid) & 31));
+ if (no_n2 != 0) {
+ return TwoImm8mData();
+ }
+ }
+
+  // Now assemble all of this information into two coherent constants; each
+  // is a rotate right from the lower 8 bits.
+ int imm1shift = 8 - right;
+ imm1 = 0xff & ((imm << imm1shift) | (imm >> (32 - imm1shift)));
+ MOZ_ASSERT((imm1shift & ~0x1e) == 0);
+ // left + 8 + mid is the position of the leftmost bit of n_2.
+ // We needed to rotate 0x000000ab right by 8 in order to get 0xab000000,
+ // then shift again by the leftmost bit in order to get the constant that we
+ // care about.
+ int imm2shift = mid + 8;
+ imm2 = ((imm >> (32 - imm2shift)) | (imm << imm2shift)) & 0xff;
+ MOZ_ASSERT((imm1shift & 0x1) == 0);
+ MOZ_ASSERT((imm2shift & 0x1) == 0);
+ return TwoImm8mData(datastore::Imm8mData(imm1, imm1shift >> 1),
+ datastore::Imm8mData(imm2, imm2shift >> 1));
+}
+
+ALUOp jit::ALUNeg(ALUOp op, Register dest, Register scratch, Imm32* imm,
+ Register* negDest) {
+ // Find an alternate ALUOp to get the job done, and use a different imm.
+ *negDest = dest;
+ switch (op) {
+ case OpMov:
+ *imm = Imm32(~imm->value);
+ return OpMvn;
+ case OpMvn:
+ *imm = Imm32(~imm->value);
+ return OpMov;
+ case OpAnd:
+ *imm = Imm32(~imm->value);
+ return OpBic;
+ case OpBic:
+ *imm = Imm32(~imm->value);
+ return OpAnd;
+ case OpAdd:
+ *imm = Imm32(-imm->value);
+ return OpSub;
+ case OpSub:
+ *imm = Imm32(-imm->value);
+ return OpAdd;
+ case OpCmp:
+ *imm = Imm32(-imm->value);
+ return OpCmn;
+ case OpCmn:
+ *imm = Imm32(-imm->value);
+ return OpCmp;
+ case OpTst:
+ MOZ_ASSERT(dest == InvalidReg);
+ *imm = Imm32(~imm->value);
+ *negDest = scratch;
+ return OpBic;
+ // orr has orn on thumb2 only.
+ default:
+ return OpInvalid;
+ }
+}
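+
+// Usage sketch (register names are illustrative): Imm32(-1) is not a valid
+// imm8m for "add r0, r1, #imm", but ALUNeg(OpAdd, ...) rewrites it to OpSub
+// with Imm32(1), so "sub r0, r1, #1" computes the same result with an
+// encodable immediate.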
+
+bool jit::can_dbl(ALUOp op) {
+  // Some instructions, such as and, can't be processed as two separate
+  // instructions, and possibly add can't either (when we're setting ccodes).
+  // There is also some hilarity with *reading* condition codes. For example,
+  // adc dest, src1, 0xfff (add with carry) can be split up into
+  // adc dest, src1, 0xf00; add dest, dest, 0xff. Since "reading" the
+  // condition code conditionally increments the result by one, that only
+  // needs to be done on one of the two instructions.
+ switch (op) {
+ case OpBic:
+ case OpAdd:
+ case OpSub:
+ case OpEor:
+ case OpOrr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool jit::condsAreSafe(ALUOp op) {
+ // Even when we are setting condition codes, sometimes we can get away with
+ // splitting an operation into two. For example, if our immediate is
+  // 0x00ff00ff, and the operation is eors, we can split this in half, since
+  // x ^ 0x00ff0000 ^ 0x000000ff should set all of its condition codes
+  // exactly the same as x ^ 0x00ff00ff. However, if the operation were adds,
+  // we cannot split this in half. If the source on the add is 0xfff00ff0,
+  // the result should be 0xef10ef, but do we set the overflow bit or not?
+ // Depending on which half is performed first (0x00ff0000 or 0x000000ff) the
+ // V bit will be set differently, and *not* updating the V bit would be
+ // wrong. Theoretically, the following should work:
+ // adds r0, r1, 0x00ff0000;
+ // addsvs r0, r1, 0x000000ff;
+ // addvc r0, r1, 0x000000ff;
+ // But this is 3 instructions, and at that point, we might as well use
+ // something else.
+ switch (op) {
+ case OpBic:
+ case OpOrr:
+ case OpEor:
+ return true;
+ default:
+ return false;
+ }
+}
+
+ALUOp jit::getDestVariant(ALUOp op) {
+ // All of the compare operations are dest-less variants of a standard
+ // operation. Given the dest-less variant, return the dest-ful variant.
+ switch (op) {
+ case OpCmp:
+ return OpSub;
+ case OpCmn:
+ return OpAdd;
+ case OpTst:
+ return OpAnd;
+ case OpTeq:
+ return OpEor;
+ default:
+ return op;
+ }
+}
+
+O2RegImmShift jit::O2Reg(Register r) { return O2RegImmShift(r, LSL, 0); }
+
+O2RegImmShift jit::lsl(Register r, int amt) {
+ MOZ_ASSERT(0 <= amt && amt <= 31);
+ return O2RegImmShift(r, LSL, amt);
+}
+
+O2RegImmShift jit::lsr(Register r, int amt) {
+ MOZ_ASSERT(1 <= amt && amt <= 32);
+ return O2RegImmShift(r, LSR, amt);
+}
+
+O2RegImmShift jit::ror(Register r, int amt) {
+ MOZ_ASSERT(1 <= amt && amt <= 31);
+ return O2RegImmShift(r, ROR, amt);
+}
+
+O2RegImmShift jit::rol(Register r, int amt) {
+ MOZ_ASSERT(1 <= amt && amt <= 31);
+ return O2RegImmShift(r, ROR, 32 - amt);
+}
+
+O2RegImmShift jit::asr(Register r, int amt) {
+ MOZ_ASSERT(1 <= amt && amt <= 32);
+ return O2RegImmShift(r, ASR, amt);
+}
+
+O2RegRegShift jit::lsl(Register r, Register amt) {
+ return O2RegRegShift(r, LSL, amt);
+}
+
+O2RegRegShift jit::lsr(Register r, Register amt) {
+ return O2RegRegShift(r, LSR, amt);
+}
+
+O2RegRegShift jit::ror(Register r, Register amt) {
+ return O2RegRegShift(r, ROR, amt);
+}
+
+O2RegRegShift jit::asr(Register r, Register amt) {
+ return O2RegRegShift(r, ASR, amt);
+}
+
+static js::jit::DoubleEncoder doubleEncoder;
+
+/* static */
+const js::jit::VFPImm js::jit::VFPImm::One(0x3FF00000);
+
+js::jit::VFPImm::VFPImm(uint32_t top) {
+ data_ = -1;
+ datastore::Imm8VFPImmData tmp;
+ if (doubleEncoder.lookup(top, &tmp)) {
+ data_ = tmp.encode();
+ }
+}
+
+BOffImm::BOffImm(const Instruction& inst) : data_(inst.encode() & 0x00ffffff) {}
+
+Instruction* BOffImm::getDest(Instruction* src) const {
+ // TODO: It is probably worthwhile to verify that src is actually a branch.
+ // NOTE: This does not explicitly shift the offset of the destination left by
+ // 2, since it is indexing into an array of instruction sized objects.
+ return &src[((int32_t(data_) << 8) >> 8) + 2];
+}
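+
+// A sketch of why the "+ 2" above is correct (addresses illustrative): a
+// branch at address A whose sign-extended imm24 is N targets A + 8 + 4*N,
+// because the ARM pc reads as the branch's own address plus 8. Indexing
+// src[N + 2] over 4-byte Instruction objects applies the same +8 bias.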
+
+const js::jit::DoubleEncoder::DoubleEntry js::jit::DoubleEncoder::table[256] = {
+#include "jit/arm/DoubleEntryTable.tbl"
+};
+
+// VFPRegister implementation
+VFPRegister VFPRegister::doubleOverlay(unsigned int which) const {
+ MOZ_ASSERT(!_isInvalid);
+ MOZ_ASSERT(which == 0);
+ if (kind != Double) {
+ return VFPRegister(code_ >> 1, Double);
+ }
+ return *this;
+}
+VFPRegister VFPRegister::singleOverlay(unsigned int which) const {
+ MOZ_ASSERT(!_isInvalid);
+ if (kind == Double) {
+ // There are no corresponding float registers for d16-d31.
+ MOZ_ASSERT(code_ < 16);
+ MOZ_ASSERT(which < 2);
+ return VFPRegister((code_ << 1) + which, Single);
+ }
+ MOZ_ASSERT(which == 0);
+ return VFPRegister(code_, Single);
+}
+
+static_assert(
+ FloatRegisters::TotalDouble <= 16,
+ "We assume that every Double register also has an Integer personality");
+
+VFPRegister VFPRegister::sintOverlay(unsigned int which) const {
+ MOZ_ASSERT(!_isInvalid);
+ if (kind == Double) {
+ // There are no corresponding float registers for d16-d31.
+ MOZ_ASSERT(code_ < 16);
+ MOZ_ASSERT(which < 2);
+ return VFPRegister((code_ << 1) + which, Int);
+ }
+ MOZ_ASSERT(which == 0);
+ return VFPRegister(code_, Int);
+}
+VFPRegister VFPRegister::uintOverlay(unsigned int which) const {
+ MOZ_ASSERT(!_isInvalid);
+ if (kind == Double) {
+ // There are no corresponding float registers for d16-d31.
+ MOZ_ASSERT(code_ < 16);
+ MOZ_ASSERT(which < 2);
+ return VFPRegister((code_ << 1) + which, UInt);
+ }
+ MOZ_ASSERT(which == 0);
+ return VFPRegister(code_, UInt);
+}
+
+bool Assembler::oom() const {
+ return AssemblerShared::oom() || m_buffer.oom() || jumpRelocations_.oom() ||
+ dataRelocations_.oom();
+}
+
+// Size of the instruction stream, in bytes, including pools. This function
+// expects that all pools that need to be placed have been placed. If they
+// haven't, then we need to go and flush the pools :(
+size_t Assembler::size() const { return m_buffer.size(); }
+// Size of the relocation table, in bytes.
+size_t Assembler::jumpRelocationTableBytes() const {
+ return jumpRelocations_.length();
+}
+size_t Assembler::dataRelocationTableBytes() const {
+ return dataRelocations_.length();
+}
+
+// Size of the data table, in bytes.
+size_t Assembler::bytesNeeded() const {
+ return size() + jumpRelocationTableBytes() + dataRelocationTableBytes();
+}
+
+// Allocate memory for a branch instruction; it will be overwritten
+// subsequently and should not be disassembled.
+
+BufferOffset Assembler::allocBranchInst() {
+ return m_buffer.putInt(Always | InstNOP::NopInst);
+}
+
+void Assembler::WriteInstStatic(uint32_t x, uint32_t* dest) {
+ MOZ_ASSERT(dest != nullptr);
+ *dest = x;
+}
+
+void Assembler::haltingAlign(int alignment) {
+ // HLT with payload 0xBAAD
+ m_buffer.align(alignment, 0xE1000070 | (0xBAA << 8) | 0xD);
+}
+
+void Assembler::nopAlign(int alignment) { m_buffer.align(alignment); }
+
+BufferOffset Assembler::as_nop() { return writeInst(0xe320f000); }
+
+static uint32_t EncodeAlu(Register dest, Register src1, Operand2 op2, ALUOp op,
+ SBit s, Assembler::Condition c) {
+ return (int)op | (int)s | (int)c | op2.encode() |
+ ((dest == InvalidReg) ? 0 : RD(dest)) |
+ ((src1 == InvalidReg) ? 0 : RN(src1));
+}
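+
+// Encoding sketch (the concrete word is assumed, not asserted anywhere in
+// this file): EncodeAlu for "add r0, r1, #1" with LeaveCC and Always should
+// yield 0xe2810001, i.e. cond=0xe | OpAdd | Rn=r1 | Rd=r0 | imm12=1.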
+
+BufferOffset Assembler::as_alu(Register dest, Register src1, Operand2 op2,
+ ALUOp op, SBit s, Condition c) {
+ return writeInst(EncodeAlu(dest, src1, op2, op, s, c));
+}
+
+BufferOffset Assembler::as_mov(Register dest, Operand2 op2, SBit s,
+ Condition c) {
+ return as_alu(dest, InvalidReg, op2, OpMov, s, c);
+}
+
+/* static */
+void Assembler::as_alu_patch(Register dest, Register src1, Operand2 op2,
+ ALUOp op, SBit s, Condition c, uint32_t* pos) {
+ WriteInstStatic(EncodeAlu(dest, src1, op2, op, s, c), pos);
+}
+
+/* static */
+void Assembler::as_mov_patch(Register dest, Operand2 op2, SBit s, Condition c,
+ uint32_t* pos) {
+ as_alu_patch(dest, InvalidReg, op2, OpMov, s, c, pos);
+}
+
+BufferOffset Assembler::as_mvn(Register dest, Operand2 op2, SBit s,
+ Condition c) {
+ return as_alu(dest, InvalidReg, op2, OpMvn, s, c);
+}
+
+// Logical operations.
+BufferOffset Assembler::as_and(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpAnd, s, c);
+}
+BufferOffset Assembler::as_bic(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpBic, s, c);
+}
+BufferOffset Assembler::as_eor(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpEor, s, c);
+}
+BufferOffset Assembler::as_orr(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpOrr, s, c);
+}
+
+// Reverse byte operations.
+BufferOffset Assembler::as_rev(Register dest, Register src, Condition c) {
+ return writeInst((int)c | 0b0000'0110'1011'1111'0000'1111'0011'0000 |
+ RD(dest) | src.code());
+}
+BufferOffset Assembler::as_rev16(Register dest, Register src, Condition c) {
+ return writeInst((int)c | 0b0000'0110'1011'1111'0000'1111'1011'0000 |
+ RD(dest) | src.code());
+}
+BufferOffset Assembler::as_revsh(Register dest, Register src, Condition c) {
+ return writeInst((int)c | 0b0000'0110'1111'1111'0000'1111'1011'0000 |
+ RD(dest) | src.code());
+}
+
+// Mathematical operations.
+BufferOffset Assembler::as_adc(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpAdc, s, c);
+}
+BufferOffset Assembler::as_add(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpAdd, s, c);
+}
+BufferOffset Assembler::as_sbc(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpSbc, s, c);
+}
+BufferOffset Assembler::as_sub(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpSub, s, c);
+}
+BufferOffset Assembler::as_rsb(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpRsb, s, c);
+}
+BufferOffset Assembler::as_rsc(Register dest, Register src1, Operand2 op2,
+ SBit s, Condition c) {
+ return as_alu(dest, src1, op2, OpRsc, s, c);
+}
+
+// Test operations.
+BufferOffset Assembler::as_cmn(Register src1, Operand2 op2, Condition c) {
+ return as_alu(InvalidReg, src1, op2, OpCmn, SetCC, c);
+}
+BufferOffset Assembler::as_cmp(Register src1, Operand2 op2, Condition c) {
+ return as_alu(InvalidReg, src1, op2, OpCmp, SetCC, c);
+}
+BufferOffset Assembler::as_teq(Register src1, Operand2 op2, Condition c) {
+ return as_alu(InvalidReg, src1, op2, OpTeq, SetCC, c);
+}
+BufferOffset Assembler::as_tst(Register src1, Operand2 op2, Condition c) {
+ return as_alu(InvalidReg, src1, op2, OpTst, SetCC, c);
+}
+
+static constexpr Register NoAddend{Registers::pc};
+
+static const int SignExtend = 0x06000070;
+
+enum SignExtend {
+ SxSxtb = 10 << 20,
+ SxSxth = 11 << 20,
+ SxUxtb = 14 << 20,
+ SxUxth = 15 << 20
+};
+
+// Sign extension operations.
+BufferOffset Assembler::as_sxtb(Register dest, Register src, int rotate,
+ Condition c) {
+ return writeInst((int)c | SignExtend | SxSxtb | RN(NoAddend) | RD(dest) |
+ ((rotate & 3) << 10) | src.code());
+}
+BufferOffset Assembler::as_sxth(Register dest, Register src, int rotate,
+ Condition c) {
+ return writeInst((int)c | SignExtend | SxSxth | RN(NoAddend) | RD(dest) |
+ ((rotate & 3) << 10) | src.code());
+}
+BufferOffset Assembler::as_uxtb(Register dest, Register src, int rotate,
+ Condition c) {
+ return writeInst((int)c | SignExtend | SxUxtb | RN(NoAddend) | RD(dest) |
+ ((rotate & 3) << 10) | src.code());
+}
+BufferOffset Assembler::as_uxth(Register dest, Register src, int rotate,
+ Condition c) {
+ return writeInst((int)c | SignExtend | SxUxth | RN(NoAddend) | RD(dest) |
+ ((rotate & 3) << 10) | src.code());
+}
+
+static uint32_t EncodeMovW(Register dest, Imm16 imm, Assembler::Condition c) {
+ MOZ_ASSERT(HasMOVWT());
+ return 0x03000000 | c | imm.encode() | RD(dest);
+}
+
+static uint32_t EncodeMovT(Register dest, Imm16 imm, Assembler::Condition c) {
+ MOZ_ASSERT(HasMOVWT());
+ return 0x03400000 | c | imm.encode() | RD(dest);
+}
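+
+// Encoding sketch (concrete words assumed): movw r0, #0x1234 should encode
+// as 0xe3010234 (imm4=0x1 in bits 19:16, imm12=0x234), and movt r0, #0x5678
+// as 0xe3450678; executing both materializes 0x56781234 in r0.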
+
+// Not quite ALU worthy, but these are useful nonetheless. These also have
+// the issue of being formatted completely differently from the standard ALU
+// operations.
+BufferOffset Assembler::as_movw(Register dest, Imm16 imm, Condition c) {
+ return writeInst(EncodeMovW(dest, imm, c));
+}
+
+/* static */
+void Assembler::as_movw_patch(Register dest, Imm16 imm, Condition c,
+ Instruction* pos) {
+ WriteInstStatic(EncodeMovW(dest, imm, c), (uint32_t*)pos);
+}
+
+BufferOffset Assembler::as_movt(Register dest, Imm16 imm, Condition c) {
+ return writeInst(EncodeMovT(dest, imm, c));
+}
+
+/* static */
+void Assembler::as_movt_patch(Register dest, Imm16 imm, Condition c,
+ Instruction* pos) {
+ WriteInstStatic(EncodeMovT(dest, imm, c), (uint32_t*)pos);
+}
+
+static const int mull_tag = 0x90;
+
+BufferOffset Assembler::as_genmul(Register dhi, Register dlo, Register rm,
+ Register rn, MULOp op, SBit s, Condition c) {
+ return writeInst(RN(dhi) | maybeRD(dlo) | RM(rm) | rn.code() | op | s | c |
+ mull_tag);
+}
+BufferOffset Assembler::as_mul(Register dest, Register src1, Register src2,
+ SBit s, Condition c) {
+ return as_genmul(dest, InvalidReg, src1, src2, OpmMul, s, c);
+}
+BufferOffset Assembler::as_mla(Register dest, Register acc, Register src1,
+ Register src2, SBit s, Condition c) {
+ return as_genmul(dest, acc, src1, src2, OpmMla, s, c);
+}
+BufferOffset Assembler::as_umaal(Register destHI, Register destLO,
+ Register src1, Register src2, Condition c) {
+ return as_genmul(destHI, destLO, src1, src2, OpmUmaal, LeaveCC, c);
+}
+BufferOffset Assembler::as_mls(Register dest, Register acc, Register src1,
+ Register src2, Condition c) {
+ return as_genmul(dest, acc, src1, src2, OpmMls, LeaveCC, c);
+}
+
+BufferOffset Assembler::as_umull(Register destHI, Register destLO,
+ Register src1, Register src2, SBit s,
+ Condition c) {
+ return as_genmul(destHI, destLO, src1, src2, OpmUmull, s, c);
+}
+
+BufferOffset Assembler::as_umlal(Register destHI, Register destLO,
+ Register src1, Register src2, SBit s,
+ Condition c) {
+ return as_genmul(destHI, destLO, src1, src2, OpmUmlal, s, c);
+}
+
+BufferOffset Assembler::as_smull(Register destHI, Register destLO,
+ Register src1, Register src2, SBit s,
+ Condition c) {
+ return as_genmul(destHI, destLO, src1, src2, OpmSmull, s, c);
+}
+
+BufferOffset Assembler::as_smlal(Register destHI, Register destLO,
+ Register src1, Register src2, SBit s,
+ Condition c) {
+ return as_genmul(destHI, destLO, src1, src2, OpmSmlal, s, c);
+}
+
+BufferOffset Assembler::as_sdiv(Register rd, Register rn, Register rm,
+ Condition c) {
+ return writeInst(0x0710f010 | c | RN(rd) | RM(rm) | rn.code());
+}
+
+BufferOffset Assembler::as_udiv(Register rd, Register rn, Register rm,
+ Condition c) {
+ return writeInst(0x0730f010 | c | RN(rd) | RM(rm) | rn.code());
+}
+
+BufferOffset Assembler::as_clz(Register dest, Register src, Condition c) {
+ MOZ_ASSERT(src != pc && dest != pc);
+ return writeInst(RD(dest) | src.code() | c | 0x016f0f10);
+}
+
+// Data transfer instructions: ldr, str, ldrb, strb. Using an int to
+// differentiate between 8 bits and 32 bits is overkill, but meh.
+
+static uint32_t EncodeDtr(LoadStore ls, int size, Index mode, Register rt,
+ DTRAddr addr, Assembler::Condition c) {
+ MOZ_ASSERT(mode == Offset || (rt != addr.getBase() && pc != addr.getBase()));
+ MOZ_ASSERT(size == 32 || size == 8);
+ return 0x04000000 | ls | (size == 8 ? 0x00400000 : 0) | mode | c | RT(rt) |
+ addr.encode();
+}
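+
+// Encoding sketch (concrete word assumed): EncodeDtr(IsLoad, 32, Offset, r0,
+// DTRAddr(r1, DtrOffImm(4)), Always) describes "ldr r0, [r1, #+4]" and
+// should come out as 0xe5910004.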
+
+BufferOffset Assembler::as_dtr(LoadStore ls, int size, Index mode, Register rt,
+ DTRAddr addr, Condition c) {
+ return writeInst(EncodeDtr(ls, size, mode, rt, addr, c));
+}
+
+/* static */
+void Assembler::as_dtr_patch(LoadStore ls, int size, Index mode, Register rt,
+ DTRAddr addr, Condition c, uint32_t* dest) {
+ WriteInstStatic(EncodeDtr(ls, size, mode, rt, addr, c), dest);
+}
+
+class PoolHintData {
+ public:
+ enum LoadType {
+    // Make 0 the bogus value, since that is the value most likely to be
+    // accidentally left somewhere.
+ PoolBOGUS = 0,
+ PoolDTR = 1,
+ PoolBranch = 2,
+ PoolVDTR = 3
+ };
+
+ private:
+ uint32_t index_ : 16;
+ uint32_t cond_ : 4;
+ uint32_t loadType_ : 2;
+ uint32_t destReg_ : 5;
+ uint32_t destType_ : 1;
+ uint32_t ONES : 4;
+
+ static const uint32_t ExpectedOnes = 0xfu;
+
+ public:
+ void init(uint32_t index, Assembler::Condition cond, LoadType lt,
+ Register destReg) {
+ index_ = index;
+ MOZ_ASSERT(index_ == index);
+ cond_ = cond >> 28;
+ MOZ_ASSERT(cond_ == cond >> 28);
+ loadType_ = lt;
+ ONES = ExpectedOnes;
+ destReg_ = destReg.code();
+ destType_ = 0;
+ }
+ void init(uint32_t index, Assembler::Condition cond, LoadType lt,
+ const VFPRegister& destReg) {
+ MOZ_ASSERT(destReg.isFloat());
+ index_ = index;
+ MOZ_ASSERT(index_ == index);
+ cond_ = cond >> 28;
+ MOZ_ASSERT(cond_ == cond >> 28);
+ loadType_ = lt;
+ ONES = ExpectedOnes;
+ destReg_ = destReg.id();
+ destType_ = destReg.isDouble();
+ }
+ Assembler::Condition getCond() const {
+ return Assembler::Condition(cond_ << 28);
+ }
+
+ Register getReg() const { return Register::FromCode(destReg_); }
+ VFPRegister getVFPReg() const {
+ VFPRegister r = VFPRegister(
+ destReg_, destType_ ? VFPRegister::Double : VFPRegister::Single);
+ return r;
+ }
+
+ int32_t getIndex() const { return index_; }
+ void setIndex(uint32_t index) {
+ MOZ_ASSERT(ONES == ExpectedOnes && loadType_ != PoolBOGUS);
+ index_ = index;
+ MOZ_ASSERT(index_ == index);
+ }
+
+ LoadType getLoadType() const {
+ // If this *was* a PoolBranch, but the branch has already been bound
+ // then this isn't going to look like a real poolhintdata, but we still
+ // want to lie about it so everyone knows it *used* to be a branch.
+ if (ONES != ExpectedOnes) {
+ return PoolHintData::PoolBranch;
+ }
+ return static_cast<LoadType>(loadType_);
+ }
+
+ bool isValidPoolHint() const {
+ // Most instructions cannot have a condition that is 0xf. Notable
+ // exceptions are blx and the entire NEON instruction set. For the
+ // purposes of pool loads, and possibly patched branches, the possible
+ // instructions are ldr and b, neither of which can have a condition
+ // code of 0xf.
+ return ONES == ExpectedOnes;
+ }
+};
+
+union PoolHintPun {
+ PoolHintData phd;
+ uint32_t raw;
+};
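+
+// Layout sketch (bitfield packing assumed to follow declaration order): a
+// PoolDTR hint for r2 at pool index 5 under Always would carry index_=5,
+// cond_=0xe, loadType_=PoolDTR, destReg_=2, destType_=0 and ONES=0xf, so the
+// top four bits of the word are all ones; no conditional ldr or branch can
+// have that pattern, which is what isValidPoolHint relies on.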
+
+// Handles all of the other integral data transferring functions: ldrsb, ldrsh,
+// ldrd, etc. The size is given in bits.
+BufferOffset Assembler::as_extdtr(LoadStore ls, int size, bool IsSigned,
+ Index mode, Register rt, EDtrAddr addr,
+ Condition c) {
+ int extra_bits2 = 0;
+ int extra_bits1 = 0;
+ switch (size) {
+ case 8:
+ MOZ_ASSERT(IsSigned);
+ MOZ_ASSERT(ls != IsStore);
+ extra_bits1 = 0x1;
+ extra_bits2 = 0x2;
+ break;
+ case 16:
+ // 'case 32' doesn't need to be handled, it is handled by the default
+ // ldr/str.
+ extra_bits2 = 0x01;
+ extra_bits1 = (ls == IsStore) ? 0 : 1;
+ if (IsSigned) {
+ MOZ_ASSERT(ls != IsStore);
+ extra_bits2 |= 0x2;
+ }
+ break;
+ case 64:
+ extra_bits2 = (ls == IsStore) ? 0x3 : 0x2;
+ extra_bits1 = 0;
+ break;
+ default:
+ MOZ_CRASH("unexpected size in as_extdtr");
+ }
+ return writeInst(extra_bits2 << 5 | extra_bits1 << 20 | 0x90 | addr.encode() |
+ RT(rt) | mode | c);
+}
+
+BufferOffset Assembler::as_dtm(LoadStore ls, Register rn, uint32_t mask,
+ DTMMode mode, DTMWriteBack wb, Condition c) {
+ return writeInst(0x08000000 | RN(rn) | ls | mode | mask | c | wb);
+}
+
+BufferOffset Assembler::allocLiteralLoadEntry(
+ size_t numInst, unsigned numPoolEntries, PoolHintPun& php, uint8_t* data,
+ const LiteralDoc& doc, ARMBuffer::PoolEntry* pe, bool loadToPC) {
+ uint8_t* inst = (uint8_t*)&php.raw;
+
+ MOZ_ASSERT(inst);
+ MOZ_ASSERT(numInst == 1); // Or fix the disassembly
+
+ BufferOffset offs =
+ m_buffer.allocEntry(numInst, numPoolEntries, inst, data, pe);
+ propagateOOM(offs.assigned());
+#ifdef JS_DISASM_ARM
+ Instruction* instruction = m_buffer.getInstOrNull(offs);
+ if (instruction) {
+ spewLiteralLoad(php, loadToPC, instruction, doc);
+ }
+#endif
+ return offs;
+}
+
+// This is also used for instructions that might be resolved into branches,
+// or might not. If dest==pc then it is effectively a branch.
+
+BufferOffset Assembler::as_Imm32Pool(Register dest, uint32_t value,
+ Condition c) {
+ PoolHintPun php;
+ php.phd.init(0, c, PoolHintData::PoolDTR, dest);
+ BufferOffset offs = allocLiteralLoadEntry(
+ 1, 1, php, (uint8_t*)&value, LiteralDoc(value), nullptr, dest == pc);
+ return offs;
+}
+
+/* static */
+void Assembler::WritePoolEntry(Instruction* addr, Condition c, uint32_t data) {
+ MOZ_ASSERT(addr->is<InstLDR>());
+ *addr->as<InstLDR>()->dest() = data;
+ MOZ_ASSERT(addr->extractCond() == c);
+}
+
+BufferOffset Assembler::as_FImm64Pool(VFPRegister dest, double d, Condition c) {
+ MOZ_ASSERT(dest.isDouble());
+ PoolHintPun php;
+ php.phd.init(0, c, PoolHintData::PoolVDTR, dest);
+ return allocLiteralLoadEntry(1, 2, php, (uint8_t*)&d, LiteralDoc(d));
+}
+
+BufferOffset Assembler::as_FImm32Pool(VFPRegister dest, float f, Condition c) {
+  // Insert floats into the double pool, as they have the same limitations on
+  // immediate offset. This wastes 4 bytes of padding per float. An
+  // alternative would be to have a separate pool for floats.
+ MOZ_ASSERT(dest.isSingle());
+ PoolHintPun php;
+ php.phd.init(0, c, PoolHintData::PoolVDTR, dest);
+ return allocLiteralLoadEntry(1, 1, php, (uint8_t*)&f, LiteralDoc(f));
+}
+
+// Pool callbacks stuff:
+void Assembler::InsertIndexIntoTag(uint8_t* load_, uint32_t index) {
+ uint32_t* load = (uint32_t*)load_;
+ PoolHintPun php;
+ php.raw = *load;
+ php.phd.setIndex(index);
+ *load = php.raw;
+}
+
+// patchConstantPoolLoad takes the address of the instruction that wants to be
+// patched, and the address of the start of the constant pool, and figures
+// things out from there.
+void Assembler::PatchConstantPoolLoad(void* loadAddr, void* constPoolAddr) {
+ PoolHintData data = *(PoolHintData*)loadAddr;
+ uint32_t* instAddr = (uint32_t*)loadAddr;
+ int offset = (char*)constPoolAddr - (char*)loadAddr;
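+  // The "- 8" in the displacements below compensates for the ARM pc reading
+  // as the instruction's address plus 8, and "4 * index" selects the 4-byte
+  // pool entry.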
+ switch (data.getLoadType()) {
+ case PoolHintData::PoolBOGUS:
+ MOZ_CRASH("bogus load type!");
+ case PoolHintData::PoolDTR:
+ Assembler::as_dtr_patch(
+ IsLoad, 32, Offset, data.getReg(),
+ DTRAddr(pc, DtrOffImm(offset + 4 * data.getIndex() - 8)),
+ data.getCond(), instAddr);
+ break;
+ case PoolHintData::PoolBranch:
+ // Either this used to be a poolBranch, and the label was already bound,
+ // so it was replaced with a real branch, or this may happen in the
+ // future. If this is going to happen in the future, then the actual
+ // bits that are written here don't matter (except the condition code,
+ // since that is always preserved across patchings) but if it does not
+ // get bound later, then we want to make sure this is a load from the
+ // pool entry (and the pool entry should be nullptr so it will crash).
+ if (data.isValidPoolHint()) {
+ Assembler::as_dtr_patch(
+ IsLoad, 32, Offset, pc,
+ DTRAddr(pc, DtrOffImm(offset + 4 * data.getIndex() - 8)),
+ data.getCond(), instAddr);
+ }
+ break;
+ case PoolHintData::PoolVDTR: {
+ VFPRegister dest = data.getVFPReg();
+ int32_t imm = offset + (data.getIndex() * 4) - 8;
+ MOZ_ASSERT(-1024 < imm && imm < 1024);
+ Assembler::as_vdtr_patch(IsLoad, dest, VFPAddr(pc, VFPOffImm(imm)),
+ data.getCond(), instAddr);
+ break;
+ }
+ }
+}
+
+// Atomic instruction stuff:
+
+BufferOffset Assembler::as_ldrexd(Register rt, Register rt2, Register rn,
+ Condition c) {
+ MOZ_ASSERT(!(rt.code() & 1) && rt2.code() == rt.code() + 1);
+ MOZ_ASSERT(rt.code() != 14 && rn.code() != 15);
+ return writeInst(0x01b00f9f | (int)c | RT(rt) | RN(rn));
+}
+
+BufferOffset Assembler::as_ldrex(Register rt, Register rn, Condition c) {
+ MOZ_ASSERT(rt.code() != 15 && rn.code() != 15);
+ return writeInst(0x01900f9f | (int)c | RT(rt) | RN(rn));
+}
+
+BufferOffset Assembler::as_ldrexh(Register rt, Register rn, Condition c) {
+ MOZ_ASSERT(rt.code() != 15 && rn.code() != 15);
+ return writeInst(0x01f00f9f | (int)c | RT(rt) | RN(rn));
+}
+
+BufferOffset Assembler::as_ldrexb(Register rt, Register rn, Condition c) {
+ MOZ_ASSERT(rt.code() != 15 && rn.code() != 15);
+ return writeInst(0x01d00f9f | (int)c | RT(rt) | RN(rn));
+}
+
+BufferOffset Assembler::as_strexd(Register rd, Register rt, Register rt2,
+ Register rn, Condition c) {
+ MOZ_ASSERT(!(rt.code() & 1) && rt2.code() == rt.code() + 1);
+ MOZ_ASSERT(rt.code() != 14 && rn.code() != 15 && rd.code() != 15);
+ MOZ_ASSERT(rd != rn && rd != rt && rd != rt2);
+ return writeInst(0x01a00f90 | (int)c | RD(rd) | RN(rn) | rt.code());
+}
+
+BufferOffset Assembler::as_strex(Register rd, Register rt, Register rn,
+ Condition c) {
+ MOZ_ASSERT(rd != rn && rd != rt); // True restriction on Cortex-A7 (RPi2)
+ return writeInst(0x01800f90 | (int)c | RD(rd) | RN(rn) | rt.code());
+}
+
+BufferOffset Assembler::as_strexh(Register rd, Register rt, Register rn,
+ Condition c) {
+ MOZ_ASSERT(rd != rn && rd != rt); // True restriction on Cortex-A7 (RPi2)
+ return writeInst(0x01e00f90 | (int)c | RD(rd) | RN(rn) | rt.code());
+}
+
+BufferOffset Assembler::as_strexb(Register rd, Register rt, Register rn,
+ Condition c) {
+ MOZ_ASSERT(rd != rn && rd != rt); // True restriction on Cortex-A7 (RPi2)
+ return writeInst(0x01c00f90 | (int)c | RD(rd) | RN(rn) | rt.code());
+}
+
+BufferOffset Assembler::as_clrex() { return writeInst(0xf57ff01f); }
+
+// Memory barrier stuff:
+
+BufferOffset Assembler::as_dmb(BarrierOption option) {
+ return writeInst(0xf57ff050U | (int)option);
+}
+BufferOffset Assembler::as_dsb(BarrierOption option) {
+ return writeInst(0xf57ff040U | (int)option);
+}
+BufferOffset Assembler::as_isb() {
+ return writeInst(0xf57ff06fU); // option == SY
+}
+BufferOffset Assembler::as_dsb_trap() {
+ // DSB is "mcr 15, 0, r0, c7, c10, 4".
+ // See eg https://bugs.kde.org/show_bug.cgi?id=228060.
+ // ARMv7 manual, "VMSA CP15 c7 register summary".
+ // Flagged as "legacy" starting with ARMv8, may be disabled on chip, see
+ // ARMv8 manual E2.7.3 and G3.18.16.
+ return writeInst(0xee070f9a);
+}
+BufferOffset Assembler::as_dmb_trap() {
+ // DMB is "mcr 15, 0, r0, c7, c10, 5".
+ // ARMv7 manual, "VMSA CP15 c7 register summary".
+ // Flagged as "legacy" starting with ARMv8, may be disabled on chip, see
+ // ARMv8 manual E2.7.3 and G3.18.16.
+ return writeInst(0xee070fba);
+}
+BufferOffset Assembler::as_isb_trap() {
+ // ISB is "mcr 15, 0, r0, c7, c5, 4".
+ // ARMv7 manual, "VMSA CP15 c7 register summary".
+ // Flagged as "legacy" starting with ARMv8, may be disabled on chip, see
+ // ARMv8 manual E2.7.3 and G3.18.16.
+ return writeInst(0xee070f94);
+}
+
+BufferOffset Assembler::as_csdb() {
+ // NOP (see as_nop) on architectures where this instruction is not defined.
+ //
+ // https://developer.arm.com/-/media/developer/pdf/Cache_Speculation_Side-channels_22Feb18.pdf
+ // CSDB A32: 1110_0011_0010_0000_1111_0000_0001_0100
+ return writeInst(0xe320f000 | 0x14);
+}
+
+// Control flow stuff:
+
+// bx can *only* branch to a register, never to an immediate.
+BufferOffset Assembler::as_bx(Register r, Condition c) {
+ BufferOffset ret = writeInst(((int)c) | OpBx | r.code());
+ return ret;
+}
+
+void Assembler::WritePoolGuard(BufferOffset branch, Instruction* dest,
+ BufferOffset afterPool) {
+ BOffImm off = afterPool.diffB<BOffImm>(branch);
+ if (off.isInvalid()) {
+ MOZ_CRASH("BOffImm invalid");
+ }
+ *dest = InstBImm(off, Always);
+}
+
+// Branch can branch to an immediate *or* to a register.
+// Branches to immediates are pc-relative; branches to registers are absolute.
+BufferOffset Assembler::as_b(BOffImm off, Condition c, Label* documentation) {
+ return writeBranchInst(((int)c) | OpB | off.encode(),
+ refLabel(documentation));
+}
+
+BufferOffset Assembler::as_b(Label* l, Condition c) {
+ if (l->bound()) {
+    // Note that only one instruction is emitted here; the NOP is overwritten.
+ BufferOffset ret = allocBranchInst();
+ if (oom()) {
+ return BufferOffset();
+ }
+
+ BOffImm offset = BufferOffset(l).diffB<BOffImm>(ret);
+ MOZ_RELEASE_ASSERT(!offset.isInvalid(),
+ "Buffer size limit should prevent this");
+ as_b(offset, c, ret);
+#ifdef JS_DISASM_ARM
+ spewBranch(m_buffer.getInstOrNull(ret), refLabel(l));
+#endif
+ return ret;
+ }
+
+ if (oom()) {
+ return BufferOffset();
+ }
+
+ BufferOffset ret;
+ if (l->used()) {
+ int32_t old = l->offset();
+ MOZ_RELEASE_ASSERT(BOffImm::IsInRange(old),
+ "Buffer size limit should prevent this");
+ ret = as_b(BOffImm(old), c, l);
+ } else {
+ BOffImm inv;
+ ret = as_b(inv, c, l);
+ }
+
+ if (oom()) {
+ return BufferOffset();
+ }
+
+ l->use(ret.getOffset());
+ return ret;
+}
+
+BufferOffset Assembler::as_b(BOffImm off, Condition c, BufferOffset inst) {
+  // JS_DISASM_ARM NOTE: Can't disassemble here, because numerous callers use
+  // this to patch up old code. Must disassemble in the caller, where it
+  // makes sense. There are not many callers.
+ *editSrc(inst) = InstBImm(off, c);
+ return inst;
+}
+
+// blx can go to either an immediate or a register.
+// When blx'ing to a register, we change processor state depending on the low
+// bit of the register; when blx'ing to an immediate, we *always* change
+// processor state.
+
+BufferOffset Assembler::as_blx(Register r, Condition c) {
+ return writeInst(((int)c) | OpBlx | r.code());
+}
+
+// bl can only branch to a pc-relative immediate offset.
+// It cannot change the processor state.
+BufferOffset Assembler::as_bl(BOffImm off, Condition c, Label* documentation) {
+ return writeBranchInst(((int)c) | OpBl | off.encode(),
+ refLabel(documentation));
+}
+
+BufferOffset Assembler::as_bl(Label* l, Condition c) {
+ if (l->bound()) {
+    // Note that only one instruction is emitted here; the NOP is overwritten.
+ BufferOffset ret = allocBranchInst();
+ if (oom()) {
+ return BufferOffset();
+ }
+
+ BOffImm offset = BufferOffset(l).diffB<BOffImm>(ret);
+ MOZ_RELEASE_ASSERT(!offset.isInvalid(),
+ "Buffer size limit should prevent this");
+
+ as_bl(offset, c, ret);
+#ifdef JS_DISASM_ARM
+ spewBranch(m_buffer.getInstOrNull(ret), refLabel(l));
+#endif
+ return ret;
+ }
+
+ if (oom()) {
+ return BufferOffset();
+ }
+
+ BufferOffset ret;
+ // See if the list was empty.
+ if (l->used()) {
+ int32_t old = l->offset();
+ MOZ_RELEASE_ASSERT(BOffImm::IsInRange(old),
+ "Buffer size limit should prevent this");
+ ret = as_bl(BOffImm(old), c, l);
+ } else {
+ BOffImm inv;
+ ret = as_bl(inv, c, l);
+ }
+
+ if (oom()) {
+ return BufferOffset();
+ }
+
+ l->use(ret.getOffset());
+ return ret;
+}
+
+BufferOffset Assembler::as_bl(BOffImm off, Condition c, BufferOffset inst) {
+ *editSrc(inst) = InstBLImm(off, c);
+ return inst;
+}
+
+BufferOffset Assembler::as_mrs(Register r, Condition c) {
+ return writeInst(0x010f0000 | int(c) | RD(r));
+}
+
+BufferOffset Assembler::as_msr(Register r, Condition c) {
+ // Hardcode the 'mask' field to 0b11 for now. It is bits 18 and 19, which
+ // are the two high bits of the 'c' in this constant.
+ MOZ_ASSERT((r.code() & ~0xf) == 0);
+ return writeInst(0x012cf000 | int(c) | r.code());
+}
+
+// VFP instructions!
+enum vfp_tags { VfpTag = 0x0C000A00, VfpArith = 0x02000000 };
+
+BufferOffset Assembler::writeVFPInst(vfp_size sz, uint32_t blob) {
+ MOZ_ASSERT((sz & blob) == 0);
+ MOZ_ASSERT((VfpTag & blob) == 0);
+ return writeInst(VfpTag | std::underlying_type_t<vfp_size>(sz) | blob);
+}
+
+/* static */
+void Assembler::WriteVFPInstStatic(vfp_size sz, uint32_t blob, uint32_t* dest) {
+ MOZ_ASSERT((sz & blob) == 0);
+ MOZ_ASSERT((VfpTag & blob) == 0);
+ WriteInstStatic(VfpTag | std::underlying_type_t<vfp_size>(sz) | blob, dest);
+}
+
+// Unityped variants: all registers hold the same type (ieee754 single or
+// double). Notably not included are vcvt; vmov vd, #imm; and vmov rt, vn.
+BufferOffset Assembler::as_vfp_float(VFPRegister vd, VFPRegister vn,
+ VFPRegister vm, VFPOp op, Condition c) {
+ // Make sure we believe that all of our operands are the same kind.
+ MOZ_ASSERT_IF(!vn.isMissing(), vd.equiv(vn));
+ MOZ_ASSERT_IF(!vm.isMissing(), vd.equiv(vm));
+ vfp_size sz = vd.isDouble() ? IsDouble : IsSingle;
+ return writeVFPInst(sz, VD(vd) | VN(vn) | VM(vm) | op | VfpArith | c);
+}
+
+BufferOffset Assembler::as_vadd(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c) {
+ return as_vfp_float(vd, vn, vm, OpvAdd, c);
+}
+
+BufferOffset Assembler::as_vdiv(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c) {
+ return as_vfp_float(vd, vn, vm, OpvDiv, c);
+}
+
+BufferOffset Assembler::as_vmul(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c) {
+ return as_vfp_float(vd, vn, vm, OpvMul, c);
+}
+
+BufferOffset Assembler::as_vnmul(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c) {
+ return as_vfp_float(vd, vn, vm, OpvMul, c);
+}
+
+BufferOffset Assembler::as_vnmla(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c) {
+ MOZ_CRASH("Feature NYI");
+}
+
+BufferOffset Assembler::as_vnmls(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c) {
+ MOZ_CRASH("Feature NYI");
+}
+
+BufferOffset Assembler::as_vneg(VFPRegister vd, VFPRegister vm, Condition c) {
+ return as_vfp_float(vd, NoVFPRegister, vm, OpvNeg, c);
+}
+
+BufferOffset Assembler::as_vsqrt(VFPRegister vd, VFPRegister vm, Condition c) {
+ return as_vfp_float(vd, NoVFPRegister, vm, OpvSqrt, c);
+}
+
+BufferOffset Assembler::as_vabs(VFPRegister vd, VFPRegister vm, Condition c) {
+ return as_vfp_float(vd, NoVFPRegister, vm, OpvAbs, c);
+}
+
+BufferOffset Assembler::as_vsub(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c) {
+ return as_vfp_float(vd, vn, vm, OpvSub, c);
+}
+
+BufferOffset Assembler::as_vcmp(VFPRegister vd, VFPRegister vm, Condition c) {
+ return as_vfp_float(vd, NoVFPRegister, vm, OpvCmp, c);
+}
+
+BufferOffset Assembler::as_vcmpz(VFPRegister vd, Condition c) {
+ return as_vfp_float(vd, NoVFPRegister, NoVFPRegister, OpvCmpz, c);
+}
+
+// Specifically, a move between two same sized-registers.
+BufferOffset Assembler::as_vmov(VFPRegister vd, VFPRegister vsrc, Condition c) {
+ return as_vfp_float(vd, NoVFPRegister, vsrc, OpvMov, c);
+}
+
+// Transfer between Core and VFP.
+
+// Unlike the next function, moving between the core registers and vfp
+// registers can't be *that* properly typed, since I don't want to munge the
+// VFPRegister type to also include core registers. Thus, the core and vfp
+// registers are passed in based on their type, and src/dest is determined by
+// the float2core argument.
+
+BufferOffset Assembler::as_vxfer(Register vt1, Register vt2, VFPRegister vm,
+ FloatToCore_ f2c, Condition c, int idx) {
+ vfp_size sz = IsSingle;
+ if (vm.isDouble()) {
+    // Technically, this can be done with a vmov (à la the ARM ARM's vmov);
+    // however, that requires at least an extra bit saying whether the
+    // operation should be performed on the lower or upper half of the
+    // double. Moving a single to/from 2N/2N+1 isn't equivalent either,
+    // since there are 32 single registers and 32 double registers, so there
+    // is no way to encode the last 16 double registers as singles.
+ sz = IsDouble;
+ MOZ_ASSERT(idx == 0 || idx == 1);
+ // If we are transferring a single half of the double then it must be
+ // moving a VFP reg to a core reg.
+ MOZ_ASSERT_IF(vt2 == InvalidReg, f2c == FloatToCore);
+ idx = idx << 21;
+ } else {
+ MOZ_ASSERT(idx == 0);
+ }
+
+ if (vt2 == InvalidReg) {
+ return writeVFPInst(sz, WordTransfer |
+ std::underlying_type_t<FloatToCore_>(f2c) |
+ std::underlying_type_t<Condition>(c) | RT(vt1) |
+ maybeRN(vt2) | VN(vm) | idx);
+ }
+
+ // We are doing a 64 bit transfer.
+ return writeVFPInst(sz, DoubleTransfer |
+ std::underlying_type_t<FloatToCore_>(f2c) |
+ std::underlying_type_t<Condition>(c) | RT(vt1) |
+ maybeRN(vt2) | VM(vm) | idx);
+}
+
+enum vcvt_destFloatness { VcvtToInteger = 1 << 18, VcvtToFloat = 0 << 18 };
+enum vcvt_toZero {
+  VcvtToZero =
+      1 << 7,  // Use the default rounding mode, which truncates (toward 0).
+ VcvtToFPSCR = 0 << 7 // Use whatever rounding mode the fpscr specifies.
+};
+enum vcvt_Signedness {
+ VcvtToSigned = 1 << 16,
+ VcvtToUnsigned = 0 << 16,
+ VcvtFromSigned = 1 << 7,
+ VcvtFromUnsigned = 0 << 7
+};
+
+// Our encoding actually allows just the src and the dest (and their types) to
+// uniquely specify the encoding that we are going to use.
+BufferOffset Assembler::as_vcvt(VFPRegister vd, VFPRegister vm, bool useFPSCR,
+ Condition c) {
+ // Unlike other cases, the source and dest types cannot be the same.
+ MOZ_ASSERT(!vd.equiv(vm));
+ vfp_size sz = IsDouble;
+ if (vd.isFloat() && vm.isFloat()) {
+ // Doing a float -> float conversion.
+ if (vm.isSingle()) {
+ sz = IsSingle;
+ }
+ return writeVFPInst(sz, c | 0x02B700C0 | VM(vm) | VD(vd));
+ }
+
+ // At least one of the registers should be a float.
+ vcvt_destFloatness destFloat;
+ vcvt_Signedness opSign;
+ vcvt_toZero doToZero = VcvtToFPSCR;
+ MOZ_ASSERT(vd.isFloat() || vm.isFloat());
+ if (vd.isSingle() || vm.isSingle()) {
+ sz = IsSingle;
+ }
+
+ if (vd.isFloat()) {
+ destFloat = VcvtToFloat;
+ opSign = (vm.isSInt()) ? VcvtFromSigned : VcvtFromUnsigned;
+ } else {
+ destFloat = VcvtToInteger;
+ opSign = (vd.isSInt()) ? VcvtToSigned : VcvtToUnsigned;
+ doToZero = useFPSCR ? VcvtToFPSCR : VcvtToZero;
+ }
+ return writeVFPInst(
+ sz, c | 0x02B80040 | VD(vd) | VM(vm) | destFloat | opSign | doToZero);
+}
+
+BufferOffset Assembler::as_vcvtFixed(VFPRegister vd, bool isSigned,
+ uint32_t fixedPoint, bool toFixed,
+ Condition c) {
+ MOZ_ASSERT(vd.isFloat());
+ uint32_t sx = 0x1;
+ vfp_size sf = vd.isDouble() ? IsDouble : IsSingle;
+ int32_t imm5 = fixedPoint;
+ imm5 = (sx ? 32 : 16) - imm5;
+ MOZ_ASSERT(imm5 >= 0);
+ imm5 = imm5 >> 1 | (imm5 & 1) << 5;
+ return writeVFPInst(sf, 0x02BA0040 | VD(vd) | toFixed << 18 | sx << 7 |
+ (!isSigned) << 16 | imm5 | c);
+}
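+
+// Encoding note (a sketch; field names taken from the ARM ARM): the stored
+// value is (32 or 16) - fixedPoint, split so that its low bit lands in bit 5
+// ("i") and its high four bits in bits 3:0 ("imm4"), matching the VCVT
+// fixed-point immediate layout.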
+
+// Transfer between VFP and memory.
+static uint32_t EncodeVdtr(LoadStore ls, VFPRegister vd, VFPAddr addr,
+ Assembler::Condition c) {
+ return ls | 0x01000000 | addr.encode() | VD(vd) | c;
+}
+
+BufferOffset Assembler::as_vdtr(
+ LoadStore ls, VFPRegister vd, VFPAddr addr,
+ Condition c /* vfp doesn't have a wb option */) {
+ vfp_size sz = vd.isDouble() ? IsDouble : IsSingle;
+ return writeVFPInst(sz, EncodeVdtr(ls, vd, addr, c));
+}
+
+/* static */
+void Assembler::as_vdtr_patch(LoadStore ls, VFPRegister vd, VFPAddr addr,
+ Condition c, uint32_t* dest) {
+ vfp_size sz = vd.isDouble() ? IsDouble : IsSingle;
+ WriteVFPInstStatic(sz, EncodeVdtr(ls, vd, addr, c), dest);
+}
+
+// VFP's ldm/stm work differently from the standard arm ones. You can only
+// transfer a range.
+
+BufferOffset Assembler::as_vdtm(LoadStore st, Register rn, VFPRegister vd,
+ int length,
+ /* also has update conditions */ Condition c) {
+ MOZ_ASSERT(length <= 16 && length >= 0);
+ vfp_size sz = vd.isDouble() ? IsDouble : IsSingle;
+
+ if (vd.isDouble()) {
+ length *= 2;
+ }
+
+ return writeVFPInst(sz, dtmLoadStore | RN(rn) | VD(vd) | length | dtmMode |
+ dtmUpdate | dtmCond);
+}
+
+BufferOffset Assembler::as_vldr_unaligned(VFPRegister vd, Register rn) {
+ MOZ_ASSERT(HasNEON());
+ if (vd.isDouble()) {
+ // vld1 (multiple single elements) with align=0, size=3, numregs=1
+ return writeInst(0xF42007CF | RN(rn) | VD(vd));
+ }
+ // vld1 (single element to single lane) with index=0, size=2
+ MOZ_ASSERT(vd.isFloat());
+ MOZ_ASSERT((vd.code() & 1) == 0);
+ return writeInst(0xF4A0080F | RN(rn) | VD(vd.asDouble()));
+}
+
+BufferOffset Assembler::as_vstr_unaligned(VFPRegister vd, Register rn) {
+ MOZ_ASSERT(HasNEON());
+ if (vd.isDouble()) {
+ // vst1 (multiple single elements) with align=0, size=3, numregs=1
+ return writeInst(0xF40007CF | RN(rn) | VD(vd));
+ }
+ // vst1 (single element from one lane) with index=0, size=2
+ MOZ_ASSERT(vd.isFloat());
+ MOZ_ASSERT((vd.code() & 1) == 0);
+ return writeInst(0xF480080F | RN(rn) | VD(vd.asDouble()));
+}
+
+BufferOffset Assembler::as_vimm(VFPRegister vd, VFPImm imm, Condition c) {
+ MOZ_ASSERT(imm.isValid());
+ vfp_size sz = vd.isDouble() ? IsDouble : IsSingle;
+ return writeVFPInst(sz, c | imm.encode() | VD(vd) | 0x02B00000);
+}
+
+BufferOffset Assembler::as_vmrs(Register r, Condition c) {
+ return writeInst(c | 0x0ef10a10 | RT(r));
+}
+
+BufferOffset Assembler::as_vmsr(Register r, Condition c) {
+ return writeInst(c | 0x0ee10a10 | RT(r));
+}
+
+bool Assembler::nextLink(BufferOffset b, BufferOffset* next) {
+ Instruction branch = *editSrc(b);
+ MOZ_ASSERT(branch.is<InstBranchImm>());
+
+ BOffImm destOff;
+ branch.as<InstBranchImm>()->extractImm(&destOff);
+ if (destOff.isInvalid()) {
+ return false;
+ }
+
+ // Propagate the next link back to the caller, by constructing a new
+ // BufferOffset into the space they provided.
+ new (next) BufferOffset(destOff.decode());
+ return true;
+}
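+
+// A sketch of the linking scheme used by bind and retarget below: while a
+// label is unbound, every branch referring to it stores, in its own imm24
+// field, the buffer offset of the previous branch on that label; an invalid
+// BOffImm terminates the chain. nextLink above walks one step of it.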
+
+void Assembler::bind(Label* label, BufferOffset boff) {
+#ifdef JS_DISASM_ARM
+ spew_.spewBind(label);
+#endif
+ if (oom()) {
+ // Ensure we always bind the label. This matches what we do on
+ // x86/x64 and silences the assert in ~Label.
+ label->bind(0);
+ return;
+ }
+
+ if (label->used()) {
+ bool more;
+ // If our caller didn't give us an explicit target to bind to then we
+ // want to bind to the location of the next instruction.
+ BufferOffset dest = boff.assigned() ? boff : nextOffset();
+ BufferOffset b(label);
+ do {
+ BufferOffset next;
+ more = nextLink(b, &next);
+ Instruction branch = *editSrc(b);
+ Condition c = branch.extractCond();
+ BOffImm offset = dest.diffB<BOffImm>(b);
+ MOZ_RELEASE_ASSERT(!offset.isInvalid(),
+ "Buffer size limit should prevent this");
+ if (branch.is<InstBImm>()) {
+ as_b(offset, c, b);
+ } else if (branch.is<InstBLImm>()) {
+ as_bl(offset, c, b);
+ } else {
+ MOZ_CRASH("crazy fixup!");
+ }
+ b = next;
+ } while (more);
+ }
+ label->bind(nextOffset().getOffset());
+ MOZ_ASSERT(!oom());
+}
+
+void Assembler::retarget(Label* label, Label* target) {
+#ifdef JS_DISASM_ARM
+ spew_.spewRetarget(label, target);
+#endif
+ if (label->used() && !oom()) {
+ if (target->bound()) {
+ bind(label, BufferOffset(target));
+ } else if (target->used()) {
+ // The target is not bound but used. Prepend label's branch list
+ // onto target's.
+ BufferOffset labelBranchOffset(label);
+ BufferOffset next;
+
+ // Find the head of the use chain for label.
+ while (nextLink(labelBranchOffset, &next)) {
+ labelBranchOffset = next;
+ }
+
+ // Then patch the head of label's use chain to the tail of target's
+ // use chain, prepending the entire use chain of target.
+ Instruction branch = *editSrc(labelBranchOffset);
+ Condition c = branch.extractCond();
+ int32_t prev = target->offset();
+ target->use(label->offset());
+ if (branch.is<InstBImm>()) {
+ as_b(BOffImm(prev), c, labelBranchOffset);
+ } else if (branch.is<InstBLImm>()) {
+ as_bl(BOffImm(prev), c, labelBranchOffset);
+ } else {
+ MOZ_CRASH("crazy fixup!");
+ }
+ } else {
+ // The target is unbound and unused. We can just take the head of
+ // the list hanging off of label, and dump that into target.
+ target->use(label->offset());
+ }
+ }
+ label->reset();
+}
+
+static int stopBKPT = -1;
+void Assembler::as_bkpt() {
+ // This is a count of how many times a breakpoint instruction has been
+ // generated. It is embedded into the instruction for debugging
+  // purposes. Gdb will print "bkpt xxx" when you attempt to disassemble a
+  // breakpoint with the number xxx embedded into it. If this breakpoint is
+  // being hit, then you can run (in gdb):
+  // >b dbg_break
+  // >b main
+  // >commands
+  // >set stopBKPT = xxx
+  // >c
+  // >end
+  // This sets a breakpoint on the function dbg_break above, and a scripted
+  // breakpoint on main that will set the (otherwise unmodified) stopBKPT
+  // value to the number of the breakpoint, so dbg_break will actually be
+  // called. Finally, when you run the executable, execution will halt when
+  // that breakpoint is generated.
+ static int hit = 0;
+ if (stopBKPT == hit) {
+ dbg_break();
+ }
+ writeInst(0xe1200070 | (hit & 0xf) | ((hit & 0xfff0) << 4));
+ hit++;
+}
+
+BufferOffset Assembler::as_illegal_trap() {
+ // Encoding of the permanently-undefined 'udf' instruction, with the imm16
+ // set to 0.
+ return writeInst(0xe7f000f0);
+}
+
+void Assembler::flushBuffer() { m_buffer.flushPool(); }
+
+void Assembler::enterNoPool(size_t maxInst) { m_buffer.enterNoPool(maxInst); }
+
+void Assembler::leaveNoPool() { m_buffer.leaveNoPool(); }
+
+void Assembler::enterNoNops() { m_buffer.enterNoNops(); }
+
+void Assembler::leaveNoNops() { m_buffer.leaveNoNops(); }
+
+struct PoolHeader : Instruction {
+ struct Header {
+    // The size should take into account the pool header.
+    // The size is in units of Instruction (4 bytes), not bytes.
+ uint32_t size : 15;
+ uint32_t isNatural : 1;
+ uint32_t ONES : 16;
+
+ Header(int size_, bool isNatural_)
+ : size(size_), isNatural(isNatural_), ONES(0xffff) {}
+
+ explicit Header(const Instruction* i) {
+ static_assert(sizeof(Header) == sizeof(uint32_t));
+ memcpy(this, i, sizeof(Header));
+ MOZ_ASSERT(ONES == 0xffff);
+ }
+
+ uint32_t raw() const {
+ static_assert(sizeof(Header) == sizeof(uint32_t));
+ uint32_t dest;
+ memcpy(&dest, this, sizeof(Header));
+ return dest;
+ }
+ };
+
+ PoolHeader(int size_, bool isNatural_)
+ : Instruction(Header(size_, isNatural_).raw(), true) {}
+
+ uint32_t size() const {
+ Header tmp(this);
+ return tmp.size;
+ }
+ uint32_t isNatural() const {
+ Header tmp(this);
+ return tmp.isNatural;
+ }
+
+ static bool IsTHIS(const Instruction& i) {
+ return (*i.raw() & 0xffff0000) == 0xffff0000;
+ }
+ static const PoolHeader* AsTHIS(const Instruction& i) {
+ if (!IsTHIS(i)) {
+ return nullptr;
+ }
+ return static_cast<const PoolHeader*>(&i);
+ }
+};
+
+void Assembler::WritePoolHeader(uint8_t* start, Pool* p, bool isNatural) {
+  static_assert(sizeof(PoolHeader) == 4,
+                "PoolHeader must have the correct size.");
+ uint8_t* pool = start + 4;
+ // Go through the usual rigmarole to get the size of the pool.
+ pool += p->getPoolSize();
+ uint32_t size = pool - start;
+ MOZ_ASSERT((size & 3) == 0);
+ size = size >> 2;
+ MOZ_ASSERT(size < (1 << 15));
+ PoolHeader header(size, isNatural);
+ *(PoolHeader*)start = header;
+}
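+
+// Layout sketch: a pool holding a single 4-byte entry is preceded by the
+// header word 0xffff0002 (ONES=0xffff in the top half; size=2 Instruction
+// units counting the header plus one entry; isNatural=0), matching the
+// annotated .word values in the examples further below.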
+
+// The size of an arbitrary 32-bit call in the instruction stream. On ARM this
+// sequence is |pc = ldr pc - 4; imm32| given that we never reach the imm32.
+uint32_t Assembler::PatchWrite_NearCallSize() { return sizeof(uint32_t); }
+
+void Assembler::PatchWrite_NearCall(CodeLocationLabel start,
+ CodeLocationLabel toCall) {
+ Instruction* inst = (Instruction*)start.raw();
+ // Overwrite whatever instruction used to be here with a call. Since the
+ // destination is in the same function, it will be within range of the
+ // 24 << 2 byte bl instruction.
+ uint8_t* dest = toCall.raw();
+ new (inst) InstBLImm(BOffImm(dest - (uint8_t*)inst), Always);
+}
+
+void Assembler::PatchDataWithValueCheck(CodeLocationLabel label,
+ PatchedImmPtr newValue,
+ PatchedImmPtr expectedValue) {
+ Instruction* ptr = reinterpret_cast<Instruction*>(label.raw());
+
+ Register dest;
+ Assembler::RelocStyle rs;
+
+ {
+ InstructionIterator iter(ptr);
+ DebugOnly<const uint32_t*> val = GetPtr32Target(iter, &dest, &rs);
+ MOZ_ASSERT(uint32_t((const uint32_t*)val) == uint32_t(expectedValue.value));
+ }
+
+ // Patch over actual instructions.
+ {
+ InstructionIterator iter(ptr);
+ MacroAssembler::ma_mov_patch(Imm32(int32_t(newValue.value)), dest, Always,
+ rs, iter);
+ }
+}
+
+void Assembler::PatchDataWithValueCheck(CodeLocationLabel label,
+ ImmPtr newValue, ImmPtr expectedValue) {
+ PatchDataWithValueCheck(label, PatchedImmPtr(newValue.value),
+ PatchedImmPtr(expectedValue.value));
+}
+
+// This just stomps over memory with 32 bits of raw data. Its purpose is to
+// overwrite the call of JITed code with 32 bits worth of an offset. This is
+// only meant to function on code that has been invalidated, so it should be
+// totally safe. Since that instruction will never be executed again, an
+// ICache flush should not be necessary.
+void Assembler::PatchWrite_Imm32(CodeLocationLabel label, Imm32 imm) {
+ // Raw is going to be the return address.
+ uint32_t* raw = (uint32_t*)label.raw();
+ // Overwrite the 4 bytes before the return address, which will end up being
+ // the call instruction.
+ *(raw - 1) = imm.value;
+}
+
+uint8_t* Assembler::NextInstruction(uint8_t* inst_, uint32_t* count) {
+ if (count != nullptr) {
+ *count += sizeof(Instruction);
+ }
+
+ InstructionIterator iter(reinterpret_cast<Instruction*>(inst_));
+ return reinterpret_cast<uint8_t*>(iter.next());
+}
+
+static bool InstIsGuard(Instruction* inst, const PoolHeader** ph) {
+ Assembler::Condition c = inst->extractCond();
+ if (c != Assembler::Always) {
+ return false;
+ }
+ if (!(inst->is<InstBXReg>() || inst->is<InstBImm>())) {
+ return false;
+ }
+ // See if the next instruction is a pool header.
+ *ph = (inst + 1)->as<const PoolHeader>();
+ return *ph != nullptr;
+}
+
+static bool InstIsGuard(BufferInstructionIterator& iter,
+ const PoolHeader** ph) {
+ Instruction* inst = iter.cur();
+ Assembler::Condition c = inst->extractCond();
+ if (c != Assembler::Always) {
+ return false;
+ }
+ if (!(inst->is<InstBXReg>() || inst->is<InstBImm>())) {
+ return false;
+ }
+ // See if the next instruction is a pool header.
+ *ph = iter.peek()->as<const PoolHeader>();
+ return *ph != nullptr;
+}
+
+template <class T>
+static bool InstIsBNop(const T& iter) {
+  // In some special situations, it is necessary to insert a NOP into the
+  // instruction stream that nobody knows about, and since nobody should know
+  // about it, it must be skipped when Instruction::next() is called. This
+  // very specific nop is a branch to the next instruction.
+ const Instruction* cur = iter.cur();
+ Assembler::Condition c = cur->extractCond();
+ if (c != Assembler::Always) {
+ return false;
+ }
+ if (!cur->is<InstBImm>()) {
+ return false;
+ }
+ InstBImm* b = cur->as<InstBImm>();
+ BOffImm offset;
+ b->extractImm(&offset);
+ return offset.decode() == 4;
+}
+
+Instruction* InstructionIterator::maybeSkipAutomaticInstructions() {
+ // If the current instruction was automatically-inserted, skip past it.
+ const PoolHeader* ph;
+
+ // Loop until an intentionally-placed instruction is found.
+ while (true) {
+ if (InstIsGuard(cur(), &ph)) {
+ // Don't skip a natural guard.
+ if (ph->isNatural()) {
+ return cur();
+ }
+ advanceRaw(1 + ph->size());
+ } else if (InstIsBNop<InstructionIterator>(*this)) {
+ advanceRaw(1);
+ } else {
+ return cur();
+ }
+ }
+}
+
+Instruction* BufferInstructionIterator::maybeSkipAutomaticInstructions() {
+ const PoolHeader* ph;
+ // If this is a guard, and the next instruction is a header, always work
+ // around the pool. If it isn't a guard, then start looking ahead.
+ if (InstIsGuard(*this, &ph)) {
+ // Don't skip a natural guard.
+ if (ph->isNatural()) {
+ return cur();
+ }
+ advance(sizeof(Instruction) * ph->size());
+ return next();
+ }
+ if (InstIsBNop<BufferInstructionIterator>(*this)) {
+ return next();
+ }
+ return cur();
+}
+
+// Cases to be handled:
+// 1) no pools or branches in sight => return this+1
+// 2) branch to next instruction => return this+2, because a nop needed to be
+// inserted into the stream.
+// 3) this+1 is an artificial guard for a pool => return first instruction
+// after the pool
+// 4) this+1 is a natural guard => return the branch
+// 5) this is a branch, right before a pool => return first instruction after
+// the pool
+// in assembly form:
+// 1) add r0, r0, r0 <= this
+// add r1, r1, r1 <= returned value
+// add r2, r2, r2
+//
+// 2) add r0, r0, r0 <= this
+// b foo
+// foo:
+// add r2, r2, r2 <= returned value
+//
+// 3) add r0, r0, r0 <= this
+// b after_pool;
+//    .word 0xffff0002 # bit 15 being 0 indicates that the branch was not
+//                     # requested by the assembler; the 2 indicates a pool
+//                     # that is 2 words long: the header plus 1 pool entry
+//    0xdeadbeef       # the pool entry itself
+// add r4, r4, r4 <= returned value
+// 4) add r0, r0, r0 <= this
+// b after_pool <= returned value
+//    .word 0xffff8002 # bit 15 being 1 indicates that the branch was
+//                     # requested by the assembler
+// 0xdeadbeef
+// add r4, r4, r4
+// 5) b after_pool <= this
+// .word 0xffff8002 # bit 15 has no bearing on the returned value
+// 0xdeadbeef
+// add r4, r4, r4 <= returned value
+
+Instruction* InstructionIterator::next() {
+ const PoolHeader* ph;
+
+ // If the current instruction is followed by a pool header,
+ // move past the current instruction and the pool.
+ if (InstIsGuard(cur(), &ph)) {
+ advanceRaw(1 + ph->size());
+ return maybeSkipAutomaticInstructions();
+ }
+
+ // The next instruction is then known to not be a PoolHeader.
+ advanceRaw(1);
+ return maybeSkipAutomaticInstructions();
+}
+
+void Assembler::ToggleToJmp(CodeLocationLabel inst_) {
+ uint32_t* ptr = (uint32_t*)inst_.raw();
+
+ DebugOnly<Instruction*> inst = (Instruction*)inst_.raw();
+ MOZ_ASSERT(inst->is<InstCMP>());
+
+ // Zero bits 20-27, then set 24-27 to be correct for a branch.
+  // 20-23 will be part of the B's immediate, and should be 0.
+ *ptr = (*ptr & ~(0xff << 20)) | (0xa0 << 20);
+}
+
+void Assembler::ToggleToCmp(CodeLocationLabel inst_) {
+ uint32_t* ptr = (uint32_t*)inst_.raw();
+
+ DebugOnly<Instruction*> inst = (Instruction*)inst_.raw();
+ MOZ_ASSERT(inst->is<InstBImm>());
+
+ // Ensure that this masking operation doesn't affect the offset of the
+ // branch instruction when it gets toggled back.
+ MOZ_ASSERT((*ptr & (0xf << 20)) == 0);
+
+  // Also make sure that the CMP is valid. Part of having a valid CMP is that
+  // all of the bits describing the destination (as in most ALU instructions)
+  // are unset (it looks like it is encoding r0).
+ MOZ_ASSERT(toRD(*inst) == r0);
+
+ // Zero out bits 20-27, then set them to be correct for a compare.
+ *ptr = (*ptr & ~(0xff << 20)) | (0x35 << 20);
+}
+
+void Assembler::ToggleCall(CodeLocationLabel inst_, bool enabled) {
+ InstructionIterator iter(reinterpret_cast<Instruction*>(inst_.raw()));
+ MOZ_ASSERT(iter.cur()->is<InstMovW>() || iter.cur()->is<InstLDR>());
+
+ if (iter.cur()->is<InstMovW>()) {
+ // If it looks like the start of a movw/movt sequence, then make sure we
+ // have all of it (and advance the iterator past the full sequence).
+ iter.next();
+ MOZ_ASSERT(iter.cur()->is<InstMovT>());
+ }
+
+ iter.next();
+ MOZ_ASSERT(iter.cur()->is<InstNOP>() || iter.cur()->is<InstBLXReg>());
+
+ if (enabled == iter.cur()->is<InstBLXReg>()) {
+ // Nothing to do.
+ return;
+ }
+
+ Instruction* inst = iter.cur();
+
+ if (enabled) {
+ *inst = InstBLXReg(ScratchRegister, Always);
+ } else {
+ *inst = InstNOP();
+ }
+}
+
+size_t Assembler::ToggledCallSize(uint8_t* code) {
+ InstructionIterator iter(reinterpret_cast<Instruction*>(code));
+ MOZ_ASSERT(iter.cur()->is<InstMovW>() || iter.cur()->is<InstLDR>());
+
+ if (iter.cur()->is<InstMovW>()) {
+ // If it looks like the start of a movw/movt sequence, then make sure we
+ // have all of it (and advance the iterator past the full sequence).
+ iter.next();
+ MOZ_ASSERT(iter.cur()->is<InstMovT>());
+ }
+
+ iter.next();
+ MOZ_ASSERT(iter.cur()->is<InstNOP>() || iter.cur()->is<InstBLXReg>());
+ return uintptr_t(iter.cur()) + 4 - uintptr_t(code);
+}
+
+uint32_t Assembler::NopFill = 0;
+
+uint32_t Assembler::GetNopFill() {
+ static bool isSet = false;
+ if (!isSet) {
+ char* fillStr = getenv("ARM_ASM_NOP_FILL");
+ uint32_t fill;
+ if (fillStr && sscanf(fillStr, "%u", &fill) == 1) {
+ NopFill = fill;
+ }
+ if (NopFill > 8) {
+ MOZ_CRASH("Nop fill > 8 is not supported");
+ }
+ isSet = true;
+ }
+ return NopFill;
+}
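+
+// Usage sketch (an assumption about the intended debugging workflow): running
+// with ARM_ASM_NOP_FILL=1 in the environment makes the assembler buffer pad
+// each emitted instruction with one branch-to-next-instruction filler
+// (0xeaffffff), which InstIsBNop() above recognizes and skips.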
+
+uint32_t Assembler::AsmPoolMaxOffset = 1024;
+
+uint32_t Assembler::GetPoolMaxOffset() {
+ static bool isSet = false;
+ if (!isSet) {
+ char* poolMaxOffsetStr = getenv("ASM_POOL_MAX_OFFSET");
+ uint32_t poolMaxOffset;
+ if (poolMaxOffsetStr &&
+ sscanf(poolMaxOffsetStr, "%u", &poolMaxOffset) == 1) {
+ AsmPoolMaxOffset = poolMaxOffset;
+ }
+ isSet = true;
+ }
+ return AsmPoolMaxOffset;
+}
+
+SecondScratchRegisterScope::SecondScratchRegisterScope(MacroAssembler& masm)
+ : AutoRegisterScope(masm, masm.getSecondScratchReg()) {}
+
+#ifdef JS_DISASM_ARM
+
+/* static */
+void Assembler::disassembleInstruction(const Instruction* i,
+ DisasmBuffer& buffer) {
+ disasm::NameConverter converter;
+ disasm::Disassembler dasm(converter);
+ uint8_t* loc = reinterpret_cast<uint8_t*>(const_cast<uint32_t*>(i->raw()));
+ dasm.InstructionDecode(buffer, loc);
+}
+
+void Assembler::initDisassembler() {
+ // The line is normally laid out like this:
+ //
+ // xxxxxxxx ldr r, op ; comment
+ //
+ // where xx...x is the instruction bit pattern.
+ //
+ // Labels are laid out by themselves to line up with the instructions above
+ // and below:
+ //
+ // nnnn:
+ //
+ // Branch targets are normally on the same line as the branch instruction,
+ // but when they cannot be they will be on a line by themselves, indented
+ // significantly:
+ //
+ // -> label
+
+  spew_.setLabelIndent("          ");            // 10
+  spew_.setTargetIndent("                    ");  // 20
+}
+
+void Assembler::finishDisassembler() { spew_.spewOrphans(); }
+
+// Labels are named as they are encountered by adding names to a
+// table, using the Label address as the key. This is made tricky by
+// the (memory for) Label objects being reused, but reused label
+// objects are recognizable from being marked as not used or not
+// bound. See spew_.refLabel().
+//
+// In a number of cases there is no information about the target, and
+// we just end up printing "patchable constant load to PC". This is
+// true especially for jumps to bailout handlers (which have no
+// names). See allocLiteralLoadEntry() and its callers. In some cases
+// (loop back edges) some information about the intended target may be
+// propagated from higher levels, and if so it's printed here.
+
+void Assembler::spew(Instruction* i) {
+ if (spew_.isDisabled() || !i) {
+ return;
+ }
+
+ DisasmBuffer buffer;
+ disassembleInstruction(i, buffer);
+ spew_.spew("%s", buffer.start());
+}
+
+// If a target label is known, always print that and do not attempt to
+// disassemble the branch operands, as they will often be encoding
+// metainformation (pointers for a chain of jump instructions), and
+// not actual branch targets.
+
+void Assembler::spewBranch(Instruction* i, const LabelDoc& target) {
+ if (spew_.isDisabled() || !i) {
+ return;
+ }
+
+ DisasmBuffer buffer;
+ disassembleInstruction(i, buffer);
+
+ char labelBuf[128];
+ labelBuf[0] = 0;
+
+ bool haveTarget = target.valid;
+ if (!haveTarget) {
+ SprintfLiteral(labelBuf, " -> (link-time target)");
+ }
+
+ if (InstBranchImm::IsTHIS(*i)) {
+ InstBranchImm* bimm = InstBranchImm::AsTHIS(*i);
+ BOffImm destOff;
+ bimm->extractImm(&destOff);
+ if (destOff.isInvalid() || haveTarget) {
+ // The target information in the instruction is likely garbage, so remove
+ // it. The target label will in any case be printed if we have it.
+ //
+ // The format of the instruction disassembly is [0-9a-f]{8}\s+\S+\s+.*,
+ // where the \S+ string is the opcode. Strip everything after the opcode,
+ // and attach the label if we have it.
+ int i;
+ for (i = 8; i < buffer.length() && buffer[i] == ' '; i++) {
+ }
+ for (; i < buffer.length() && buffer[i] != ' '; i++) {
+ }
+ buffer[i] = 0;
+ if (haveTarget) {
+ SprintfLiteral(labelBuf, " -> %d%s", target.doc,
+ !target.bound ? "f" : "");
+ haveTarget = false;
+ }
+ }
+ }
+ spew_.spew("%s%s", buffer.start(), labelBuf);
+
+ if (haveTarget) {
+ spew_.spewRef(target);
+ }
+}
+
+void Assembler::spewLiteralLoad(PoolHintPun& php, bool loadToPC,
+ const Instruction* i, const LiteralDoc& doc) {
+ if (spew_.isDisabled()) {
+ return;
+ }
+
+ char litbuf[2048];
+ spew_.formatLiteral(doc, litbuf, sizeof(litbuf));
+
+ // See patchConstantPoolLoad, above. We assemble the instruction into a
+ // buffer with a zero offset, as documentation, but the offset will be
+ // patched later.
+
+ uint32_t inst;
+ PoolHintData& data = php.phd;
+ switch (php.phd.getLoadType()) {
+ case PoolHintData::PoolDTR:
+ Assembler::as_dtr_patch(IsLoad, 32, Offset, data.getReg(),
+ DTRAddr(pc, DtrOffImm(0)), data.getCond(), &inst);
+ break;
+ case PoolHintData::PoolBranch:
+ if (data.isValidPoolHint()) {
+ Assembler::as_dtr_patch(IsLoad, 32, Offset, pc,
+ DTRAddr(pc, DtrOffImm(0)), data.getCond(),
+ &inst);
+ }
+ break;
+ case PoolHintData::PoolVDTR:
+ Assembler::as_vdtr_patch(IsLoad, data.getVFPReg(),
+ VFPAddr(pc, VFPOffImm(0)), data.getCond(),
+ &inst);
+ break;
+
+ default:
+ MOZ_CRASH();
+ }
+
+ DisasmBuffer buffer;
+ disasm::NameConverter converter;
+ disasm::Disassembler dasm(converter);
+ dasm.InstructionDecode(buffer, reinterpret_cast<uint8_t*>(&inst));
+ spew_.spew("%s ; .const %s", buffer.start(), litbuf);
+}
+
+#endif // JS_DISASM_ARM
diff --git a/js/src/jit/arm/Assembler-arm.h b/js/src/jit/arm/Assembler-arm.h
new file mode 100644
index 0000000000..fdbac15a80
--- /dev/null
+++ b/js/src/jit/arm/Assembler-arm.h
@@ -0,0 +1,2296 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_Assembler_arm_h
+#define jit_arm_Assembler_arm_h
+
+#include "mozilla/Attributes.h"
+#include "mozilla/MathAlgorithms.h"
+
+#include <algorithm>
+#include <iterator>
+#include <type_traits>
+
+#include "jit/arm/Architecture-arm.h"
+#include "jit/arm/disasm/Disasm-arm.h"
+#include "jit/CompactBuffer.h"
+#include "jit/JitCode.h"
+#include "jit/shared/Assembler-shared.h"
+#include "jit/shared/Disassembler-shared.h"
+#include "jit/shared/IonAssemblerBufferWithConstantPools.h"
+#include "wasm/WasmTypeDecls.h"
+
+union PoolHintPun;
+
+namespace js {
+namespace jit {
+
+using LiteralDoc = DisassemblerSpew::LiteralDoc;
+using LabelDoc = DisassemblerSpew::LabelDoc;
+
+// NOTE: there are duplicates in this list! Sometimes we want to specifically
+// refer to the link register as a link register (bl lr is much clearer than bl
+// r14). HOWEVER, this register can easily be a gpr when it is not busy holding
+// the return address.
+static constexpr Register r0{Registers::r0};
+static constexpr Register r1{Registers::r1};
+static constexpr Register r2{Registers::r2};
+static constexpr Register r3{Registers::r3};
+static constexpr Register r4{Registers::r4};
+static constexpr Register r5{Registers::r5};
+static constexpr Register r6{Registers::r6};
+static constexpr Register r7{Registers::r7};
+static constexpr Register r8{Registers::r8};
+static constexpr Register r9{Registers::r9};
+static constexpr Register r10{Registers::r10};
+static constexpr Register r11{Registers::r11};
+static constexpr Register r12{Registers::ip};
+static constexpr Register ip{Registers::ip};
+static constexpr Register sp{Registers::sp};
+static constexpr Register r14{Registers::lr};
+static constexpr Register lr{Registers::lr};
+static constexpr Register pc{Registers::pc};
+
+static constexpr Register ScratchRegister{Registers::ip};
+
+// Helper class for ScratchRegister usage. Asserts that only one piece
+// of code thinks it has exclusive ownership of the scratch register.
+struct ScratchRegisterScope : public AutoRegisterScope {
+ explicit ScratchRegisterScope(MacroAssembler& masm)
+ : AutoRegisterScope(masm, ScratchRegister) {}
+};
+
+struct SecondScratchRegisterScope : public AutoRegisterScope {
+ explicit SecondScratchRegisterScope(MacroAssembler& masm);
+};
+
+static constexpr Register OsrFrameReg = r3;
+static constexpr Register CallTempReg0 = r5;
+static constexpr Register CallTempReg1 = r6;
+static constexpr Register CallTempReg2 = r7;
+static constexpr Register CallTempReg3 = r8;
+static constexpr Register CallTempReg4 = r0;
+static constexpr Register CallTempReg5 = r1;
+
+static constexpr Register IntArgReg0 = r0;
+static constexpr Register IntArgReg1 = r1;
+static constexpr Register IntArgReg2 = r2;
+static constexpr Register IntArgReg3 = r3;
+static constexpr Register HeapReg = r10;
+static constexpr Register CallTempNonArgRegs[] = {r5, r6, r7, r8};
+static const uint32_t NumCallTempNonArgRegs = std::size(CallTempNonArgRegs);
+
+// These register assignments for the 64-bit atomic ops are frequently too
+// constraining, but we have no way of expressing looser constraints to the
+// register allocator.
+
+// CompareExchange: Any two odd/even pairs would do for `new` and `out`, and any
+// pair would do for `old`, so long as none of them overlap.
+
+static constexpr Register CmpXchgOldLo = r4;
+static constexpr Register CmpXchgOldHi = r5;
+static constexpr Register64 CmpXchgOld64 =
+ Register64(CmpXchgOldHi, CmpXchgOldLo);
+static constexpr Register CmpXchgNewLo = IntArgReg2;
+static constexpr Register CmpXchgNewHi = IntArgReg3;
+static constexpr Register64 CmpXchgNew64 =
+ Register64(CmpXchgNewHi, CmpXchgNewLo);
+static constexpr Register CmpXchgOutLo = IntArgReg0;
+static constexpr Register CmpXchgOutHi = IntArgReg1;
+static constexpr Register64 CmpXchgOut64 =
+ Register64(CmpXchgOutHi, CmpXchgOutLo);
+
+// Exchange: Any two non-equal odd/even pairs would do for `new` and `out`.
+
+static constexpr Register XchgNewLo = IntArgReg2;
+static constexpr Register XchgNewHi = IntArgReg3;
+static constexpr Register64 XchgNew64 = Register64(XchgNewHi, XchgNewLo);
+static constexpr Register XchgOutLo = IntArgReg0;
+static constexpr Register XchgOutHi = IntArgReg1;
+
+// Atomic rmw operations: Any two odd/even pairs would do for `tmp` and `out`,
+// and any pair would do for `val`, so long as none of them overlap.
+
+static constexpr Register FetchOpValLo = r4;
+static constexpr Register FetchOpValHi = r5;
+static constexpr Register64 FetchOpVal64 =
+ Register64(FetchOpValHi, FetchOpValLo);
+static constexpr Register FetchOpTmpLo = IntArgReg2;
+static constexpr Register FetchOpTmpHi = IntArgReg3;
+static constexpr Register64 FetchOpTmp64 =
+ Register64(FetchOpTmpHi, FetchOpTmpLo);
+static constexpr Register FetchOpOutLo = IntArgReg0;
+static constexpr Register FetchOpOutHi = IntArgReg1;
+static constexpr Register64 FetchOpOut64 =
+ Register64(FetchOpOutHi, FetchOpOutLo);
+
+class ABIArgGenerator {
+ unsigned intRegIndex_;
+ unsigned floatRegIndex_;
+ uint32_t stackOffset_;
+ ABIArg current_;
+
+  // ARM can use either the HardFp ABI (float registers for float arguments)
+  // or the SoftFp ABI (general registers for float arguments). We keep this
+  // switch as a runtime switch because wasm always uses the HardFp back-end,
+  // while calls to native functions have to use the ABI provided by the
+  // system.
+ bool useHardFp_;
+
+ ABIArg softNext(MIRType argType);
+ ABIArg hardNext(MIRType argType);
+
+ public:
+ ABIArgGenerator();
+
+ void setUseHardFp(bool useHardFp) {
+ MOZ_ASSERT(intRegIndex_ == 0 && floatRegIndex_ == 0);
+ useHardFp_ = useHardFp;
+ }
+ ABIArg next(MIRType argType);
+ ABIArg& current() { return current_; }
+ uint32_t stackBytesConsumedSoFar() const { return stackOffset_; }
+ void increaseStackOffset(uint32_t bytes) { stackOffset_ += bytes; }
+};
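+
+// Illustrative sketch (not normative ABI text): for a signature
+// (int32, double, int32), hardNext() would assign r0, d0 and r1, while
+// softNext() would assign r0, then the 8-byte-aligned pair r2:r3 for the
+// double (skipping r1), and then stack memory for the final int32, since
+// core registers are not backfilled.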
+
+bool IsUnaligned(const wasm::MemoryAccessDesc& access);
+
+// These registers may be volatile or nonvolatile.
+static constexpr Register ABINonArgReg0 = r4;
+static constexpr Register ABINonArgReg1 = r5;
+static constexpr Register ABINonArgReg2 = r6;
+static constexpr Register ABINonArgReg3 = r7;
+
+// This register may be volatile or nonvolatile. Avoid d15 which is the
+// ScratchDoubleReg_.
+static constexpr FloatRegister ABINonArgDoubleReg{FloatRegisters::d8,
+ VFPRegister::Double};
+
+// These registers may be volatile or nonvolatile.
+// Note: these three registers are all guaranteed to be different
+static constexpr Register ABINonArgReturnReg0 = r4;
+static constexpr Register ABINonArgReturnReg1 = r5;
+static constexpr Register ABINonVolatileReg = r6;
+
+// This register is guaranteed to be clobberable during the prologue and
+// epilogue of an ABI call which must preserve ABI argument, return, and
+// non-volatile registers.
+static constexpr Register ABINonArgReturnVolatileReg = lr;
+
+// Instance pointer argument register for WebAssembly functions. This must not
+// alias any other register used for passing function arguments or return
+// values. Preserved by WebAssembly functions.
+static constexpr Register InstanceReg = r9;
+
+// Registers used for wasm table calls. These registers must be disjoint
+// from the ABI argument registers, InstanceReg and each other.
+static constexpr Register WasmTableCallScratchReg0 = ABINonArgReg0;
+static constexpr Register WasmTableCallScratchReg1 = ABINonArgReg1;
+static constexpr Register WasmTableCallSigReg = ABINonArgReg2;
+static constexpr Register WasmTableCallIndexReg = ABINonArgReg3;
+
+// Registers used for ref calls.
+static constexpr Register WasmCallRefCallScratchReg0 = ABINonArgReg0;
+static constexpr Register WasmCallRefCallScratchReg1 = ABINonArgReg1;
+static constexpr Register WasmCallRefReg = ABINonArgReg3;
+
+// Register used as a scratch along the return path in the fast js -> wasm stub
+// code. This must not overlap ReturnReg, JSReturnOperand, or InstanceReg.
+// It must be a volatile register.
+static constexpr Register WasmJitEntryReturnScratch = r5;
+
+static constexpr Register PreBarrierReg = r1;
+
+static constexpr Register InterpreterPCReg = r9;
+
+static constexpr Register InvalidReg{Registers::invalid_reg};
+static constexpr FloatRegister InvalidFloatReg;
+
+static constexpr Register JSReturnReg_Type = r3;
+static constexpr Register JSReturnReg_Data = r2;
+static constexpr Register StackPointer = sp;
+static constexpr Register FramePointer = r11;
+static constexpr Register ReturnReg = r0;
+static constexpr Register64 ReturnReg64(r1, r0);
+
+// The attribute '__value_in_regs' alters the calling convention of a function
+// so that a structure of up to four elements can be returned via the argument
+// registers rather than being written to memory.
+static constexpr Register ReturnRegVal0 = IntArgReg0;
+static constexpr Register ReturnRegVal1 = IntArgReg1;
+static constexpr Register ReturnRegVal2 = IntArgReg2;
+static constexpr Register ReturnRegVal3 = IntArgReg3;
+
+static constexpr FloatRegister ReturnFloat32Reg = {FloatRegisters::d0,
+ VFPRegister::Single};
+static constexpr FloatRegister ReturnDoubleReg = {FloatRegisters::d0,
+ VFPRegister::Double};
+static constexpr FloatRegister ReturnSimd128Reg = InvalidFloatReg;
+static constexpr FloatRegister ScratchFloat32Reg_ = {FloatRegisters::s30,
+ VFPRegister::Single};
+static constexpr FloatRegister ScratchDoubleReg_ = {FloatRegisters::d15,
+ VFPRegister::Double};
+static constexpr FloatRegister ScratchSimd128Reg = InvalidFloatReg;
+static constexpr FloatRegister ScratchUIntReg = {FloatRegisters::d15,
+ VFPRegister::UInt};
+static constexpr FloatRegister ScratchIntReg = {FloatRegisters::d15,
+ VFPRegister::Int};
+
+// Do not reference ScratchFloat32Reg_ directly, use ScratchFloat32Scope
+// instead.
+struct ScratchFloat32Scope : public AutoFloatRegisterScope {
+ explicit ScratchFloat32Scope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchFloat32Reg_) {}
+};
+
+// Do not reference ScratchDoubleReg_ directly, use ScratchDoubleScope instead.
+struct ScratchDoubleScope : public AutoFloatRegisterScope {
+ explicit ScratchDoubleScope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {}
+};
+
+// Registers used by RegExpMatcher and RegExpExecMatch stubs (do not use
+// JSReturnOperand).
+static constexpr Register RegExpMatcherRegExpReg = CallTempReg0;
+static constexpr Register RegExpMatcherStringReg = CallTempReg1;
+static constexpr Register RegExpMatcherLastIndexReg = CallTempReg2;
+
+// Registers used by RegExpExecTest stub (do not use ReturnReg).
+static constexpr Register RegExpExecTestRegExpReg = CallTempReg0;
+static constexpr Register RegExpExecTestStringReg = CallTempReg1;
+
+// Registers used by RegExpSearcher stub (do not use ReturnReg).
+static constexpr Register RegExpSearcherRegExpReg = CallTempReg0;
+static constexpr Register RegExpSearcherStringReg = CallTempReg1;
+static constexpr Register RegExpSearcherLastIndexReg = CallTempReg2;
+
+static constexpr FloatRegister d0 = {FloatRegisters::d0, VFPRegister::Double};
+static constexpr FloatRegister d1 = {FloatRegisters::d1, VFPRegister::Double};
+static constexpr FloatRegister d2 = {FloatRegisters::d2, VFPRegister::Double};
+static constexpr FloatRegister d3 = {FloatRegisters::d3, VFPRegister::Double};
+static constexpr FloatRegister d4 = {FloatRegisters::d4, VFPRegister::Double};
+static constexpr FloatRegister d5 = {FloatRegisters::d5, VFPRegister::Double};
+static constexpr FloatRegister d6 = {FloatRegisters::d6, VFPRegister::Double};
+static constexpr FloatRegister d7 = {FloatRegisters::d7, VFPRegister::Double};
+static constexpr FloatRegister d8 = {FloatRegisters::d8, VFPRegister::Double};
+static constexpr FloatRegister d9 = {FloatRegisters::d9, VFPRegister::Double};
+static constexpr FloatRegister d10 = {FloatRegisters::d10, VFPRegister::Double};
+static constexpr FloatRegister d11 = {FloatRegisters::d11, VFPRegister::Double};
+static constexpr FloatRegister d12 = {FloatRegisters::d12, VFPRegister::Double};
+static constexpr FloatRegister d13 = {FloatRegisters::d13, VFPRegister::Double};
+static constexpr FloatRegister d14 = {FloatRegisters::d14, VFPRegister::Double};
+static constexpr FloatRegister d15 = {FloatRegisters::d15, VFPRegister::Double};
+
+// For maximal awesomeness, 8 should be sufficient. ldrd/strd (dual-register
+// load/store) operate in a single cycle when the address they are dealing with
+// is 8 byte aligned. Also, the ARM ABI wants the stack to be 8 byte aligned at
+// function boundaries. I'm trying to make sure this is always true.
+static constexpr uint32_t ABIStackAlignment = 8;
+static constexpr uint32_t CodeAlignment = 8;
+static constexpr uint32_t JitStackAlignment = 8;
+
+static constexpr uint32_t JitStackValueAlignment =
+ JitStackAlignment / sizeof(Value);
+static_assert(JitStackAlignment % sizeof(Value) == 0 &&
+ JitStackValueAlignment >= 1,
+ "Stack alignment should be a non-zero multiple of sizeof(Value)");
+
+static constexpr uint32_t SimdMemoryAlignment = 8;
+
+static_assert(CodeAlignment % SimdMemoryAlignment == 0,
+ "Code alignment should be larger than any of the alignments "
+ "which are used for "
+ "the constant sections of the code buffer. Thus it should be "
+ "larger than the "
+ "alignment for SIMD constants.");
+
+static_assert(JitStackAlignment % SimdMemoryAlignment == 0,
+ "Stack alignment should be larger than any of the alignments "
+ "which are used for "
+ "spilled values. Thus it should be larger than the alignment "
+ "for SIMD accesses.");
+
+static const uint32_t WasmStackAlignment = SimdMemoryAlignment;
+static const uint32_t WasmTrapInstructionLength = 4;
+
+// See comments in wasm::GenerateFunctionPrologue. The difference between these
+// is the size of the largest callable prologue on the platform.
+static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;
+
+static const Scale ScalePointer = TimesFour;
+
+class Instruction;
+class InstBranchImm;
+uint32_t RM(Register r);
+uint32_t RS(Register r);
+uint32_t RD(Register r);
+uint32_t RT(Register r);
+uint32_t RN(Register r);
+
+uint32_t maybeRD(Register r);
+uint32_t maybeRT(Register r);
+uint32_t maybeRN(Register r);
+
+Register toRN(Instruction i);
+Register toRM(Instruction i);
+Register toRD(Instruction i);
+Register toR(Instruction i);
+
+class VFPRegister;
+uint32_t VD(VFPRegister vr);
+uint32_t VN(VFPRegister vr);
+uint32_t VM(VFPRegister vr);
+
+// Passed into the generic VFP instruction generator when there is an
+// instruction that only takes two registers.
+static constexpr VFPRegister NoVFPRegister(VFPRegister::Double, 0, false, true);
+
+struct ImmTag : public Imm32 {
+ explicit ImmTag(JSValueTag mask) : Imm32(int32_t(mask)) {}
+};
+
+struct ImmType : public ImmTag {
+ explicit ImmType(JSValueType type) : ImmTag(JSVAL_TYPE_TO_TAG(type)) {}
+};
+
+enum Index {
+ Offset = 0 << 21 | 1 << 24,
+ PreIndex = 1 << 21 | 1 << 24,
+ PostIndex = 0 << 21 | 0 << 24
+ // The docs were rather unclear on this. It sounds like
+ // 1 << 21 | 0 << 24 encodes dtrt.
+};
+
+enum IsImmOp2_ { IsImmOp2 = 1 << 25, IsNotImmOp2 = 0 << 25 };
+enum IsImmDTR_ { IsImmDTR = 0 << 25, IsNotImmDTR = 1 << 25 };
+// For the extra memory operations, ldrd, ldrsb, ldrh.
+enum IsImmEDTR_ { IsImmEDTR = 1 << 22, IsNotImmEDTR = 0 << 22 };
+
+enum ShiftType {
+ LSL = 0, // << 5
+ LSR = 1, // << 5
+ ASR = 2, // << 5
+ ROR = 3, // << 5
+ RRX = ROR // RRX is encoded as ROR with a 0 offset.
+};
+
+// Modes for STM/LDM. Names are the suffixes applied to the instruction.
+enum DTMMode {
+ A = 0 << 24, // empty / after
+ B = 1 << 24, // full / before
+ D = 0 << 23, // decrement
+ I = 1 << 23, // increment
+ DA = D | A,
+ DB = D | B,
+ IA = I | A,
+ IB = I | B
+};
+
+enum DTMWriteBack { WriteBack = 1 << 21, NoWriteBack = 0 << 21 };
+
+// Condition code updating mode.
+enum SBit {
+ SetCC = 1 << 20, // Set condition code.
+ LeaveCC = 0 << 20 // Leave condition code unchanged.
+};
+
+enum LoadStore { IsLoad = 1 << 20, IsStore = 0 << 20 };
+
+// You almost never want to use this directly. Instead, you want to pass in a
+// signed constant, and let this bit be implicitly set for you. It is,
+// however, necessary if we want a negative index.
+enum IsUp_ { IsUp = 1 << 23, IsDown = 0 << 23 };
+enum ALUOp {
+ OpMov = 0xd << 21,
+ OpMvn = 0xf << 21,
+ OpAnd = 0x0 << 21,
+ OpBic = 0xe << 21,
+ OpEor = 0x1 << 21,
+ OpOrr = 0xc << 21,
+ OpAdc = 0x5 << 21,
+ OpAdd = 0x4 << 21,
+ OpSbc = 0x6 << 21,
+ OpSub = 0x2 << 21,
+ OpRsb = 0x3 << 21,
+ OpRsc = 0x7 << 21,
+ OpCmn = 0xb << 21,
+ OpCmp = 0xa << 21,
+ OpTeq = 0x9 << 21,
+ OpTst = 0x8 << 21,
+ OpInvalid = -1
+};
+
+enum MULOp {
+ OpmMul = 0 << 21,
+ OpmMla = 1 << 21,
+ OpmUmaal = 2 << 21,
+ OpmMls = 3 << 21,
+ OpmUmull = 4 << 21,
+ OpmUmlal = 5 << 21,
+ OpmSmull = 6 << 21,
+ OpmSmlal = 7 << 21
+};
+enum BranchTag {
+ OpB = 0x0a000000,
+ OpBMask = 0x0f000000,
+ OpBDestMask = 0x00ffffff,
+ OpBl = 0x0b000000,
+ OpBlx = 0x012fff30,
+ OpBx = 0x012fff10
+};
+
+// Just like ALUOp, but for the vfp instruction set.
+enum VFPOp {
+ OpvMul = 0x2 << 20,
+ OpvAdd = 0x3 << 20,
+ OpvSub = 0x3 << 20 | 0x1 << 6,
+ OpvDiv = 0x8 << 20,
+ OpvMov = 0xB << 20 | 0x1 << 6,
+ OpvAbs = 0xB << 20 | 0x3 << 6,
+ OpvNeg = 0xB << 20 | 0x1 << 6 | 0x1 << 16,
+ OpvSqrt = 0xB << 20 | 0x3 << 6 | 0x1 << 16,
+ OpvCmp = 0xB << 20 | 0x1 << 6 | 0x4 << 16,
+ OpvCmpz = 0xB << 20 | 0x1 << 6 | 0x5 << 16
+};
+
+// Negate the operation, AND negate the immediate that we were passed in.
+ALUOp ALUNeg(ALUOp op, Register dest, Register scratch, Imm32* imm,
+ Register* negDest);
+bool can_dbl(ALUOp op);
+bool condsAreSafe(ALUOp op);
+
+// If there is a variant of op that has a dest (think cmp/sub) return that
+// variant of it.
+ALUOp getDestVariant(ALUOp op);
+
+static constexpr ValueOperand JSReturnOperand{JSReturnReg_Type,
+ JSReturnReg_Data};
+static const ValueOperand softfpReturnOperand = ValueOperand(r1, r0);
+
+// All of these classes exist solely to shuffle data into the various operands.
+// For example Operand2 can be an imm8, a register-shifted-by-a-constant or a
+// register-shifted-by-a-register. We represent this in C++ by having a base
+// class Operand2, which just stores the 32 bits of data as they will be encoded
+// in the instruction. You cannot directly create an Operand2 since it is
+// tricky, and not entirely sane to do so. Instead, you create one of its child
+// classes, e.g. Imm8. Imm8's constructor takes a single integer argument. Imm8
+// will verify that its argument can be encoded as an ARM 12 bit imm8, encode it
+// using an Imm8data, and finally call its parent's (Operand2) constructor with
+// the Imm8data. The Operand2 constructor will then call the Imm8data's encode()
+// function to extract the raw bits from it.
+//
+// In the future, we should be able to extract data from the Operand2 by asking
+// it for its component Imm8data structures. The reason this is so horribly
+// round-about is that we wanted to have Imm8 and RegisterShiftedRegister
+// inherit directly from Operand2 but have all of them take up only a single
+// word of storage. We also wanted to avoid passing around raw integers at
+// all, since they are error prone.
+class Op2Reg;
+class O2RegImmShift;
+class O2RegRegShift;
+
+namespace datastore {
+
+class Reg {
+ // The "second register".
+ uint32_t rm_ : 4;
+ // Do we get another register for shifting.
+ uint32_t rrs_ : 1;
+ uint32_t type_ : 2;
+ // We'd like this to be a more sensible encoding, but that would need to be
+ // a struct and that would not pack :(
+ uint32_t shiftAmount_ : 5;
+
+ protected:
+ // Mark as a protected field to avoid unused private field warnings.
+ uint32_t pad_ : 20;
+
+ public:
+ Reg(uint32_t rm, ShiftType type, uint32_t rsr, uint32_t shiftAmount)
+ : rm_(rm), rrs_(rsr), type_(type), shiftAmount_(shiftAmount), pad_(0) {}
+ explicit Reg(const Op2Reg& op) { memcpy(this, &op, sizeof(*this)); }
+
+ uint32_t shiftAmount() const { return shiftAmount_; }
+
+ uint32_t encode() const {
+ return rm_ | (rrs_ << 4) | (type_ << 5) | (shiftAmount_ << 7);
+ }
+};
+
+// Op2 has a mode labelled "<imm8m>", which is ARM's magical immediate
+// encoding. Some instructions actually get 8 bits of data, which is called
+// Imm8Data below. These names should have edit distance > 1, but this is how
+// it is for now.
+class Imm8mData {
+ uint32_t data_ : 8;
+ uint32_t rot_ : 4;
+
+ protected:
+ // Mark as a protected field to avoid unused private field warnings.
+ uint32_t buff_ : 19;
+
+ private:
+  // Throw in an extra bit that will be 1 if we can't encode this properly.
+  // If we can encode it properly, a simple "|" will still suffice to meld it
+  // into the instruction.
+ uint32_t invalid_ : 1;
+
+ public:
+ // Default constructor makes an invalid immediate.
+ Imm8mData() : data_(0xff), rot_(0xf), buff_(0), invalid_(true) {}
+
+ Imm8mData(uint32_t data, uint32_t rot)
+ : data_(data), rot_(rot), buff_(0), invalid_(false) {
+ MOZ_ASSERT(data == data_);
+ MOZ_ASSERT(rot == rot_);
+ }
+
+ bool invalid() const { return invalid_; }
+
+ uint32_t encode() const {
+ MOZ_ASSERT(!invalid_);
+ return data_ | (rot_ << 8);
+ };
+};
+
+class Imm8Data {
+ uint32_t imm4L_ : 4;
+
+ protected:
+ // Mark as a protected field to avoid unused private field warnings.
+ uint32_t pad_ : 4;
+
+ private:
+ uint32_t imm4H_ : 4;
+
+ public:
+ explicit Imm8Data(uint32_t imm) : imm4L_(imm & 0xf), imm4H_(imm >> 4) {
+ MOZ_ASSERT(imm <= 0xff);
+ }
+
+ uint32_t encode() const { return imm4L_ | (imm4H_ << 8); };
+};
+
+// VLDR/VSTR take an 8 bit offset, which is implicitly left shifted by 2.
+class Imm8VFPOffData {
+ uint32_t data_;
+
+ public:
+ explicit Imm8VFPOffData(uint32_t imm) : data_(imm) {
+ MOZ_ASSERT((imm & ~(0xff)) == 0);
+ }
+ uint32_t encode() const { return data_; };
+};
+
+// ARM can magically encode 256 very special immediates to be moved into a
+// register.
+struct Imm8VFPImmData {
+ // This structure's members are public and it has no constructor to
+ // initialize them, for a very special reason. Were this structure to
+ // have a constructor, the initialization for DoubleEncoder's internal
+ // table (see below) would require a rather large static constructor on
+ // some of our supported compilers. The known solution to this is to mark
+ // the constructor constexpr, but, again, some of our supported
+ // compilers don't support constexpr! So we are reduced to public
+ // members and eschewing a constructor in hopes that the initialization
+ // of DoubleEncoder's table is correct.
+ uint32_t imm4L : 4;
+ uint32_t imm4H : 4;
+ int32_t isInvalid : 24;
+
+ uint32_t encode() const {
+    // This assert is an attempt at ensuring that we don't create random
+    // instances of this structure and then ask to encode() them.
+ MOZ_ASSERT(isInvalid == 0);
+ return imm4L | (imm4H << 16);
+ };
+};
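+
+// For instance, 1.0 and 0.5 have such encodings, so |vmov.f64 d0, #1.0| is a
+// single instruction, while a value like 0.1 does not and is typically loaded
+// from a constant pool instead. See DoubleEntryTable.tbl for the generated
+// table of encodable values.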
+
+class Imm12Data {
+ uint32_t data_ : 12;
+
+ public:
+ explicit Imm12Data(uint32_t imm) : data_(imm) { MOZ_ASSERT(data_ == imm); }
+
+ uint32_t encode() const { return data_; }
+};
+
+class RIS {
+ uint32_t shiftAmount_ : 5;
+
+ public:
+ explicit RIS(uint32_t imm) : shiftAmount_(imm) {
+ MOZ_ASSERT(shiftAmount_ == imm);
+ }
+
+ explicit RIS(Reg r) : shiftAmount_(r.shiftAmount()) {}
+
+ uint32_t encode() const { return shiftAmount_; }
+};
+
+class RRS {
+ protected:
+ // Mark as a protected field to avoid unused private field warnings.
+ uint32_t mustZero_ : 1;
+
+ private:
+ // The register that holds the shift amount.
+ uint32_t rs_ : 4;
+
+ public:
+ explicit RRS(uint32_t rs) : rs_(rs) { MOZ_ASSERT(rs_ == rs); }
+
+ uint32_t encode() const { return rs_ << 1; }
+};
+
+} // namespace datastore
+
+class MacroAssemblerARM;
+class Operand;
+
+class Operand2 {
+ friend class Operand;
+ friend class MacroAssemblerARM;
+ friend class InstALU;
+
+ uint32_t oper_ : 31;
+ uint32_t invalid_ : 1;
+
+ protected:
+ explicit Operand2(datastore::Imm8mData base)
+ : oper_(base.invalid() ? -1 : (base.encode() | uint32_t(IsImmOp2))),
+ invalid_(base.invalid()) {}
+
+ explicit Operand2(datastore::Reg base)
+ : oper_(base.encode() | uint32_t(IsNotImmOp2)), invalid_(false) {}
+
+ private:
+ explicit Operand2(uint32_t blob) : oper_(blob), invalid_(false) {}
+
+ public:
+ bool isO2Reg() const { return !(oper_ & IsImmOp2); }
+
+ Op2Reg toOp2Reg() const;
+
+ bool isImm8() const { return oper_ & IsImmOp2; }
+
+ bool invalid() const { return invalid_; }
+
+ uint32_t encode() const { return oper_; }
+};
+
+class Imm8 : public Operand2 {
+ public:
+ explicit Imm8(uint32_t imm) : Operand2(EncodeImm(imm)) {}
+
+ static datastore::Imm8mData EncodeImm(uint32_t imm) {
+ // RotateLeft below may not be called with a shift of zero.
+ if (imm <= 0xFF) {
+ return datastore::Imm8mData(imm, 0);
+ }
+
+ // An encodable integer has a maximum of 8 contiguous set bits,
+ // with an optional wrapped left rotation to even bit positions.
+ for (int rot = 1; rot < 16; rot++) {
+ uint32_t rotimm = mozilla::RotateLeft(imm, rot * 2);
+ if (rotimm <= 0xFF) {
+ return datastore::Imm8mData(rotimm, rot);
+ }
+ }
+ return datastore::Imm8mData();
+ }
+
+ // Pair template?
+ struct TwoImm8mData {
+ datastore::Imm8mData fst_, snd_;
+
+ TwoImm8mData() = default;
+
+ TwoImm8mData(datastore::Imm8mData fst, datastore::Imm8mData snd)
+ : fst_(fst), snd_(snd) {}
+
+ datastore::Imm8mData fst() const { return fst_; }
+ datastore::Imm8mData snd() const { return snd_; }
+ };
+
+ static TwoImm8mData EncodeTwoImms(uint32_t);
+};
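+
+// A worked example (illustrative only): Imm8::EncodeImm(0x0000ff00) finds
+// that RotateLeft(0x0000ff00, 2 * 12) == 0xff, so it returns
+// Imm8mData(0xff, 12); the hardware reverses this by rotating 0xff right by
+// 2 * 12 == 24 bits. A value such as 0x00ff00ff has no single-immediate
+// encoding, and EncodeImm returns the invalid Imm8mData instead.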
+
+class Op2Reg : public Operand2 {
+ public:
+ explicit Op2Reg(Register rm, ShiftType type, datastore::RIS shiftImm)
+ : Operand2(datastore::Reg(rm.code(), type, 0, shiftImm.encode())) {}
+
+ explicit Op2Reg(Register rm, ShiftType type, datastore::RRS shiftReg)
+ : Operand2(datastore::Reg(rm.code(), type, 1, shiftReg.encode())) {}
+};
+
+static_assert(sizeof(Op2Reg) == sizeof(datastore::Reg),
+ "datastore::Reg(const Op2Reg&) constructor relies on Reg/Op2Reg "
+ "having same size");
+
+class O2RegImmShift : public Op2Reg {
+ public:
+ explicit O2RegImmShift(Register rn, ShiftType type, uint32_t shift)
+ : Op2Reg(rn, type, datastore::RIS(shift)) {}
+};
+
+class O2RegRegShift : public Op2Reg {
+ public:
+ explicit O2RegRegShift(Register rn, ShiftType type, Register rs)
+ : Op2Reg(rn, type, datastore::RRS(rs.code())) {}
+};
+
+O2RegImmShift O2Reg(Register r);
+O2RegImmShift lsl(Register r, int amt);
+O2RegImmShift lsr(Register r, int amt);
+O2RegImmShift asr(Register r, int amt);
+O2RegImmShift rol(Register r, int amt);
+O2RegImmShift ror(Register r, int amt);
+
+O2RegRegShift lsl(Register r, Register amt);
+O2RegRegShift lsr(Register r, Register amt);
+O2RegRegShift asr(Register r, Register amt);
+O2RegRegShift ror(Register r, Register amt);
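+
+// Usage sketch (assuming the as_* emitters declared below): as_mov(r0,
+// lsl(r1, 4)) assembles |mov r0, r1, lsl #4|, and as_add(r0, r1, ror(r2, r3))
+// assembles |add r0, r1, r2, ror r3|, with the shift folded into the Operand2
+// bits rather than costing a separate instruction.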
+
+// An offset from a register to be used for ldr/str. This should include the
+// sign bit, since ARM has "signed-magnitude" offsets. That is, it encodes an
+// unsigned offset, and the instruction specifies whether the offset is
+// positive or negative. The +/- bit is necessary if the instruction set wants
+// to be able to have a negative register offset, e.g. ldr pc, [r1,-r2];
+class DtrOff {
+ uint32_t data_;
+
+ protected:
+ explicit DtrOff(datastore::Imm12Data immdata, IsUp_ iu)
+ : data_(immdata.encode() | uint32_t(IsImmDTR) | uint32_t(iu)) {}
+
+ explicit DtrOff(datastore::Reg reg, IsUp_ iu = IsUp)
+ : data_(reg.encode() | uint32_t(IsNotImmDTR) | iu) {}
+
+ public:
+ uint32_t encode() const { return data_; }
+};
+
+class DtrOffImm : public DtrOff {
+ public:
+ explicit DtrOffImm(int32_t imm)
+ : DtrOff(datastore::Imm12Data(mozilla::Abs(imm)),
+ imm >= 0 ? IsUp : IsDown) {
+ MOZ_ASSERT(mozilla::Abs(imm) < 4096);
+ }
+};
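+
+// For example (a sketch): DtrOffImm(-4) stores magnitude 4 together with
+// IsDown, so DTRAddr(r1, DtrOffImm(-4)) describes the address operand of
+// |ldr r0, [r1, #-4]|.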
+
+class DtrOffReg : public DtrOff {
+ // These are designed to be called by a constructor of a subclass.
+ // Constructing the necessary RIS/RRS structures is annoying.
+
+ protected:
+ explicit DtrOffReg(Register rn, ShiftType type, datastore::RIS shiftImm,
+ IsUp_ iu = IsUp)
+ : DtrOff(datastore::Reg(rn.code(), type, 0, shiftImm.encode()), iu) {}
+
+ explicit DtrOffReg(Register rn, ShiftType type, datastore::RRS shiftReg,
+ IsUp_ iu = IsUp)
+ : DtrOff(datastore::Reg(rn.code(), type, 1, shiftReg.encode()), iu) {}
+};
+
+class DtrRegImmShift : public DtrOffReg {
+ public:
+ explicit DtrRegImmShift(Register rn, ShiftType type, uint32_t shift,
+ IsUp_ iu = IsUp)
+ : DtrOffReg(rn, type, datastore::RIS(shift), iu) {}
+};
+
+class DtrRegRegShift : public DtrOffReg {
+ public:
+ explicit DtrRegRegShift(Register rn, ShiftType type, Register rs,
+ IsUp_ iu = IsUp)
+ : DtrOffReg(rn, type, datastore::RRS(rs.code()), iu) {}
+};
+
+// We will frequently want to bundle a register with its offset so that we have
+// an "operand" to a load instruction.
+class DTRAddr {
+ friend class Operand;
+
+ uint32_t data_;
+
+ public:
+ explicit DTRAddr(Register reg, DtrOff dtr)
+ : data_(dtr.encode() | (reg.code() << 16)) {}
+
+ uint32_t encode() const { return data_; }
+
+ Register getBase() const { return Register::FromCode((data_ >> 16) & 0xf); }
+};
+
+// Offsets for the extended data transfer instructions:
+// ldrsh, ldrd, ldrsb, etc.
+class EDtrOff {
+ uint32_t data_;
+
+ protected:
+ explicit EDtrOff(datastore::Imm8Data imm8, IsUp_ iu = IsUp)
+ : data_(imm8.encode() | IsImmEDTR | uint32_t(iu)) {}
+
+ explicit EDtrOff(Register rm, IsUp_ iu = IsUp)
+ : data_(rm.code() | IsNotImmEDTR | iu) {}
+
+ public:
+ uint32_t encode() const { return data_; }
+};
+
+class EDtrOffImm : public EDtrOff {
+ public:
+ explicit EDtrOffImm(int32_t imm)
+ : EDtrOff(datastore::Imm8Data(mozilla::Abs(imm)),
+ (imm >= 0) ? IsUp : IsDown) {
+ MOZ_ASSERT(mozilla::Abs(imm) < 256);
+ }
+};
+
+// This is the most-derived class, since the extended data transfer
+// instructions don't support any sort of modification of the "index" operand.
+class EDtrOffReg : public EDtrOff {
+ public:
+ explicit EDtrOffReg(Register rm) : EDtrOff(rm) {}
+};
+
+class EDtrAddr {
+ uint32_t data_;
+
+ public:
+ explicit EDtrAddr(Register r, EDtrOff off) : data_(RN(r) | off.encode()) {}
+
+ uint32_t encode() const { return data_; }
+#ifdef DEBUG
+ Register maybeOffsetRegister() const {
+ if (data_ & IsImmEDTR) {
+ return InvalidReg;
+ }
+ return Register::FromCode(data_ & 0xf);
+ }
+#endif
+};
+
+class VFPOff {
+ uint32_t data_;
+
+ protected:
+ explicit VFPOff(datastore::Imm8VFPOffData imm, IsUp_ isup)
+ : data_(imm.encode() | uint32_t(isup)) {}
+
+ public:
+ uint32_t encode() const { return data_; }
+};
+
+class VFPOffImm : public VFPOff {
+ public:
+ explicit VFPOffImm(int32_t imm)
+ : VFPOff(datastore::Imm8VFPOffData(mozilla::Abs(imm) / 4),
+ imm < 0 ? IsDown : IsUp) {
+ MOZ_ASSERT(mozilla::Abs(imm) <= 255 * 4);
+ }
+};
+
+class VFPAddr {
+ friend class Operand;
+
+ uint32_t data_;
+
+ public:
+ explicit VFPAddr(Register base, VFPOff off)
+ : data_(RN(base) | off.encode()) {}
+
+ uint32_t encode() const { return data_; }
+};
+
+class VFPImm {
+ uint32_t data_;
+
+ public:
+ explicit VFPImm(uint32_t topWordOfDouble);
+
+ static const VFPImm One;
+
+ uint32_t encode() const { return data_; }
+ bool isValid() const { return data_ != (~0U); }
+};
+
+// A BOffImm is an immediate that is used for branches. Namely, it is the offset
+// that will be encoded in the branch instruction. This is the only sane way of
+// constructing a branch.
+class BOffImm {
+ friend class InstBranchImm;
+
+ uint32_t data_;
+
+ public:
+ explicit BOffImm(int offset) : data_((offset - 8) >> 2 & 0x00ffffff) {
+ MOZ_ASSERT((offset & 0x3) == 0);
+ if (!IsInRange(offset)) {
+ MOZ_CRASH("BOffImm offset out of range");
+ }
+ }
+
+ explicit BOffImm() : data_(INVALID) {}
+
+ private:
+ explicit BOffImm(const Instruction& inst);
+
+ public:
+ static const uint32_t INVALID = 0x00800000;
+
+ uint32_t encode() const { return data_; }
+ int32_t decode() const { return ((int32_t(data_) << 8) >> 6) + 8; }
+
+ static bool IsInRange(int offset) {
+ if ((offset - 8) < -33554432) {
+ return false;
+ }
+ if ((offset - 8) > 33554428) {
+ return false;
+ }
+ return true;
+ }
+
+ bool isInvalid() const { return data_ == INVALID; }
+ Instruction* getDest(Instruction* src) const;
+};
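+
+// Round-trip sketch: a branch to the instruction 12 bytes ahead is built as
+// BOffImm(12), which stores (12 - 8) >> 2 == 1 to account for the pipeline's
+// pc-plus-8 bias, and decode() recovers ((1 << 8) >> 6) + 8 == 12. IsInRange
+// bounds offsets to roughly +/-32MB, i.e. a signed 24-bit count of words.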
+
+class Imm16 {
+ uint32_t lower_ : 12;
+
+ protected:
+ // Mark as a protected field to avoid unused private field warnings.
+ uint32_t pad_ : 4;
+
+ private:
+ uint32_t upper_ : 4;
+ uint32_t invalid_ : 12;
+
+ public:
+ explicit Imm16();
+ explicit Imm16(uint32_t imm);
+ explicit Imm16(Instruction& inst);
+
+ uint32_t encode() const { return lower_ | (upper_ << 16); }
+ uint32_t decode() const { return lower_ | (upper_ << 12); }
+
+ bool isInvalid() const { return invalid_; }
+};
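+
+// Imm16 feeds movw/movt, which together load an arbitrary 32-bit constant in
+// two instructions; e.g. (illustrative) |movw r0, #0xbeef| followed by
+// |movt r0, #0xdead| leaves r0 == 0xdeadbeef. encode() splits the value
+// across the instruction's 12-bit low and 4-bit high immediate fields.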
+
+// I would prefer that these did not exist, since there are essentially no
+// instructions that would ever take more than one of these; however, the MIR
+// wants to only have one type of arguments to functions, so bugger.
+class Operand {
+ // The encoding of registers is the same for OP2, DTR and EDTR yet the type
+ // system doesn't let us express this, so choices must be made.
+ public:
+ enum class Tag : uint8_t { OP2, MEM, FOP };
+
+ private:
+ uint32_t tag_ : 8;
+ uint32_t reg_ : 5;
+ int32_t offset_;
+
+ protected:
+ Operand(Tag tag, uint32_t regCode, int32_t offset)
+ : tag_(static_cast<uint32_t>(tag)), reg_(regCode), offset_(offset) {}
+
+ public:
+ explicit Operand(Register reg) : Operand(Tag::OP2, reg.code(), 0) {}
+
+ explicit Operand(FloatRegister freg) : Operand(Tag::FOP, freg.code(), 0) {}
+
+ explicit Operand(Register base, Imm32 off)
+ : Operand(Tag::MEM, base.code(), off.value) {}
+
+ explicit Operand(Register base, int32_t off)
+ : Operand(Tag::MEM, base.code(), off) {}
+
+ explicit Operand(const Address& addr)
+ : Operand(Tag::MEM, addr.base.code(), addr.offset) {}
+
+ public:
+ Tag tag() const { return static_cast<Tag>(tag_); }
+
+ Operand2 toOp2() const {
+ MOZ_ASSERT(tag() == Tag::OP2);
+ return O2Reg(Register::FromCode(reg_));
+ }
+
+ Register toReg() const {
+ MOZ_ASSERT(tag() == Tag::OP2);
+ return Register::FromCode(reg_);
+ }
+
+ Address toAddress() const {
+ MOZ_ASSERT(tag() == Tag::MEM);
+ return Address(Register::FromCode(reg_), offset_);
+ }
+ int32_t disp() const {
+ MOZ_ASSERT(tag() == Tag::MEM);
+ return offset_;
+ }
+
+ int32_t base() const {
+ MOZ_ASSERT(tag() == Tag::MEM);
+ return reg_;
+ }
+ Register baseReg() const {
+ MOZ_ASSERT(tag() == Tag::MEM);
+ return Register::FromCode(reg_);
+ }
+ DTRAddr toDTRAddr() const {
+ MOZ_ASSERT(tag() == Tag::MEM);
+ return DTRAddr(baseReg(), DtrOffImm(offset_));
+ }
+ VFPAddr toVFPAddr() const {
+ MOZ_ASSERT(tag() == Tag::MEM);
+ return VFPAddr(baseReg(), VFPOffImm(offset_));
+ }
+};
+
+inline Imm32 Imm64::firstHalf() const { return low(); }
+
+inline Imm32 Imm64::secondHalf() const { return hi(); }
+
+class InstructionIterator {
+ private:
+ Instruction* inst_;
+
+ public:
+ explicit InstructionIterator(Instruction* inst) : inst_(inst) {
+ maybeSkipAutomaticInstructions();
+ }
+
+ // Advances to the next intentionally-inserted instruction.
+ Instruction* next();
+
+ // Advances past any automatically-inserted instructions.
+ Instruction* maybeSkipAutomaticInstructions();
+
+ Instruction* cur() const { return inst_; }
+
+ protected:
+ // Advances past the given number of instruction-length bytes.
+ inline void advanceRaw(ptrdiff_t instructions = 1);
+};
+
+class Assembler;
+typedef js::jit::AssemblerBufferWithConstantPools<1024, 4, Instruction,
+ Assembler>
+ ARMBuffer;
+
+class Assembler : public AssemblerShared {
+ public:
+ // ARM conditional constants:
+ enum ARMCondition : uint32_t {
+ EQ = 0x00000000, // Zero
+ NE = 0x10000000, // Non-zero
+ CS = 0x20000000,
+ CC = 0x30000000,
+ MI = 0x40000000,
+ PL = 0x50000000,
+ VS = 0x60000000,
+ VC = 0x70000000,
+ HI = 0x80000000,
+ LS = 0x90000000,
+ GE = 0xa0000000,
+ LT = 0xb0000000,
+ GT = 0xc0000000,
+ LE = 0xd0000000,
+ AL = 0xe0000000
+ };
+
+ enum Condition : uint32_t {
+ Equal = EQ,
+ NotEqual = NE,
+ Above = HI,
+ AboveOrEqual = CS,
+ Below = CC,
+ BelowOrEqual = LS,
+ GreaterThan = GT,
+ GreaterThanOrEqual = GE,
+ LessThan = LT,
+ LessThanOrEqual = LE,
+ Overflow = VS,
+ CarrySet = CS,
+ CarryClear = CC,
+ Signed = MI,
+ NotSigned = PL,
+ Zero = EQ,
+ NonZero = NE,
+ Always = AL,
+
+ VFP_NotEqualOrUnordered = NE,
+ VFP_Equal = EQ,
+ VFP_Unordered = VS,
+ VFP_NotUnordered = VC,
+ VFP_GreaterThanOrEqualOrUnordered = CS,
+ VFP_GreaterThanOrEqual = GE,
+ VFP_GreaterThanOrUnordered = HI,
+ VFP_GreaterThan = GT,
+ VFP_LessThanOrEqualOrUnordered = LE,
+ VFP_LessThanOrEqual = LS,
+ VFP_LessThanOrUnordered = LT,
+ VFP_LessThan = CC // MI is valid too.
+ };
+
+ // Bit set when a DoubleCondition does not map to a single ARM condition.
+ // The macro assembler has to special-case these conditions, or else
+ // ConditionFromDoubleCondition will complain.
+ static const int DoubleConditionBitSpecial = 0x1;
+
+ enum DoubleCondition : uint32_t {
+ // These conditions will only evaluate to true if the comparison is
+ // ordered - i.e. neither operand is NaN.
+ DoubleOrdered = VFP_NotUnordered,
+ DoubleEqual = VFP_Equal,
+ DoubleNotEqual = VFP_NotEqualOrUnordered | DoubleConditionBitSpecial,
+ DoubleGreaterThan = VFP_GreaterThan,
+ DoubleGreaterThanOrEqual = VFP_GreaterThanOrEqual,
+ DoubleLessThan = VFP_LessThan,
+ DoubleLessThanOrEqual = VFP_LessThanOrEqual,
+ // If either operand is NaN, these conditions always evaluate to true.
+ DoubleUnordered = VFP_Unordered,
+ DoubleEqualOrUnordered = VFP_Equal | DoubleConditionBitSpecial,
+ DoubleNotEqualOrUnordered = VFP_NotEqualOrUnordered,
+ DoubleGreaterThanOrUnordered = VFP_GreaterThanOrUnordered,
+ DoubleGreaterThanOrEqualOrUnordered = VFP_GreaterThanOrEqualOrUnordered,
+ DoubleLessThanOrUnordered = VFP_LessThanOrUnordered,
+ DoubleLessThanOrEqualOrUnordered = VFP_LessThanOrEqualOrUnordered
+ };
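+
+  // For example, DoubleNotEqual cannot be a single VFP condition: NE alone is
+  // also true when the operands are unordered, so it carries
+  // DoubleConditionBitSpecial and the macro assembler must emit extra code
+  // (sketch: an additional instruction predicated on VS/VC) rather than rely
+  // on one condition field.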
+
+ Condition getCondition(uint32_t inst) {
+ return (Condition)(0xf0000000 & inst);
+ }
+ static inline Condition ConditionFromDoubleCondition(DoubleCondition cond) {
+ MOZ_ASSERT(!(cond & DoubleConditionBitSpecial));
+ return static_cast<Condition>(cond);
+ }
+
+ enum BarrierOption {
+ BarrierSY = 15, // Full system barrier
+ BarrierST = 14 // StoreStore barrier
+ };
+
+ // This should be protected, but since CodeGenerator wants to use it, it
+ // needs to go out here :(
+
+ BufferOffset nextOffset() { return m_buffer.nextOffset(); }
+
+ protected:
+ // Shim around AssemblerBufferWithConstantPools::allocEntry.
+ BufferOffset allocLiteralLoadEntry(size_t numInst, unsigned numPoolEntries,
+ PoolHintPun& php, uint8_t* data,
+ const LiteralDoc& doc = LiteralDoc(),
+ ARMBuffer::PoolEntry* pe = nullptr,
+ bool loadToPC = false);
+
+ Instruction* editSrc(BufferOffset bo) { return m_buffer.getInst(bo); }
+
+#ifdef JS_DISASM_ARM
+ typedef disasm::EmbeddedVector<char, disasm::ReasonableBufferSize>
+ DisasmBuffer;
+
+ static void disassembleInstruction(const Instruction* i,
+ DisasmBuffer& buffer);
+
+ void initDisassembler();
+ void finishDisassembler();
+ void spew(Instruction* i);
+ void spewBranch(Instruction* i, const LabelDoc& target);
+ void spewLiteralLoad(PoolHintPun& php, bool loadToPC, const Instruction* offs,
+ const LiteralDoc& doc);
+#endif
+
+ public:
+ void resetCounter();
+ static uint32_t NopFill;
+ static uint32_t GetNopFill();
+ static uint32_t AsmPoolMaxOffset;
+ static uint32_t GetPoolMaxOffset();
+
+ protected:
+  // Structure for fixing up pc-relative loads/jumps when the machine code
+  // gets moved (executable copy, gc, etc.).
+ class RelativePatch {
+ void* target_;
+ RelocationKind kind_;
+
+ public:
+ RelativePatch(void* target, RelocationKind kind)
+ : target_(target), kind_(kind) {}
+ void* target() const { return target_; }
+ RelocationKind kind() const { return kind_; }
+ };
+
+ // TODO: this should actually be a pool-like object. It is currently a big
+ // hack, and probably shouldn't exist.
+ js::Vector<RelativePatch, 8, SystemAllocPolicy> jumps_;
+
+ CompactBufferWriter jumpRelocations_;
+ CompactBufferWriter dataRelocations_;
+
+ ARMBuffer m_buffer;
+
+#ifdef JS_DISASM_ARM
+ DisassemblerSpew spew_;
+#endif
+
+ public:
+  // For the alignment fill use NOP: 0xe320f000, i.e. (Always | InstNOP::NopInst).
+ // For the nopFill use a branch to the next instruction: 0xeaffffff.
+ Assembler()
+ : m_buffer(1, 1, 8, GetPoolMaxOffset(), 8, 0xe320f000, 0xeaffffff,
+ GetNopFill()),
+ isFinished(false),
+ dtmActive(false),
+ dtmCond(Always) {
+#ifdef JS_DISASM_ARM
+ initDisassembler();
+#endif
+ }
+
+ ~Assembler() {
+#ifdef JS_DISASM_ARM
+ finishDisassembler();
+#endif
+ }
+
+ void setUnlimitedBuffer() { m_buffer.setUnlimited(); }
+
+ static Condition InvertCondition(Condition cond);
+ static Condition UnsignedCondition(Condition cond);
+ static Condition ConditionWithoutEqual(Condition cond);
+
+ static DoubleCondition InvertCondition(DoubleCondition cond);
+
+ void writeDataRelocation(BufferOffset offset, ImmGCPtr ptr) {
+ // Raw GC pointer relocations and Value relocations both end up in
+ // Assembler::TraceDataRelocations.
+ if (ptr.value) {
+ if (gc::IsInsideNursery(ptr.value)) {
+ embedsNurseryPointers_ = true;
+ }
+ dataRelocations_.writeUnsigned(offset.getOffset());
+ }
+ }
+
+ enum RelocBranchStyle { B_MOVWT, B_LDR_BX, B_LDR, B_MOVW_ADD };
+
+ enum RelocStyle { L_MOVWT, L_LDR };
+
+ public:
+  // Given the start of a control flow sequence, grab the value that is
+  // finally branched to. Given the start of a function that loads an address
+  // into a register, get the address that ends up in the register.
+ template <class Iter>
+ static const uint32_t* GetCF32Target(Iter* iter);
+
+ static uintptr_t GetPointer(uint8_t*);
+ static const uint32_t* GetPtr32Target(InstructionIterator iter,
+ Register* dest = nullptr,
+ RelocStyle* rs = nullptr);
+
+ bool oom() const;
+
+ void setPrinter(Sprinter* sp) {
+#ifdef JS_DISASM_ARM
+ spew_.setPrinter(sp);
+#endif
+ }
+
+ Register getStackPointer() const { return StackPointer; }
+
+ private:
+ bool isFinished;
+
+ protected:
+ LabelDoc refLabel(const Label* label) {
+#ifdef JS_DISASM_ARM
+ return spew_.refLabel(label);
+#else
+ return LabelDoc();
+#endif
+ }
+
+ public:
+ void finish();
+ bool appendRawCode(const uint8_t* code, size_t numBytes);
+ bool reserve(size_t size);
+ bool swapBuffer(wasm::Bytes& bytes);
+ void copyJumpRelocationTable(uint8_t* dest);
+ void copyDataRelocationTable(uint8_t* dest);
+
+ // Size of the instruction stream, in bytes, after pools are flushed.
+ size_t size() const;
+ // Size of the jump relocation table, in bytes.
+ size_t jumpRelocationTableBytes() const;
+ size_t dataRelocationTableBytes() const;
+
+ // Size of the data table, in bytes.
+ size_t bytesNeeded() const;
+
+  // Write a single instruction into the instruction stream. Very hot;
+  // inlined for performance.
+ MOZ_ALWAYS_INLINE BufferOffset writeInst(uint32_t x) {
+ MOZ_ASSERT(hasCreator());
+ BufferOffset offs = m_buffer.putInt(x);
+#ifdef JS_DISASM_ARM
+ spew(m_buffer.getInstOrNull(offs));
+#endif
+ return offs;
+ }
+
+  // As above, but also mark the instruction as a branch. Very hot; inlined
+  // for performance.
+ MOZ_ALWAYS_INLINE BufferOffset
+ writeBranchInst(uint32_t x, const LabelDoc& documentation) {
+ BufferOffset offs = m_buffer.putInt(x);
+#ifdef JS_DISASM_ARM
+ spewBranch(m_buffer.getInstOrNull(offs), documentation);
+#endif
+ return offs;
+ }
+
+  // Write a placeholder NOP for a branch into the instruction stream (in
+  // order to adjust assembler addresses and mark it as a branch); it will be
+  // overwritten subsequently.
+ BufferOffset allocBranchInst();
+
+ // A static variant for the cases where we don't want to have an assembler
+ // object.
+ static void WriteInstStatic(uint32_t x, uint32_t* dest);
+
+ public:
+ void writeCodePointer(CodeLabel* label);
+
+ void haltingAlign(int alignment);
+ void nopAlign(int alignment);
+ BufferOffset as_nop();
+ BufferOffset as_alu(Register dest, Register src1, Operand2 op2, ALUOp op,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_mov(Register dest, Operand2 op2, SBit s = LeaveCC,
+ Condition c = Always);
+ BufferOffset as_mvn(Register dest, Operand2 op2, SBit s = LeaveCC,
+ Condition c = Always);
+
+ static void as_alu_patch(Register dest, Register src1, Operand2 op2, ALUOp op,
+ SBit s, Condition c, uint32_t* pos);
+ static void as_mov_patch(Register dest, Operand2 op2, SBit s, Condition c,
+ uint32_t* pos);
+
+ // Logical operations:
+ BufferOffset as_and(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_bic(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_eor(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_orr(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ // Reverse byte operations:
+ BufferOffset as_rev(Register dest, Register src, Condition c = Always);
+ BufferOffset as_rev16(Register dest, Register src, Condition c = Always);
+ BufferOffset as_revsh(Register dest, Register src, Condition c = Always);
+ // Mathematical operations:
+ BufferOffset as_adc(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_add(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_sbc(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_sub(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_rsb(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_rsc(Register dest, Register src1, Operand2 op2,
+ SBit s = LeaveCC, Condition c = Always);
+ // Test operations:
+ BufferOffset as_cmn(Register src1, Operand2 op2, Condition c = Always);
+ BufferOffset as_cmp(Register src1, Operand2 op2, Condition c = Always);
+ BufferOffset as_teq(Register src1, Operand2 op2, Condition c = Always);
+ BufferOffset as_tst(Register src1, Operand2 op2, Condition c = Always);
+
+ // Sign extension operations:
+ BufferOffset as_sxtb(Register dest, Register src, int rotate,
+ Condition c = Always);
+ BufferOffset as_sxth(Register dest, Register src, int rotate,
+ Condition c = Always);
+ BufferOffset as_uxtb(Register dest, Register src, int rotate,
+ Condition c = Always);
+ BufferOffset as_uxth(Register dest, Register src, int rotate,
+ Condition c = Always);
+
+  // Not quite ALU-worthy, but useful nonetheless. These also have the issue
+  // of being formatted completely differently from the standard ALU
+  // operations.
+ BufferOffset as_movw(Register dest, Imm16 imm, Condition c = Always);
+ BufferOffset as_movt(Register dest, Imm16 imm, Condition c = Always);
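+
+  // For example (sketch), materializing an arbitrary 32-bit constant k in
+  // r0 takes a movw/movt pair:
+  //   as_movw(r0, Imm16(k & 0xffff));  // low halfword
+  //   as_movt(r0, Imm16(k >> 16));     // high halfword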
+
+ static void as_movw_patch(Register dest, Imm16 imm, Condition c,
+ Instruction* pos);
+ static void as_movt_patch(Register dest, Imm16 imm, Condition c,
+ Instruction* pos);
+
+ BufferOffset as_genmul(Register d1, Register d2, Register rm, Register rn,
+ MULOp op, SBit s, Condition c = Always);
+ BufferOffset as_mul(Register dest, Register src1, Register src2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_mla(Register dest, Register acc, Register src1, Register src2,
+ SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_umaal(Register dest1, Register dest2, Register src1,
+ Register src2, Condition c = Always);
+ BufferOffset as_mls(Register dest, Register acc, Register src1, Register src2,
+ Condition c = Always);
+ BufferOffset as_umull(Register dest1, Register dest2, Register src1,
+ Register src2, SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_umlal(Register dest1, Register dest2, Register src1,
+ Register src2, SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_smull(Register dest1, Register dest2, Register src1,
+ Register src2, SBit s = LeaveCC, Condition c = Always);
+ BufferOffset as_smlal(Register dest1, Register dest2, Register src1,
+ Register src2, SBit s = LeaveCC, Condition c = Always);
+
+ BufferOffset as_sdiv(Register dest, Register num, Register div,
+ Condition c = Always);
+ BufferOffset as_udiv(Register dest, Register num, Register div,
+ Condition c = Always);
+ BufferOffset as_clz(Register dest, Register src, Condition c = Always);
+
+ // Data transfer instructions: ldr, str, ldrb, strb.
+ // Using an int to differentiate between 8 bits and 32 bits is overkill.
+ BufferOffset as_dtr(LoadStore ls, int size, Index mode, Register rt,
+ DTRAddr addr, Condition c = Always);
+
+ static void as_dtr_patch(LoadStore ls, int size, Index mode, Register rt,
+ DTRAddr addr, Condition c, uint32_t* dest);
+
+ // Handles all of the other integral data transferring functions:
+ // ldrsb, ldrsh, ldrd, etc. The size is given in bits.
+ BufferOffset as_extdtr(LoadStore ls, int size, bool IsSigned, Index mode,
+ Register rt, EDtrAddr addr, Condition c = Always);
+
+ BufferOffset as_dtm(LoadStore ls, Register rn, uint32_t mask, DTMMode mode,
+ DTMWriteBack wb, Condition c = Always);
+
+ // Overwrite a pool entry with new data.
+ static void WritePoolEntry(Instruction* addr, Condition c, uint32_t data);
+
+ // Load a 32 bit immediate from a pool into a register.
+ BufferOffset as_Imm32Pool(Register dest, uint32_t value,
+ Condition c = Always);
+
+ // Load a 64 bit floating point immediate from a pool into a register.
+ BufferOffset as_FImm64Pool(VFPRegister dest, double value,
+ Condition c = Always);
+ // Load a 32 bit floating point immediate from a pool into a register.
+ BufferOffset as_FImm32Pool(VFPRegister dest, float value,
+ Condition c = Always);
+
+ // Atomic instructions: ldrexd, ldrex, ldrexh, ldrexb, strexd, strex, strexh,
+ // strexb.
+ //
+ // The doubleword, halfword, and byte versions are available from ARMv6K
+ // forward.
+ //
+ // The word versions are available from ARMv6 forward and can be used to
+ // implement the halfword and byte versions on older systems.
+
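+  // For example (sketch), a word-sized compare-and-swap loop:
+  //   retry:
+  //     as_ldrex(out, ptr);           // load-exclusive the current value
+  //     ... compare out with the expected value; exit on mismatch ...
+  //     as_strex(flag, newval, ptr);  // flag == 0 iff the store succeeded
+  //     ... branch back to retry if flag is nonzero ...
+  //   followed by a barrier (as_dmb) as the memory model requires.
+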
+ // LDREXD rt, rt2, [rn]. Constraint: rt even register, rt2=rt+1.
+ BufferOffset as_ldrexd(Register rt, Register rt2, Register rn,
+ Condition c = Always);
+
+ // LDREX rt, [rn]
+ BufferOffset as_ldrex(Register rt, Register rn, Condition c = Always);
+ BufferOffset as_ldrexh(Register rt, Register rn, Condition c = Always);
+ BufferOffset as_ldrexb(Register rt, Register rn, Condition c = Always);
+
+ // STREXD rd, rt, rt2, [rn]. Constraint: rt even register, rt2=rt+1.
+ BufferOffset as_strexd(Register rd, Register rt, Register rt2, Register rn,
+ Condition c = Always);
+
+ // STREX rd, rt, [rn]. Constraint: rd != rn, rd != rt.
+ BufferOffset as_strex(Register rd, Register rt, Register rn,
+ Condition c = Always);
+ BufferOffset as_strexh(Register rd, Register rt, Register rn,
+ Condition c = Always);
+ BufferOffset as_strexb(Register rd, Register rt, Register rn,
+ Condition c = Always);
+
+ // CLREX
+ BufferOffset as_clrex();
+
+ // Memory synchronization.
+ // These are available from ARMv7 forward.
+ BufferOffset as_dmb(BarrierOption option = BarrierSY);
+ BufferOffset as_dsb(BarrierOption option = BarrierSY);
+ BufferOffset as_isb();
+
+ // Memory synchronization for architectures before ARMv7.
+ BufferOffset as_dsb_trap();
+ BufferOffset as_dmb_trap();
+ BufferOffset as_isb_trap();
+
+ // Speculation barrier
+ BufferOffset as_csdb();
+
+ // Control flow stuff:
+
+  // bx can *only* branch to a register, never to an immediate.
+ BufferOffset as_bx(Register r, Condition c = Always);
+
+ // Branch can branch to an immediate *or* to a register. Branches to
+ // immediates are pc relative, branches to registers are absolute.
+ BufferOffset as_b(BOffImm off, Condition c, Label* documentation = nullptr);
+
+ BufferOffset as_b(Label* l, Condition c = Always);
+ BufferOffset as_b(BOffImm off, Condition c, BufferOffset inst);
+
+  // blx can go to either an immediate or a register. When blx'ing to a
+  // register, we change processor mode depending on the low bit of the
+  // register; when blx'ing to an immediate, we *always* change processor
+  // state.
+ BufferOffset as_blx(Label* l);
+
+ BufferOffset as_blx(Register r, Condition c = Always);
+ BufferOffset as_bl(BOffImm off, Condition c, Label* documentation = nullptr);
+ // bl can only branch+link to an immediate, never to a register it never
+ // changes processor state.
+ BufferOffset as_bl();
+ // bl #imm can have a condition code, blx #imm cannot.
+ // blx reg can be conditional.
+ BufferOffset as_bl(Label* l, Condition c);
+ BufferOffset as_bl(BOffImm off, Condition c, BufferOffset inst);
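+
+  // For example (sketch), with off a hypothetical BOffImm:
+  //   as_bl(off, Assembler::Equal);  // conditional call: "bleq #off"
+  //   as_blx(r12);                   // call through a register: "blx ip"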
+
+ BufferOffset as_mrs(Register r, Condition c = Always);
+ BufferOffset as_msr(Register r, Condition c = Always);
+
+ // VFP instructions!
+ private:
+ enum vfp_size { IsDouble = 1 << 8, IsSingle = 0 << 8 };
+
+ BufferOffset writeVFPInst(vfp_size sz, uint32_t blob);
+
+ static void WriteVFPInstStatic(vfp_size sz, uint32_t blob, uint32_t* dest);
+
+  // Unityped variants: all registers hold the same type (IEEE 754 single or
+  // double). Notably not included are vcvt; vmov vd, #imm; vmov rt, vn.
+ BufferOffset as_vfp_float(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ VFPOp op, Condition c = Always);
+
+ public:
+ BufferOffset as_vadd(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c = Always);
+ BufferOffset as_vdiv(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c = Always);
+ BufferOffset as_vmul(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c = Always);
+ BufferOffset as_vnmul(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c = Always);
+ BufferOffset as_vnmla(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c = Always);
+ BufferOffset as_vnmls(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c = Always);
+ BufferOffset as_vneg(VFPRegister vd, VFPRegister vm, Condition c = Always);
+ BufferOffset as_vsqrt(VFPRegister vd, VFPRegister vm, Condition c = Always);
+ BufferOffset as_vabs(VFPRegister vd, VFPRegister vm, Condition c = Always);
+ BufferOffset as_vsub(VFPRegister vd, VFPRegister vn, VFPRegister vm,
+ Condition c = Always);
+ BufferOffset as_vcmp(VFPRegister vd, VFPRegister vm, Condition c = Always);
+ BufferOffset as_vcmpz(VFPRegister vd, Condition c = Always);
+
+  // Specifically, a move between two same-sized registers.
+ BufferOffset as_vmov(VFPRegister vd, VFPRegister vsrc, Condition c = Always);
+
+ // Transfer between Core and VFP.
+ enum FloatToCore_ { FloatToCore = 1 << 20, CoreToFloat = 0 << 20 };
+
+ private:
+ enum VFPXferSize { WordTransfer = 0x02000010, DoubleTransfer = 0x00400010 };
+
+ public:
+  // Unlike the next function, moving between the core registers and vfp
+  // registers can't be properly typed: we don't want to munge the type
+  // VFPRegister to also include core registers. Thus, the core and vfp
+  // registers are passed in based on their type, and the direction
+  // (src vs. dest) is determined by the FloatToCore_ argument.
+
+ BufferOffset as_vxfer(Register vt1, Register vt2, VFPRegister vm,
+ FloatToCore_ f2c, Condition c = Always, int idx = 0);
+
+ // Our encoding actually allows just the src and the dest (and their types)
+ // to uniquely specify the encoding that we are going to use.
+ BufferOffset as_vcvt(VFPRegister vd, VFPRegister vm, bool useFPSCR = false,
+ Condition c = Always);
+
+  // Hard-coded to a 32-bit fixed-width result for now.
+ BufferOffset as_vcvtFixed(VFPRegister vd, bool isSigned, uint32_t fixedPoint,
+ bool toFixed, Condition c = Always);
+
+ // Transfer between VFP and memory.
+ BufferOffset as_vdtr(LoadStore ls, VFPRegister vd, VFPAddr addr,
+ Condition c = Always /* vfp doesn't have a wb option*/);
+
+ static void as_vdtr_patch(LoadStore ls, VFPRegister vd, VFPAddr addr,
+ Condition c /* vfp doesn't have a wb option */,
+ uint32_t* dest);
+
+  // VFP's ldm/stm work differently from the standard ARM ones. You can only
+  // transfer a range of registers.
+
+ BufferOffset as_vdtm(LoadStore st, Register rn, VFPRegister vd, int length,
+ /* also has update conditions */ Condition c = Always);
+
+ // vldr/vstr variants that handle unaligned accesses. These encode as NEON
+ // single-element instructions and can only be used if NEON is available.
+ // Here, vd must be tagged as a float or double register.
+ BufferOffset as_vldr_unaligned(VFPRegister vd, Register rn);
+ BufferOffset as_vstr_unaligned(VFPRegister vd, Register rn);
+
+ BufferOffset as_vimm(VFPRegister vd, VFPImm imm, Condition c = Always);
+
+ BufferOffset as_vmrs(Register r, Condition c = Always);
+ BufferOffset as_vmsr(Register r, Condition c = Always);
+
+ // Label operations.
+ bool nextLink(BufferOffset b, BufferOffset* next);
+ void bind(Label* label, BufferOffset boff = BufferOffset());
+ uint32_t currentOffset() { return nextOffset().getOffset(); }
+ void retarget(Label* label, Label* target);
+ // I'm going to pretend this doesn't exist for now.
+ void retarget(Label* label, void* target, RelocationKind reloc);
+
+ static void Bind(uint8_t* rawCode, const CodeLabel& label);
+
+ void as_bkpt();
+ BufferOffset as_illegal_trap();
+
+ public:
+ static void TraceJumpRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader);
+ static void TraceDataRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader);
+
+ void assertNoGCThings() const {
+#ifdef DEBUG
+ MOZ_ASSERT(dataRelocations_.length() == 0);
+ for (auto& j : jumps_) {
+ MOZ_ASSERT(j.kind() == RelocationKind::HARDCODED);
+ }
+#endif
+ }
+
+ static bool SupportsFloatingPoint() { return HasVFP(); }
+ static bool SupportsUnalignedAccesses() { return HasARMv7(); }
+  // Note: returning false here is technically wrong, but proper behavior
+  // requires going via the as_vldr_unaligned and as_vstr_unaligned
+  // instructions, which are NEON-specific and must be asked for explicitly.
+ static bool SupportsFastUnalignedFPAccesses() { return false; }
+
+ static bool HasRoundInstruction(RoundingMode mode) { return false; }
+
+ protected:
+ void addPendingJump(BufferOffset src, ImmPtr target, RelocationKind kind) {
+ enoughMemory_ &= jumps_.append(RelativePatch(target.value, kind));
+ if (kind == RelocationKind::JITCODE) {
+ jumpRelocations_.writeUnsigned(src.getOffset());
+ }
+ }
+
+ public:
+ // The buffer is about to be linked, make sure any constant pools or excess
+ // bookkeeping has been flushed to the instruction stream.
+ void flush() {
+ MOZ_ASSERT(!isFinished);
+ m_buffer.flushPool();
+ return;
+ }
+
+ void comment(const char* msg) {
+#ifdef JS_DISASM_ARM
+ spew_.spew("; %s", msg);
+#endif
+ }
+
+ // Copy the assembly code to the given buffer, and perform any pending
+ // relocations relying on the target address.
+ void executableCopy(uint8_t* buffer);
+
+ // Actual assembly emitting functions.
+
+ // Since I can't think of a reasonable default for the mode, I'm going to
+ // leave it as a required argument.
+ void startDataTransferM(LoadStore ls, Register rm, DTMMode mode,
+ DTMWriteBack update = NoWriteBack,
+ Condition c = Always) {
+ MOZ_ASSERT(!dtmActive);
+ dtmUpdate = update;
+ dtmBase = rm;
+ dtmLoadStore = ls;
+ dtmLastReg = -1;
+ dtmRegBitField = 0;
+ dtmActive = 1;
+ dtmCond = c;
+ dtmMode = mode;
+ }
+
+ void transferReg(Register rn) {
+ MOZ_ASSERT(dtmActive);
+ MOZ_ASSERT(rn.code() > dtmLastReg);
+ dtmRegBitField |= 1 << rn.code();
+ if (dtmLoadStore == IsLoad && rn.code() == 13 && dtmBase.code() == 13) {
+ MOZ_CRASH("ARM Spec says this is invalid");
+ }
+ }
+ void finishDataTransfer() {
+ dtmActive = false;
+ as_dtm(dtmLoadStore, dtmBase, dtmRegBitField, dtmMode, dtmUpdate, dtmCond);
+ }
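+
+  // For example (sketch), emitting "stmdb sp!, {r4, r5, lr}" (a push):
+  //   startDataTransferM(IsStore, sp, DB, WriteBack);
+  //   transferReg(r4);  // registers must be added in ascending order
+  //   transferReg(r5);
+  //   transferReg(lr);
+  //   finishDataTransfer();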
+
+ void startFloatTransferM(LoadStore ls, Register rm, DTMMode mode,
+ DTMWriteBack update = NoWriteBack,
+ Condition c = Always) {
+ MOZ_ASSERT(!dtmActive);
+ dtmActive = true;
+ dtmUpdate = update;
+ dtmLoadStore = ls;
+ dtmBase = rm;
+ dtmCond = c;
+ dtmLastReg = -1;
+ dtmMode = mode;
+ dtmDelta = 0;
+ }
+ void transferFloatReg(VFPRegister rn) {
+ if (dtmLastReg == -1) {
+ vdtmFirstReg = rn.code();
+ } else {
+ if (dtmDelta == 0) {
+ dtmDelta = rn.code() - dtmLastReg;
+ MOZ_ASSERT(dtmDelta == 1 || dtmDelta == -1);
+ }
+ MOZ_ASSERT(dtmLastReg >= 0);
+ MOZ_ASSERT(rn.code() == unsigned(dtmLastReg) + dtmDelta);
+ }
+
+ dtmLastReg = rn.code();
+ }
+ void finishFloatTransfer() {
+ MOZ_ASSERT(dtmActive);
+ dtmActive = false;
+ MOZ_ASSERT(dtmLastReg != -1);
+ dtmDelta = dtmDelta ? dtmDelta : 1;
+ // The operand for the vstr/vldr instruction is the lowest register in the
+ // range.
+ int low = std::min(dtmLastReg, vdtmFirstReg);
+ int high = std::max(dtmLastReg, vdtmFirstReg);
+ // Fencepost problem.
+ int len = high - low + 1;
+ // vdtm can only transfer 16 registers at once. If we need to transfer
+ // more, then either hoops are necessary, or we need to be updating the
+ // register.
+ MOZ_ASSERT_IF(len > 16, dtmUpdate == WriteBack);
+
+ int adjustLow = dtmLoadStore == IsStore ? 0 : 1;
+ int adjustHigh = dtmLoadStore == IsStore ? -1 : 0;
+ while (len > 0) {
+ // Limit the instruction to 16 registers.
+ int curLen = std::min(len, 16);
+ // If it is a store, we want to start at the high end and move down
+ // (e.g. vpush d16-d31; vpush d0-d15).
+ int curStart = (dtmLoadStore == IsStore) ? high - curLen + 1 : low;
+ as_vdtm(dtmLoadStore, dtmBase,
+ VFPRegister(FloatRegister::FromCode(curStart)), curLen, dtmCond);
+ // Update the bounds.
+ low += adjustLow * curLen;
+ high += adjustHigh * curLen;
+ // Update the length parameter.
+ len -= curLen;
+ }
+ }
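+
+  // For example (sketch), emitting "vpush {d8-d15}":
+  //   startFloatTransferM(IsStore, sp, DB, WriteBack);
+  //   transferFloatReg(d8);  // subsequent registers must be contiguous
+  //   ... d9 through d14 ...
+  //   transferFloatReg(d15);
+  //   finishFloatTransfer();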
+
+ private:
+ int dtmRegBitField;
+ int vdtmFirstReg;
+ int dtmLastReg;
+ int dtmDelta;
+ Register dtmBase;
+ DTMWriteBack dtmUpdate;
+ DTMMode dtmMode;
+ LoadStore dtmLoadStore;
+ bool dtmActive;
+ Condition dtmCond;
+
+ public:
+ enum {
+ PadForAlign8 = (int)0x00,
+ PadForAlign16 = (int)0x0000,
+ PadForAlign32 = (int)0xe12fff7f // 'bkpt 0xffff'
+ };
+
+  // API for speaking with the IonAssemblerBufferWithConstantPools:
+  // generate an initial placeholder instruction that we want to later fix
+  // up.
+ static void InsertIndexIntoTag(uint8_t* load, uint32_t index);
+
+ // Take the stub value that was written in before, and write in an actual
+ // load using the index we'd computed previously as well as the address of
+ // the pool start.
+ static void PatchConstantPoolLoad(void* loadAddr, void* constPoolAddr);
+
+ // We're not tracking short-range branches for ARM for now.
+ static void PatchShortRangeBranchToVeneer(ARMBuffer*, unsigned rangeIdx,
+ BufferOffset deadline,
+ BufferOffset veneer) {
+ MOZ_CRASH();
+ }
+ // END API
+
+  // Move our entire pool into the instruction stream. This is to force an
+  // opportunistic dump of the pool, preferably when it is more convenient
+  // to do a dump.
+ void flushBuffer();
+ void enterNoPool(size_t maxInst);
+ void leaveNoPool();
+ void enterNoNops();
+ void leaveNoNops();
+
+ static void WritePoolHeader(uint8_t* start, Pool* p, bool isNatural);
+ static void WritePoolGuard(BufferOffset branch, Instruction* inst,
+ BufferOffset dest);
+
+ static uint32_t PatchWrite_NearCallSize();
+ static uint32_t NopSize() { return 4; }
+ static void PatchWrite_NearCall(CodeLocationLabel start,
+ CodeLocationLabel toCall);
+ static void PatchDataWithValueCheck(CodeLocationLabel label,
+ PatchedImmPtr newValue,
+ PatchedImmPtr expectedValue);
+ static void PatchDataWithValueCheck(CodeLocationLabel label, ImmPtr newValue,
+ ImmPtr expectedValue);
+ static void PatchWrite_Imm32(CodeLocationLabel label, Imm32 imm);
+
+ static uint32_t AlignDoubleArg(uint32_t offset) { return (offset + 1) & ~1; }
+ static uint8_t* NextInstruction(uint8_t* instruction,
+ uint32_t* count = nullptr);
+
+ // Toggle a jmp or cmp emitted by toggledJump().
+ static void ToggleToJmp(CodeLocationLabel inst_);
+ static void ToggleToCmp(CodeLocationLabel inst_);
+
+ static size_t ToggledCallSize(uint8_t* code);
+ static void ToggleCall(CodeLocationLabel inst_, bool enabled);
+
+ void processCodeLabels(uint8_t* rawCode);
+
+ void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end,
+ const Disassembler::HeapAccess& heapAccess) {
+ // Implement this if we implement a disassembler.
+ }
+}; // Assembler
+
+// An Instruction is a structure for both encoding and decoding any and all ARM
+// instructions. Many classes have not been implemented thus far.
+class Instruction {
+ uint32_t data;
+
+ protected:
+  // This is not for defaulting to Always; this is for instructions that
+  // cannot be made conditional and have the usually invalid 0b1111 cond
+  // field.
+ explicit Instruction(uint32_t data_, bool fake = false)
+ : data(data_ | 0xf0000000) {
+ MOZ_ASSERT(fake || ((data_ & 0xf0000000) == 0));
+ }
+ // Standard constructor.
+ Instruction(uint32_t data_, Assembler::Condition c)
+ : data(data_ | (uint32_t)c) {
+ MOZ_ASSERT((data_ & 0xf0000000) == 0);
+ }
+ // You should never create an instruction directly. You should create a more
+ // specific instruction which will eventually call one of these constructors
+ // for you.
+ public:
+ uint32_t encode() const { return data; }
+ // Check if this instruction is really a particular case.
+ template <class C>
+ bool is() const {
+ return C::IsTHIS(*this);
+ }
+
+ // Safely get a more specific variant of this pointer.
+ template <class C>
+ C* as() const {
+ return C::AsTHIS(*this);
+ }
+
+ const Instruction& operator=(Instruction src) {
+ data = src.data;
+ return *this;
+ }
+ // Since almost all instructions have condition codes, the condition code
+ // extractor resides in the base class.
+ Assembler::Condition extractCond() const {
+ MOZ_ASSERT(data >> 28 != 0xf,
+ "The instruction does not have condition code");
+ return (Assembler::Condition)(data & 0xf0000000);
+ }
+
+  // Sometimes, an API wants a uint32_t (or a pointer to it) rather than an
+  // instruction. raw() just coerces this into a pointer to a uint32_t.
+ const uint32_t* raw() const { return &data; }
+ uint32_t size() const { return 4; }
+}; // Instruction
+
+// Make sure that it is the right size.
+static_assert(sizeof(Instruction) == 4);
+
+inline void InstructionIterator::advanceRaw(ptrdiff_t instructions) {
+ inst_ = inst_ + instructions;
+}
+
+// Data Transfer Instructions.
+class InstDTR : public Instruction {
+ public:
+ enum IsByte_ { IsByte = 0x00400000, IsWord = 0x00000000 };
+ static const int IsDTR = 0x04000000;
+ static const int IsDTRMask = 0x0c000000;
+
+ // TODO: Replace the initialization with something that is safer.
+ InstDTR(LoadStore ls, IsByte_ ib, Index mode, Register rt, DTRAddr addr,
+ Assembler::Condition c)
+ : Instruction(std::underlying_type_t<LoadStore>(ls) |
+ std::underlying_type_t<IsByte_>(ib) |
+ std::underlying_type_t<Index>(mode) | RT(rt) |
+ addr.encode() | IsDTR,
+ c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstDTR* AsTHIS(const Instruction& i);
+};
+static_assert(sizeof(InstDTR) == sizeof(Instruction));
+
+class InstLDR : public InstDTR {
+ public:
+ InstLDR(Index mode, Register rt, DTRAddr addr, Assembler::Condition c)
+ : InstDTR(IsLoad, IsWord, mode, rt, addr, c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstLDR* AsTHIS(const Instruction& i);
+
+ int32_t signedOffset() const {
+ int32_t offset = encode() & 0xfff;
+ if (IsUp_(encode() & IsUp) != IsUp) {
+ return -offset;
+ }
+ return offset;
+ }
+ uint32_t* dest() const {
+ int32_t offset = signedOffset();
+    // When patching the load in PatchConstantPoolLoad, we ensure that the
+    // offset is a multiple of 4, offset by 8 bytes from the actual
+    // location. Indeed, when the base register is PC, ARM's three-stage
+    // pipeline design means that PC reads as the instruction's address
+    // plus 8 bytes (= 2 * sizeof(uint32_t)) by the time the load executes.
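+    // For example, an "ldr rt, [pc, #8]" located at address A loads the
+    // word at A + 8 (pipeline) + 8 (offset), i.e. raw() + 4 words.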
+ MOZ_ASSERT(offset % 4 == 0);
+ offset >>= 2;
+ return (uint32_t*)raw() + offset + 2;
+ }
+};
+static_assert(sizeof(InstDTR) == sizeof(InstLDR));
+
+class InstNOP : public Instruction {
+ public:
+ static const uint32_t NopInst = 0x0320f000;
+
+ InstNOP() : Instruction(NopInst, Assembler::Always) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstNOP* AsTHIS(Instruction& i);
+};
+
+// Branching to a register, or calling a register
+class InstBranchReg : public Instruction {
+ protected:
+  // Don't use BranchTag yourself; use a derived instruction.
+ enum BranchTag { IsBX = 0x012fff10, IsBLX = 0x012fff30 };
+
+ static const uint32_t IsBRegMask = 0x0ffffff0;
+
+ InstBranchReg(BranchTag tag, Register rm, Assembler::Condition c)
+ : Instruction(tag | rm.code(), c) {}
+
+ public:
+ static bool IsTHIS(const Instruction& i);
+ static InstBranchReg* AsTHIS(const Instruction& i);
+
+ // Get the register that is being branched to
+ void extractDest(Register* dest);
+ // Make sure we are branching to a pre-known register
+ bool checkDest(Register dest);
+};
+static_assert(sizeof(InstBranchReg) == sizeof(Instruction));
+
+// Branching to an immediate offset, or calling an immediate offset
+class InstBranchImm : public Instruction {
+ protected:
+ enum BranchTag { IsB = 0x0a000000, IsBL = 0x0b000000 };
+
+ static const uint32_t IsBImmMask = 0x0f000000;
+
+ InstBranchImm(BranchTag tag, BOffImm off, Assembler::Condition c)
+ : Instruction(tag | off.encode(), c) {}
+
+ public:
+ static bool IsTHIS(const Instruction& i);
+ static InstBranchImm* AsTHIS(const Instruction& i);
+
+ void extractImm(BOffImm* dest);
+};
+static_assert(sizeof(InstBranchImm) == sizeof(Instruction));
+
+// Very specific branching instructions.
+class InstBXReg : public InstBranchReg {
+ public:
+ static bool IsTHIS(const Instruction& i);
+ static InstBXReg* AsTHIS(const Instruction& i);
+};
+
+class InstBLXReg : public InstBranchReg {
+ public:
+ InstBLXReg(Register reg, Assembler::Condition c)
+ : InstBranchReg(IsBLX, reg, c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstBLXReg* AsTHIS(const Instruction& i);
+};
+
+class InstBImm : public InstBranchImm {
+ public:
+ InstBImm(BOffImm off, Assembler::Condition c) : InstBranchImm(IsB, off, c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstBImm* AsTHIS(const Instruction& i);
+};
+
+class InstBLImm : public InstBranchImm {
+ public:
+ InstBLImm(BOffImm off, Assembler::Condition c)
+ : InstBranchImm(IsBL, off, c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstBLImm* AsTHIS(const Instruction& i);
+};
+
+// Both movw and movt. The layout of both the immediate and the destination
+// register is the same so the code is being shared.
+class InstMovWT : public Instruction {
+ protected:
+ enum WT { IsW = 0x03000000, IsT = 0x03400000 };
+ static const uint32_t IsWTMask = 0x0ff00000;
+
+ InstMovWT(Register rd, Imm16 imm, WT wt, Assembler::Condition c)
+ : Instruction(RD(rd) | imm.encode() | wt, c) {}
+
+ public:
+ void extractImm(Imm16* dest);
+ void extractDest(Register* dest);
+ bool checkImm(Imm16 dest);
+ bool checkDest(Register dest);
+
+ static bool IsTHIS(Instruction& i);
+ static InstMovWT* AsTHIS(Instruction& i);
+};
+static_assert(sizeof(InstMovWT) == sizeof(Instruction));
+
+class InstMovW : public InstMovWT {
+ public:
+ InstMovW(Register rd, Imm16 imm, Assembler::Condition c)
+ : InstMovWT(rd, imm, IsW, c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstMovW* AsTHIS(const Instruction& i);
+};
+
+class InstMovT : public InstMovWT {
+ public:
+ InstMovT(Register rd, Imm16 imm, Assembler::Condition c)
+ : InstMovWT(rd, imm, IsT, c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstMovT* AsTHIS(const Instruction& i);
+};
+
+class InstALU : public Instruction {
+ static const int32_t ALUMask = 0xc << 24;
+
+ public:
+ InstALU(Register rd, Register rn, Operand2 op2, ALUOp op, SBit s,
+ Assembler::Condition c)
+ : Instruction(maybeRD(rd) | maybeRN(rn) | op2.encode() | op | s, c) {}
+
+ static bool IsTHIS(const Instruction& i);
+ static InstALU* AsTHIS(const Instruction& i);
+
+ void extractOp(ALUOp* ret);
+ bool checkOp(ALUOp op);
+ void extractDest(Register* ret);
+ bool checkDest(Register rd);
+ void extractOp1(Register* ret);
+ bool checkOp1(Register rn);
+ Operand2 extractOp2();
+};
+
+class InstCMP : public InstALU {
+ public:
+ static bool IsTHIS(const Instruction& i);
+ static InstCMP* AsTHIS(const Instruction& i);
+};
+
+class InstMOV : public InstALU {
+ public:
+ static bool IsTHIS(const Instruction& i);
+ static InstMOV* AsTHIS(const Instruction& i);
+};
+
+// Compile-time iterator over instructions, with a safe interface that
+// references not-necessarily-linear Instructions by linear BufferOffset.
+class BufferInstructionIterator
+ : public ARMBuffer::AssemblerBufferInstIterator {
+ public:
+ BufferInstructionIterator(BufferOffset bo, ARMBuffer* buffer)
+ : ARMBuffer::AssemblerBufferInstIterator(bo, buffer) {}
+
+ // Advances the buffer to the next intentionally-inserted instruction.
+ Instruction* next() {
+ advance(cur()->size());
+ maybeSkipAutomaticInstructions();
+ return cur();
+ }
+
+ // Advances the BufferOffset past any automatically-inserted instructions.
+ Instruction* maybeSkipAutomaticInstructions();
+};
+
+static const uint32_t NumIntArgRegs = 4;
+
+// There are 16 *float* registers available for arguments.
+// If doubles are used, only half that number of registers is available.
+static const uint32_t NumFloatArgRegs = 16;
+
+static inline bool GetIntArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
+ Register* out) {
+ if (usedIntArgs >= NumIntArgRegs) {
+ return false;
+ }
+
+ *out = Register::FromCode(usedIntArgs);
+ return true;
+}
+
+// Get a register in which we plan to put a quantity that will be used as an
+// integer argument. This differs from GetIntArgReg in that if we have no more
+// actual argument registers to use we will fall back on using whatever
+// CallTempReg* don't overlap the argument registers, and only fail once those
+// run out too.
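+//
+// For example (sketch), once all four argument registers are taken
+// (usedIntArgs == 4), this hands back CallTempNonArgRegs[0] rather than
+// failing.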
+static inline bool GetTempRegForIntArg(uint32_t usedIntArgs,
+ uint32_t usedFloatArgs, Register* out) {
+ if (GetIntArgReg(usedIntArgs, usedFloatArgs, out)) {
+ return true;
+ }
+
+ // Unfortunately, we have to assume things about the point at which
+ // GetIntArgReg returns false, because we need to know how many registers it
+ // can allocate.
+ usedIntArgs -= NumIntArgRegs;
+ if (usedIntArgs >= NumCallTempNonArgRegs) {
+ return false;
+ }
+
+ *out = CallTempNonArgRegs[usedIntArgs];
+ return true;
+}
+
+#if defined(JS_CODEGEN_ARM_HARDFP) || defined(JS_SIMULATOR_ARM)
+
+static inline bool GetFloat32ArgReg(uint32_t usedIntArgs,
+ uint32_t usedFloatArgs,
+ FloatRegister* out) {
+ MOZ_ASSERT(UseHardFpABI());
+ if (usedFloatArgs >= NumFloatArgRegs) {
+ return false;
+ }
+ *out = VFPRegister(usedFloatArgs, VFPRegister::Single);
+ return true;
+}
+static inline bool GetDoubleArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
+ FloatRegister* out) {
+ MOZ_ASSERT(UseHardFpABI());
+ MOZ_ASSERT((usedFloatArgs % 2) == 0);
+ if (usedFloatArgs >= NumFloatArgRegs) {
+ return false;
+ }
+ *out = VFPRegister(usedFloatArgs >> 1, VFPRegister::Double);
+ return true;
+}
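+
+// Note: usedFloatArgs counts single-precision slots, so after a single
+// double argument (usedFloatArgs == 2) GetDoubleArgReg yields d1.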
+
+#endif
+
+class DoubleEncoder {
+ struct DoubleEntry {
+ uint32_t dblTop;
+ datastore::Imm8VFPImmData data;
+ };
+
+ static const DoubleEntry table[256];
+
+ public:
+ bool lookup(uint32_t top, datastore::Imm8VFPImmData* ret) const {
+ for (int i = 0; i < 256; i++) {
+ if (table[i].dblTop == top) {
+ *ret = table[i].data;
+ return true;
+ }
+ }
+ return false;
+ }
+};
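+
+// Usage sketch (topBits being the high 32 bits of a double's IEEE-754
+// representation, a hypothetical local):
+//   datastore::Imm8VFPImmData imm8;
+//   if (encoder.lookup(topBits, &imm8)) {
+//     // The value can be encoded as a "vmov vd, #imm" immediate.
+//   }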
+
+// Forbids nop filling for testing purposes. Not nestable.
+class AutoForbidNops {
+ protected:
+ Assembler* masm_;
+
+ public:
+ explicit AutoForbidNops(Assembler* masm) : masm_(masm) {
+ masm_->enterNoNops();
+ }
+ ~AutoForbidNops() { masm_->leaveNoNops(); }
+};
+
+class AutoForbidPoolsAndNops : public AutoForbidNops {
+ public:
+  // The maxInst argument is the maximum number of word-sized instructions
+  // that will be allocated within this context. It is used to determine if
+  // the pool needs to be dumped before entering this context. The debug
+  // code checks that no more than maxInst instructions are actually
+  // allocated.
+  //
+  // Allocation of pool entries is not supported within this context, so the
+  // code cannot use large integers or float constants, etc.
+ AutoForbidPoolsAndNops(Assembler* masm, size_t maxInst)
+ : AutoForbidNops(masm) {
+ masm_->enterNoPool(maxInst);
+ }
+
+ ~AutoForbidPoolsAndNops() { masm_->leaveNoPool(); }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_Assembler_arm_h */
diff --git a/js/src/jit/arm/CodeGenerator-arm.cpp b/js/src/jit/arm/CodeGenerator-arm.cpp
new file mode 100644
index 0000000000..1526be81c9
--- /dev/null
+++ b/js/src/jit/arm/CodeGenerator-arm.cpp
@@ -0,0 +1,3154 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm/CodeGenerator-arm.h"
+
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
+
+#include <iterator>
+
+#include "jsnum.h"
+
+#include "jit/CodeGenerator.h"
+#include "jit/InlineScriptTree.h"
+#include "jit/JitRuntime.h"
+#include "jit/MIR.h"
+#include "jit/MIRGraph.h"
+#include "js/Conversions.h"
+#include "js/ScalarType.h" // js::Scalar::Type
+#include "vm/JSContext.h"
+#include "vm/Realm.h"
+#include "vm/Shape.h"
+
+#include "jit/MacroAssembler-inl.h"
+#include "jit/shared/CodeGenerator-shared-inl.h"
+#include "vm/JSScript-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+using JS::GenericNaN;
+using JS::ToInt32;
+using mozilla::DebugOnly;
+using mozilla::FloorLog2;
+using mozilla::NegativeInfinity;
+
+// shared
+CodeGeneratorARM::CodeGeneratorARM(MIRGenerator* gen, LIRGraph* graph,
+ MacroAssembler* masm)
+ : CodeGeneratorShared(gen, graph, masm) {}
+
+Register64 CodeGeneratorARM::ToOperandOrRegister64(
+ const LInt64Allocation input) {
+ return ToRegister64(input);
+}
+
+void CodeGeneratorARM::emitBranch(Assembler::Condition cond,
+ MBasicBlock* mirTrue, MBasicBlock* mirFalse) {
+ if (isNextBlock(mirFalse->lir())) {
+ jumpToBlock(mirTrue, cond);
+ } else {
+ jumpToBlock(mirFalse, Assembler::InvertCondition(cond));
+ jumpToBlock(mirTrue);
+ }
+}
+
+void OutOfLineBailout::accept(CodeGeneratorARM* codegen) {
+ codegen->visitOutOfLineBailout(this);
+}
+
+void CodeGenerator::visitTestIAndBranch(LTestIAndBranch* test) {
+ const LAllocation* opd = test->getOperand(0);
+ MBasicBlock* ifTrue = test->ifTrue();
+ MBasicBlock* ifFalse = test->ifFalse();
+
+ // Test the operand
+ masm.as_cmp(ToRegister(opd), Imm8(0));
+
+ if (isNextBlock(ifFalse->lir())) {
+ jumpToBlock(ifTrue, Assembler::NonZero);
+ } else if (isNextBlock(ifTrue->lir())) {
+ jumpToBlock(ifFalse, Assembler::Zero);
+ } else {
+ jumpToBlock(ifFalse, Assembler::Zero);
+ jumpToBlock(ifTrue);
+ }
+}
+
+void CodeGenerator::visitCompare(LCompare* comp) {
+ Assembler::Condition cond =
+ JSOpToCondition(comp->mir()->compareType(), comp->jsop());
+ const LAllocation* left = comp->getOperand(0);
+ const LAllocation* right = comp->getOperand(1);
+ const LDefinition* def = comp->getDef(0);
+
+ ScratchRegisterScope scratch(masm);
+
+ if (right->isConstant()) {
+ masm.ma_cmp(ToRegister(left), Imm32(ToInt32(right)), scratch);
+ } else if (right->isRegister()) {
+ masm.ma_cmp(ToRegister(left), ToRegister(right));
+ } else {
+ SecondScratchRegisterScope scratch2(masm);
+ masm.ma_cmp(ToRegister(left), Operand(ToAddress(right)), scratch, scratch2);
+ }
+ masm.ma_mov(Imm32(0), ToRegister(def));
+ masm.ma_mov(Imm32(1), ToRegister(def), cond);
+}
+
+void CodeGenerator::visitCompareAndBranch(LCompareAndBranch* comp) {
+ Assembler::Condition cond =
+ JSOpToCondition(comp->cmpMir()->compareType(), comp->jsop());
+ const LAllocation* left = comp->left();
+ const LAllocation* right = comp->right();
+
+ ScratchRegisterScope scratch(masm);
+
+ if (right->isConstant()) {
+ masm.ma_cmp(ToRegister(left), Imm32(ToInt32(right)), scratch);
+ } else if (right->isRegister()) {
+ masm.ma_cmp(ToRegister(left), ToRegister(right));
+ } else {
+ SecondScratchRegisterScope scratch2(masm);
+ masm.ma_cmp(ToRegister(left), Operand(ToAddress(right)), scratch, scratch2);
+ }
+ emitBranch(cond, comp->ifTrue(), comp->ifFalse());
+}
+
+bool CodeGeneratorARM::generateOutOfLineCode() {
+ if (!CodeGeneratorShared::generateOutOfLineCode()) {
+ return false;
+ }
+
+ if (deoptLabel_.used()) {
+ // All non-table-based bailouts will go here.
+ masm.bind(&deoptLabel_);
+
+ // Push the frame size, so the handler can recover the IonScript.
+ masm.push(Imm32(frameSize()));
+
+ TrampolinePtr handler = gen->jitRuntime()->getGenericBailoutHandler();
+ masm.jump(handler);
+ }
+
+ return !masm.oom();
+}
+
+void CodeGeneratorARM::bailoutIf(Assembler::Condition condition,
+ LSnapshot* snapshot) {
+ encode(snapshot);
+
+ InlineScriptTree* tree = snapshot->mir()->block()->trackedTree();
+ OutOfLineBailout* ool =
+ new (alloc()) OutOfLineBailout(snapshot, masm.framePushed());
+
+ // All bailout code is associated with the bytecodeSite of the block we are
+ // bailing out from.
+ addOutOfLineCode(ool,
+ new (alloc()) BytecodeSite(tree, tree->script()->code()));
+
+ masm.ma_b(ool->entry(), condition);
+}
+
+void CodeGeneratorARM::bailoutFrom(Label* label, LSnapshot* snapshot) {
+ MOZ_ASSERT_IF(!masm.oom(), label->used());
+ MOZ_ASSERT_IF(!masm.oom(), !label->bound());
+
+ encode(snapshot);
+
+ InlineScriptTree* tree = snapshot->mir()->block()->trackedTree();
+ OutOfLineBailout* ool =
+ new (alloc()) OutOfLineBailout(snapshot, masm.framePushed());
+
+ // All bailout code is associated with the bytecodeSite of the block we are
+ // bailing out from.
+ addOutOfLineCode(ool,
+ new (alloc()) BytecodeSite(tree, tree->script()->code()));
+
+ masm.retarget(label, ool->entry());
+}
+
+void CodeGeneratorARM::bailout(LSnapshot* snapshot) {
+ Label label;
+ masm.ma_b(&label);
+ bailoutFrom(&label, snapshot);
+}
+
+void CodeGeneratorARM::visitOutOfLineBailout(OutOfLineBailout* ool) {
+ masm.push(Imm32(ool->snapshot()->snapshotOffset()));
+ masm.ma_b(&deoptLabel_);
+}
+
+void CodeGenerator::visitMinMaxD(LMinMaxD* ins) {
+ FloatRegister first = ToFloatRegister(ins->first());
+ FloatRegister second = ToFloatRegister(ins->second());
+
+ MOZ_ASSERT(first == ToFloatRegister(ins->output()));
+
+ if (ins->mir()->isMax()) {
+ masm.maxDouble(second, first, true);
+ } else {
+ masm.minDouble(second, first, true);
+ }
+}
+
+void CodeGenerator::visitMinMaxF(LMinMaxF* ins) {
+ FloatRegister first = ToFloatRegister(ins->first());
+ FloatRegister second = ToFloatRegister(ins->second());
+
+ MOZ_ASSERT(first == ToFloatRegister(ins->output()));
+
+ if (ins->mir()->isMax()) {
+ masm.maxFloat32(second, first, true);
+ } else {
+ masm.minFloat32(second, first, true);
+ }
+}
+
+void CodeGenerator::visitAddI(LAddI* ins) {
+ const LAllocation* lhs = ins->getOperand(0);
+ const LAllocation* rhs = ins->getOperand(1);
+ const LDefinition* dest = ins->getDef(0);
+
+ ScratchRegisterScope scratch(masm);
+
+ if (rhs->isConstant()) {
+ masm.ma_add(ToRegister(lhs), Imm32(ToInt32(rhs)), ToRegister(dest), scratch,
+ SetCC);
+ } else if (rhs->isRegister()) {
+ masm.ma_add(ToRegister(lhs), ToRegister(rhs), ToRegister(dest), SetCC);
+ } else {
+ masm.ma_add(ToRegister(lhs), Operand(ToAddress(rhs)), ToRegister(dest),
+ SetCC);
+ }
+
+ if (ins->snapshot()) {
+ bailoutIf(Assembler::Overflow, ins->snapshot());
+ }
+}
+
+void CodeGenerator::visitAddI64(LAddI64* lir) {
+ const LInt64Allocation lhs = lir->getInt64Operand(LAddI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LAddI64::Rhs);
+
+ MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
+
+ if (IsConstant(rhs)) {
+ masm.add64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
+ return;
+ }
+
+ masm.add64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
+}
+
+void CodeGenerator::visitSubI(LSubI* ins) {
+ const LAllocation* lhs = ins->getOperand(0);
+ const LAllocation* rhs = ins->getOperand(1);
+ const LDefinition* dest = ins->getDef(0);
+
+ ScratchRegisterScope scratch(masm);
+
+ if (rhs->isConstant()) {
+ masm.ma_sub(ToRegister(lhs), Imm32(ToInt32(rhs)), ToRegister(dest), scratch,
+ SetCC);
+ } else if (rhs->isRegister()) {
+ masm.ma_sub(ToRegister(lhs), ToRegister(rhs), ToRegister(dest), SetCC);
+ } else {
+ masm.ma_sub(ToRegister(lhs), Operand(ToAddress(rhs)), ToRegister(dest),
+ SetCC);
+ }
+
+ if (ins->snapshot()) {
+ bailoutIf(Assembler::Overflow, ins->snapshot());
+ }
+}
+
+void CodeGenerator::visitSubI64(LSubI64* lir) {
+ const LInt64Allocation lhs = lir->getInt64Operand(LSubI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LSubI64::Rhs);
+
+ MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
+
+ if (IsConstant(rhs)) {
+ masm.sub64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
+ return;
+ }
+
+ masm.sub64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
+}
+
+void CodeGenerator::visitMulI(LMulI* ins) {
+ const LAllocation* lhs = ins->getOperand(0);
+ const LAllocation* rhs = ins->getOperand(1);
+ const LDefinition* dest = ins->getDef(0);
+ MMul* mul = ins->mir();
+ MOZ_ASSERT_IF(mul->mode() == MMul::Integer,
+ !mul->canBeNegativeZero() && !mul->canOverflow());
+
+ if (rhs->isConstant()) {
+ // Bailout when this condition is met.
+ Assembler::Condition c = Assembler::Overflow;
+ // Bailout on -0.0
+ int32_t constant = ToInt32(rhs);
+ if (mul->canBeNegativeZero() && constant <= 0) {
+ Assembler::Condition bailoutCond =
+ (constant == 0) ? Assembler::LessThan : Assembler::Equal;
+ masm.as_cmp(ToRegister(lhs), Imm8(0));
+ bailoutIf(bailoutCond, ins->snapshot());
+ }
+ // TODO: move these to ma_mul.
+ switch (constant) {
+ case -1:
+ masm.as_rsb(ToRegister(dest), ToRegister(lhs), Imm8(0), SetCC);
+ break;
+ case 0:
+ masm.ma_mov(Imm32(0), ToRegister(dest));
+        return; // Escape overflow check.
+ case 1:
+ // Nop
+ masm.ma_mov(ToRegister(lhs), ToRegister(dest));
+        return; // Escape overflow check.
+ case 2:
+ masm.ma_add(ToRegister(lhs), ToRegister(lhs), ToRegister(dest), SetCC);
+ // Overflow is handled later.
+ break;
+ default: {
+ bool handled = false;
+ if (constant > 0) {
+ // Try shift and add sequences for a positive constant.
+ if (!mul->canOverflow()) {
+ // If it cannot overflow, we can do lots of optimizations.
+ Register src = ToRegister(lhs);
+ uint32_t shift = FloorLog2(constant);
+ uint32_t rest = constant - (1 << shift);
+ // See if the constant has one bit set, meaning it can be
+ // encoded as a bitshift.
+ if ((1 << shift) == constant) {
+ masm.ma_lsl(Imm32(shift), src, ToRegister(dest));
+ handled = true;
+ } else {
+ // If the constant cannot be encoded as (1 << C1), see
+ // if it can be encoded as (1 << C1) | (1 << C2), which
+ // can be computed using an add and a shift.
+ uint32_t shift_rest = FloorLog2(rest);
+ if ((1u << shift_rest) == rest) {
+ masm.as_add(ToRegister(dest), src,
+ lsl(src, shift - shift_rest));
+ if (shift_rest != 0) {
+ masm.ma_lsl(Imm32(shift_rest), ToRegister(dest),
+ ToRegister(dest));
+ }
+ handled = true;
+ }
+ }
+ } else if (ToRegister(lhs) != ToRegister(dest)) {
+ // To stay on the safe side, only optimize things that are a
+ // power of 2.
+
+ uint32_t shift = FloorLog2(constant);
+ if ((1 << shift) == constant) {
+ // dest = lhs * pow(2,shift)
+ masm.ma_lsl(Imm32(shift), ToRegister(lhs), ToRegister(dest));
+          // At runtime, check (lhs == dest >> shift); if this does not
+          // hold, some bits were lost due to overflow, and the computation
+          // should be resumed as a double.
+ masm.as_cmp(ToRegister(lhs), asr(ToRegister(dest), shift));
+ c = Assembler::NotEqual;
+ handled = true;
+ }
+ }
+ }
+
+ if (!handled) {
+ ScratchRegisterScope scratch(masm);
+ if (mul->canOverflow()) {
+ c = masm.ma_check_mul(ToRegister(lhs), Imm32(ToInt32(rhs)),
+ ToRegister(dest), scratch, c);
+ } else {
+ masm.ma_mul(ToRegister(lhs), Imm32(ToInt32(rhs)), ToRegister(dest),
+ scratch);
+ }
+ }
+ }
+ }
+ // Bailout on overflow.
+ if (mul->canOverflow()) {
+ bailoutIf(c, ins->snapshot());
+ }
+ } else {
+ Assembler::Condition c = Assembler::Overflow;
+
+ if (mul->canOverflow()) {
+ ScratchRegisterScope scratch(masm);
+ c = masm.ma_check_mul(ToRegister(lhs), ToRegister(rhs), ToRegister(dest),
+ scratch, c);
+ } else {
+ masm.ma_mul(ToRegister(lhs), ToRegister(rhs), ToRegister(dest));
+ }
+
+ // Bailout on overflow.
+ if (mul->canOverflow()) {
+ bailoutIf(c, ins->snapshot());
+ }
+
+ if (mul->canBeNegativeZero()) {
+ Label done;
+ masm.as_cmp(ToRegister(dest), Imm8(0));
+ masm.ma_b(&done, Assembler::NotEqual);
+
+ // Result is -0 if lhs or rhs is negative.
+ masm.ma_cmn(ToRegister(lhs), ToRegister(rhs));
+ bailoutIf(Assembler::Signed, ins->snapshot());
+
+ masm.bind(&done);
+ }
+ }
+}
+
+void CodeGenerator::visitMulI64(LMulI64* lir) {
+ const LInt64Allocation lhs = lir->getInt64Operand(LMulI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LMulI64::Rhs);
+
+ MOZ_ASSERT(ToRegister64(lhs) == ToOutRegister64(lir));
+
+ if (IsConstant(rhs)) {
+ int64_t constant = ToInt64(rhs);
+ switch (constant) {
+ case -1:
+ masm.neg64(ToRegister64(lhs));
+ return;
+ case 0:
+ masm.xor64(ToRegister64(lhs), ToRegister64(lhs));
+ return;
+ case 1:
+ // nop
+ return;
+ case 2:
+ masm.add64(ToRegister64(lhs), ToRegister64(lhs));
+ return;
+ default:
+ if (constant > 0) {
+ // Use shift if constant is power of 2.
+ int32_t shift = mozilla::FloorLog2(constant);
+ if (int64_t(1) << shift == constant) {
+ masm.lshift64(Imm32(shift), ToRegister64(lhs));
+ return;
+ }
+ }
+ Register temp = ToTempRegisterOrInvalid(lir->temp());
+ masm.mul64(Imm64(constant), ToRegister64(lhs), temp);
+ }
+ } else {
+ Register temp = ToTempRegisterOrInvalid(lir->temp());
+ masm.mul64(ToOperandOrRegister64(rhs), ToRegister64(lhs), temp);
+ }
+}
+
+void CodeGeneratorARM::divICommon(MDiv* mir, Register lhs, Register rhs,
+ Register output, LSnapshot* snapshot,
+ Label& done) {
+ ScratchRegisterScope scratch(masm);
+
+ if (mir->canBeNegativeOverflow()) {
+    // Handle INT32_MIN / -1: the integer division will give INT32_MIN,
+    // but we want -(double)INT32_MIN.
+
+ // Sets EQ if lhs == INT32_MIN.
+ masm.ma_cmp(lhs, Imm32(INT32_MIN), scratch);
+ // If EQ (LHS == INT32_MIN), sets EQ if rhs == -1.
+ masm.ma_cmp(rhs, Imm32(-1), scratch, Assembler::Equal);
+ if (mir->canTruncateOverflow()) {
+ if (mir->trapOnError()) {
+ Label ok;
+ masm.ma_b(&ok, Assembler::NotEqual);
+ masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->bytecodeOffset());
+ masm.bind(&ok);
+ } else {
+ // (-INT32_MIN)|0 = INT32_MIN
+ Label skip;
+ masm.ma_b(&skip, Assembler::NotEqual);
+ masm.ma_mov(Imm32(INT32_MIN), output);
+ masm.ma_b(&done);
+ masm.bind(&skip);
+ }
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Equal, snapshot);
+ }
+ }
+
+ // Handle divide by zero.
+ if (mir->canBeDivideByZero()) {
+ masm.as_cmp(rhs, Imm8(0));
+ if (mir->canTruncateInfinities()) {
+ if (mir->trapOnError()) {
+ Label nonZero;
+ masm.ma_b(&nonZero, Assembler::NotEqual);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+ masm.bind(&nonZero);
+ } else {
+ // Infinity|0 == 0
+ Label skip;
+ masm.ma_b(&skip, Assembler::NotEqual);
+ masm.ma_mov(Imm32(0), output);
+ masm.ma_b(&done);
+ masm.bind(&skip);
+ }
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Equal, snapshot);
+ }
+ }
+
+ // Handle negative 0.
+ if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) {
+ Label nonzero;
+ masm.as_cmp(lhs, Imm8(0));
+ masm.ma_b(&nonzero, Assembler::NotEqual);
+ masm.as_cmp(rhs, Imm8(0));
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::LessThan, snapshot);
+ masm.bind(&nonzero);
+ }
+}
+
+void CodeGenerator::visitDivI(LDivI* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register temp = ToRegister(ins->getTemp(0));
+ Register output = ToRegister(ins->output());
+ MDiv* mir = ins->mir();
+
+ Label done;
+ divICommon(mir, lhs, rhs, output, ins->snapshot(), done);
+
+ if (mir->canTruncateRemainder()) {
+ masm.ma_sdiv(lhs, rhs, output);
+ } else {
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_sdiv(lhs, rhs, temp);
+ masm.ma_mul(temp, rhs, scratch);
+ masm.ma_cmp(lhs, scratch);
+ }
+ bailoutIf(Assembler::NotEqual, ins->snapshot());
+ masm.ma_mov(temp, output);
+ }
+
+ masm.bind(&done);
+}
+
+extern "C" {
+extern MOZ_EXPORT int64_t __aeabi_idivmod(int, int);
+extern MOZ_EXPORT int64_t __aeabi_uidivmod(int, int);
+}
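+
+// Per the ARM EABI, these return the quotient in r0 and the remainder in
+// r1; the int64_t return type maps those onto its low and high words.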
+
+void CodeGenerator::visitSoftDivI(LSoftDivI* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register output = ToRegister(ins->output());
+ MDiv* mir = ins->mir();
+
+ Label done;
+ divICommon(mir, lhs, rhs, output, ins->snapshot(), done);
+
+ if (gen->compilingWasm()) {
+ masm.Push(InstanceReg);
+ int32_t framePushedAfterInstance = masm.framePushed();
+ masm.setupWasmABICall();
+ masm.passABIArg(lhs);
+ masm.passABIArg(rhs);
+ int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance;
+ masm.callWithABI(mir->bytecodeOffset(),
+ wasm::SymbolicAddress::aeabi_idivmod,
+ mozilla::Some(instanceOffset));
+ masm.Pop(InstanceReg);
+ } else {
+ using Fn = int64_t (*)(int, int);
+ masm.setupAlignedABICall();
+ masm.passABIArg(lhs);
+ masm.passABIArg(rhs);
+ masm.callWithABI<Fn, __aeabi_idivmod>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther);
+ }
+
+ // idivmod returns the quotient in r0, and the remainder in r1.
+ if (!mir->canTruncateRemainder()) {
+ MOZ_ASSERT(mir->fallible());
+ masm.as_cmp(r1, Imm8(0));
+ bailoutIf(Assembler::NonZero, ins->snapshot());
+ }
+
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitDivPowTwoI(LDivPowTwoI* ins) {
+ MDiv* mir = ins->mir();
+ Register lhs = ToRegister(ins->numerator());
+ Register output = ToRegister(ins->output());
+ int32_t shift = ins->shift();
+
+ if (shift == 0) {
+ masm.ma_mov(lhs, output);
+ return;
+ }
+
+ if (!mir->isTruncated()) {
+    // If the remainder is != 0, bail out, since the result must be a double.
+ {
+ // The bailout code also needs the scratch register.
+ // Here it is only used as a dummy target to set CC flags.
+ ScratchRegisterScope scratch(masm);
+ masm.as_mov(scratch, lsl(lhs, 32 - shift), SetCC);
+ }
+ bailoutIf(Assembler::NonZero, ins->snapshot());
+ }
+
+ if (!mir->canBeNegativeDividend()) {
+ // Numerator is unsigned, so needs no adjusting. Do the shift.
+ masm.as_mov(output, asr(lhs, shift));
+ return;
+ }
+
+ // Adjust the value so that shifting produces a correctly rounded result
+ // when the numerator is negative. See 10-1 "Signed Division by a Known
+ // Power of 2" in Henry S. Warren, Jr.'s Hacker's Delight.
+ ScratchRegisterScope scratch(masm);
+
+ if (shift > 1) {
+ masm.as_mov(scratch, asr(lhs, 31));
+ masm.as_add(scratch, lhs, lsr(scratch, 32 - shift));
+ } else {
+ masm.as_add(scratch, lhs, lsr(lhs, 32 - shift));
+ }
+
+ // Do the shift.
+ masm.as_mov(output, asr(scratch, shift));
+}
+
+void CodeGeneratorARM::modICommon(MMod* mir, Register lhs, Register rhs,
+ Register output, LSnapshot* snapshot,
+ Label& done) {
+ // X % 0 is bad because it will give garbage (or abort), when it should give
+ // NaN.
+
+ if (mir->canBeDivideByZero()) {
+ masm.as_cmp(rhs, Imm8(0));
+ if (mir->isTruncated()) {
+ Label nonZero;
+ masm.ma_b(&nonZero, Assembler::NotEqual);
+ if (mir->trapOnError()) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+ } else {
+ // NaN|0 == 0
+ masm.ma_mov(Imm32(0), output);
+ masm.ma_b(&done);
+ }
+ masm.bind(&nonZero);
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Equal, snapshot);
+ }
+ }
+}
+
+void CodeGenerator::visitModI(LModI* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register output = ToRegister(ins->output());
+ MMod* mir = ins->mir();
+
+  // Contrary to other architectures (notably x86), INT_MIN % -1 doesn't
+  // need to be handled separately. |ma_smod| computes the remainder using
+  // the |SDIV| and |MLS| instructions. On overflow, |SDIV| truncates the
+  // result to 32 bits and returns INT_MIN; see the ARM Architecture
+  // Reference Manual, SDIV instruction.
+ //
+ // mls(INT_MIN, sdiv(INT_MIN, -1), -1)
+ // = INT_MIN - (sdiv(INT_MIN, -1) * -1)
+ // = INT_MIN - (INT_MIN * -1)
+ // = INT_MIN - INT_MIN
+ // = 0
+ //
+ // And a zero remainder with a negative dividend is already handled below.
+
+ Label done;
+ modICommon(mir, lhs, rhs, output, ins->snapshot(), done);
+
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_smod(lhs, rhs, output, scratch);
+ }
+
+ // If X%Y == 0 and X < 0, then we *actually* wanted to return -0.0.
+ if (mir->canBeNegativeDividend()) {
+ if (mir->isTruncated()) {
+ // -0.0|0 == 0
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ // See if X < 0
+ masm.as_cmp(output, Imm8(0));
+ masm.ma_b(&done, Assembler::NotEqual);
+ masm.as_cmp(lhs, Imm8(0));
+ bailoutIf(Assembler::Signed, ins->snapshot());
+ }
+ }
+
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitSoftModI(LSoftModI* ins) {
+ // Extract the registers from this instruction.
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register output = ToRegister(ins->output());
+ Register callTemp = ToRegister(ins->callTemp());
+ MMod* mir = ins->mir();
+ Label done;
+
+  // Save the lhs in case we end up with a 0 that should be a -0.0 because
+  // lhs < 0.
+ MOZ_ASSERT(callTemp != lhs);
+ MOZ_ASSERT(callTemp != rhs);
+ masm.ma_mov(lhs, callTemp);
+
+ // Prevent INT_MIN % -1.
+ //
+ // |aeabi_idivmod| is allowed to return any arbitrary value when called with
+ // |(INT_MIN, -1)|, see "Run-time ABI for the ARM architecture manual". Most
+ // implementations perform a non-trapping signed integer division and
+ // return the expected result, i.e. INT_MIN. But since we can't rely on this
+ // behavior, handle this case separately here.
+ if (mir->canBeNegativeDividend()) {
+ {
+ ScratchRegisterScope scratch(masm);
+ // Sets EQ if lhs == INT_MIN
+ masm.ma_cmp(lhs, Imm32(INT_MIN), scratch);
+ // If EQ (LHS == INT_MIN), sets EQ if rhs == -1
+ masm.ma_cmp(rhs, Imm32(-1), scratch, Assembler::Equal);
+ }
+ if (mir->isTruncated()) {
+ // (INT_MIN % -1)|0 == 0
+ Label skip;
+ masm.ma_b(&skip, Assembler::NotEqual);
+ masm.ma_mov(Imm32(0), output);
+ masm.ma_b(&done);
+ masm.bind(&skip);
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Equal, ins->snapshot());
+ }
+ }
+
+ modICommon(mir, lhs, rhs, output, ins->snapshot(), done);
+
+ if (gen->compilingWasm()) {
+ masm.Push(InstanceReg);
+ int32_t framePushedAfterInstance = masm.framePushed();
+ masm.setupWasmABICall();
+ masm.passABIArg(lhs);
+ masm.passABIArg(rhs);
+ int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance;
+ masm.callWithABI(mir->bytecodeOffset(),
+ wasm::SymbolicAddress::aeabi_idivmod,
+ mozilla::Some(instanceOffset));
+ masm.Pop(InstanceReg);
+ } else {
+ using Fn = int64_t (*)(int, int);
+ masm.setupAlignedABICall();
+ masm.passABIArg(lhs);
+ masm.passABIArg(rhs);
+ masm.callWithABI<Fn, __aeabi_idivmod>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther);
+ }
+
+ MOZ_ASSERT(r1 != output);
+ masm.move32(r1, output);
+
+ // If X%Y == 0 and X < 0, then we *actually* wanted to return -0.0
+ if (mir->canBeNegativeDividend()) {
+ if (mir->isTruncated()) {
+ // -0.0|0 == 0
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ // See if X < 0
+ masm.as_cmp(output, Imm8(0));
+ masm.ma_b(&done, Assembler::NotEqual);
+ masm.as_cmp(callTemp, Imm8(0));
+ bailoutIf(Assembler::Signed, ins->snapshot());
+ }
+ }
+
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitModPowTwoI(LModPowTwoI* ins) {
+ Register in = ToRegister(ins->getOperand(0));
+ Register out = ToRegister(ins->getDef(0));
+ MMod* mir = ins->mir();
+ Label fin;
+  // Bug 739870: jbramley has a different sequence that may help with speed
+  // here.
+
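+  // For example, -7 % 4 (shift == 2): the dividend is negated to 7, masked
+  // with 3 to give 3, then negated back to -3, matching JS semantics.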
+ masm.ma_mov(in, out, SetCC);
+ masm.ma_b(&fin, Assembler::Zero);
+ masm.as_rsb(out, out, Imm8(0), LeaveCC, Assembler::Signed);
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_and(Imm32((1 << ins->shift()) - 1), out, scratch);
+ }
+ masm.as_rsb(out, out, Imm8(0), SetCC, Assembler::Signed);
+ if (mir->canBeNegativeDividend()) {
+ if (!mir->isTruncated()) {
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Zero, ins->snapshot());
+ } else {
+ // -0|0 == 0
+ }
+ }
+ masm.bind(&fin);
+}
+
+void CodeGenerator::visitModMaskI(LModMaskI* ins) {
+ Register src = ToRegister(ins->getOperand(0));
+ Register dest = ToRegister(ins->getDef(0));
+ Register tmp1 = ToRegister(ins->getTemp(0));
+ Register tmp2 = ToRegister(ins->getTemp(1));
+ MMod* mir = ins->mir();
+
+ ScratchRegisterScope scratch(masm);
+ SecondScratchRegisterScope scratch2(masm);
+
+ masm.ma_mod_mask(src, dest, tmp1, tmp2, scratch, scratch2, ins->shift());
+
+ if (mir->canBeNegativeDividend()) {
+ if (!mir->isTruncated()) {
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Zero, ins->snapshot());
+ } else {
+ // -0|0 == 0
+ }
+ }
+}
+
+void CodeGeneratorARM::emitBigIntDiv(LBigIntDiv* ins, Register dividend,
+ Register divisor, Register output,
+ Label* fail) {
+ // Callers handle division by zero and integer overflow.
+
+ if (HasIDIV()) {
+ masm.ma_sdiv(dividend, divisor, /* result= */ dividend);
+
+ // Create and return the result.
+ masm.newGCBigInt(output, divisor, initialBigIntHeap(), fail);
+ masm.initializeBigInt(output, dividend);
+
+ return;
+ }
+
+ // idivmod returns the quotient in r0, and the remainder in r1.
+ MOZ_ASSERT(dividend == r0);
+ MOZ_ASSERT(divisor == r1);
+
+ LiveRegisterSet volatileRegs = liveVolatileRegs(ins);
+ volatileRegs.takeUnchecked(dividend);
+ volatileRegs.takeUnchecked(divisor);
+ volatileRegs.takeUnchecked(output);
+
+ masm.PushRegsInMask(volatileRegs);
+
+ using Fn = int64_t (*)(int, int);
+ masm.setupUnalignedABICall(output);
+ masm.passABIArg(dividend);
+ masm.passABIArg(divisor);
+ masm.callWithABI<Fn, __aeabi_idivmod>(MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckOther);
+
+ masm.PopRegsInMask(volatileRegs);
+
+ // Create and return the result.
+ masm.newGCBigInt(output, divisor, initialBigIntHeap(), fail);
+ masm.initializeBigInt(output, dividend);
+}
+
+void CodeGeneratorARM::emitBigIntMod(LBigIntMod* ins, Register dividend,
+ Register divisor, Register output,
+ Label* fail) {
+ // Callers handle division by zero and integer overflow.
+
+ if (HasIDIV()) {
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_smod(dividend, divisor, /* result= */ dividend, scratch);
+ }
+
+ // Create and return the result.
+ masm.newGCBigInt(output, divisor, initialBigIntHeap(), fail);
+ masm.initializeBigInt(output, dividend);
+
+ return;
+ }
+
+ // idivmod returns the quotient in r0, and the remainder in r1.
+ MOZ_ASSERT(dividend == r0);
+ MOZ_ASSERT(divisor == r1);
+
+ LiveRegisterSet volatileRegs = liveVolatileRegs(ins);
+ volatileRegs.takeUnchecked(dividend);
+ volatileRegs.takeUnchecked(divisor);
+ volatileRegs.takeUnchecked(output);
+
+ masm.PushRegsInMask(volatileRegs);
+
+ using Fn = int64_t (*)(int, int);
+ masm.setupUnalignedABICall(output);
+ masm.passABIArg(dividend);
+ masm.passABIArg(divisor);
+ masm.callWithABI<Fn, __aeabi_idivmod>(MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckOther);
+
+ masm.PopRegsInMask(volatileRegs);
+
+ // Create and return the result.
+ masm.newGCBigInt(output, dividend, initialBigIntHeap(), fail);
+ masm.initializeBigInt(output, divisor);
+}
+
+void CodeGenerator::visitBitNotI(LBitNotI* ins) {
+ const LAllocation* input = ins->getOperand(0);
+ const LDefinition* dest = ins->getDef(0);
+  // Lowering never feeds this a constant input. On ARM that is not a hard
+  // limit: MVN accepts an imm8m operand, so a wider range of constants
+  // could be inverted directly, but we don't take advantage of that.
+  MOZ_ASSERT(!input->isConstant());
+
+ masm.ma_mvn(ToRegister(input), ToRegister(dest));
+}
+
+void CodeGenerator::visitBitOpI(LBitOpI* ins) {
+ const LAllocation* lhs = ins->getOperand(0);
+ const LAllocation* rhs = ins->getOperand(1);
+ const LDefinition* dest = ins->getDef(0);
+
+ ScratchRegisterScope scratch(masm);
+
+  // All of these bitops should be either imm32's or integer registers.
+ switch (ins->bitop()) {
+ case JSOp::BitOr:
+ if (rhs->isConstant()) {
+ masm.ma_orr(Imm32(ToInt32(rhs)), ToRegister(lhs), ToRegister(dest),
+ scratch);
+ } else {
+ masm.ma_orr(ToRegister(rhs), ToRegister(lhs), ToRegister(dest));
+ }
+ break;
+ case JSOp::BitXor:
+ if (rhs->isConstant()) {
+ masm.ma_eor(Imm32(ToInt32(rhs)), ToRegister(lhs), ToRegister(dest),
+ scratch);
+ } else {
+ masm.ma_eor(ToRegister(rhs), ToRegister(lhs), ToRegister(dest));
+ }
+ break;
+ case JSOp::BitAnd:
+ if (rhs->isConstant()) {
+ masm.ma_and(Imm32(ToInt32(rhs)), ToRegister(lhs), ToRegister(dest),
+ scratch);
+ } else {
+ masm.ma_and(ToRegister(rhs), ToRegister(lhs), ToRegister(dest));
+ }
+ break;
+ default:
+ MOZ_CRASH("unexpected binary opcode");
+ }
+}
+
+void CodeGenerator::visitShiftI(LShiftI* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ const LAllocation* rhs = ins->rhs();
+ Register dest = ToRegister(ins->output());
+
+ if (rhs->isConstant()) {
+ int32_t shift = ToInt32(rhs) & 0x1F;
+ switch (ins->bitop()) {
+ case JSOp::Lsh:
+ if (shift) {
+ masm.ma_lsl(Imm32(shift), lhs, dest);
+ } else {
+ masm.ma_mov(lhs, dest);
+ }
+ break;
+ case JSOp::Rsh:
+ if (shift) {
+ masm.ma_asr(Imm32(shift), lhs, dest);
+ } else {
+ masm.ma_mov(lhs, dest);
+ }
+ break;
+ case JSOp::Ursh:
+ if (shift) {
+ masm.ma_lsr(Imm32(shift), lhs, dest);
+ } else {
+ // x >>> 0 can overflow.
+ masm.ma_mov(lhs, dest);
+ if (ins->mir()->toUrsh()->fallible()) {
+ masm.as_cmp(dest, Imm8(0));
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+ }
+ }
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+ } else {
+    // The shift amounts should be AND'ed into the 0-31 range, since ARM
+    // shifts by the low byte of the register (it will happily attempt to
+    // shift by 250 if you ask it to).
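+    // (JS itself requires this masking: (1 << 250) === (1 << 26), since
+    // 250 & 31 == 26.)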
+ masm.as_and(dest, ToRegister(rhs), Imm8(0x1F));
+
+ switch (ins->bitop()) {
+ case JSOp::Lsh:
+ masm.ma_lsl(dest, lhs, dest);
+ break;
+ case JSOp::Rsh:
+ masm.ma_asr(dest, lhs, dest);
+ break;
+ case JSOp::Ursh:
+ masm.ma_lsr(dest, lhs, dest);
+ if (ins->mir()->toUrsh()->fallible()) {
+ // x >>> 0 can overflow.
+ masm.as_cmp(dest, Imm8(0));
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+ }
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+ }
+}
+
+void CodeGenerator::visitUrshD(LUrshD* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ Register temp = ToRegister(ins->temp());
+
+ const LAllocation* rhs = ins->rhs();
+ FloatRegister out = ToFloatRegister(ins->output());
+
+ if (rhs->isConstant()) {
+ int32_t shift = ToInt32(rhs) & 0x1F;
+ if (shift) {
+ masm.ma_lsr(Imm32(shift), lhs, temp);
+ } else {
+ masm.ma_mov(lhs, temp);
+ }
+ } else {
+ masm.as_and(temp, ToRegister(rhs), Imm8(0x1F));
+ masm.ma_lsr(temp, lhs, temp);
+ }
+
+ masm.convertUInt32ToDouble(temp, out);
+}
+
+void CodeGenerator::visitClzI(LClzI* ins) {
+ Register input = ToRegister(ins->input());
+ Register output = ToRegister(ins->output());
+
+ masm.clz32(input, output, /* knownNotZero = */ false);
+}
+
+void CodeGenerator::visitCtzI(LCtzI* ins) {
+ Register input = ToRegister(ins->input());
+ Register output = ToRegister(ins->output());
+
+ masm.ctz32(input, output, /* knownNotZero = */ false);
+}
+
+void CodeGenerator::visitPopcntI(LPopcntI* ins) {
+ Register input = ToRegister(ins->input());
+ Register output = ToRegister(ins->output());
+
+ Register tmp = ToRegister(ins->temp0());
+
+ masm.popcnt32(input, output, tmp);
+}
+
+void CodeGenerator::visitPowHalfD(LPowHalfD* ins) {
+ FloatRegister input = ToFloatRegister(ins->input());
+ FloatRegister output = ToFloatRegister(ins->output());
+ ScratchDoubleScope scratch(masm);
+
+ Label done;
+
+  // Math.pow(-Infinity, 0.5) == Infinity.
+ masm.loadConstantDouble(NegativeInfinity<double>(), scratch);
+ masm.compareDouble(input, scratch);
+ masm.ma_vneg(scratch, output, Assembler::Equal);
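+  // (The vneg flips the -Infinity already in scratch to +Infinity.)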
+ masm.ma_b(&done, Assembler::Equal);
+
+ // Math.pow(-0, 0.5) == 0 == Math.pow(0, 0.5).
+ // Adding 0 converts any -0 to 0.
+ masm.loadConstantDouble(0.0, scratch);
+ masm.ma_vadd(scratch, input, output);
+ masm.ma_vsqrt(output, output);
+
+ masm.bind(&done);
+}
+
+MoveOperand CodeGeneratorARM::toMoveOperand(LAllocation a) const {
+ if (a.isGeneralReg()) {
+ return MoveOperand(ToRegister(a));
+ }
+ if (a.isFloatReg()) {
+ return MoveOperand(ToFloatRegister(a));
+ }
+ MoveOperand::Kind kind = a.isStackArea() ? MoveOperand::Kind::EffectiveAddress
+ : MoveOperand::Kind::Memory;
+ Address addr = ToAddress(a);
+ MOZ_ASSERT((addr.offset & 3) == 0);
+ return MoveOperand(addr, kind);
+}
+
+class js::jit::OutOfLineTableSwitch
+ : public OutOfLineCodeBase<CodeGeneratorARM> {
+ MTableSwitch* mir_;
+ Vector<CodeLabel, 8, JitAllocPolicy> codeLabels_;
+
+ void accept(CodeGeneratorARM* codegen) override {
+ codegen->visitOutOfLineTableSwitch(this);
+ }
+
+ public:
+ OutOfLineTableSwitch(TempAllocator& alloc, MTableSwitch* mir)
+ : mir_(mir), codeLabels_(alloc) {}
+
+ MTableSwitch* mir() const { return mir_; }
+
+ bool addCodeLabel(CodeLabel label) { return codeLabels_.append(label); }
+ CodeLabel codeLabel(unsigned i) { return codeLabels_[i]; }
+};
+
+void CodeGeneratorARM::visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool) {
+ MTableSwitch* mir = ool->mir();
+
+ size_t numCases = mir->numCases();
+ for (size_t i = 0; i < numCases; i++) {
+ LBlock* caseblock =
+ skipTrivialBlocks(mir->getCase(numCases - 1 - i))->lir();
+ Label* caseheader = caseblock->label();
+ uint32_t caseoffset = caseheader->offset();
+
+ // The entries of the jump table need to be absolute addresses and thus
+ // must be patched after codegen is finished.
+ CodeLabel cl = ool->codeLabel(i);
+ cl.target()->bind(caseoffset);
+ masm.addCodeLabel(cl);
+ }
+}
+
+void CodeGeneratorARM::emitTableSwitchDispatch(MTableSwitch* mir,
+ Register index, Register base) {
+ // The code generated by this is utter hax.
+ // The end result looks something like:
+ // SUBS index, input, #base
+ // RSBSPL index, index, #max
+ // LDRPL pc, pc, index lsl 2
+ // B default
+
+  // If the range of targets is N through M, we first subtract off the
+  // lowest case (N), which both shifts the arguments into the range 0 to
+  // (M - N) and sets the Minus flag if the argument was out of range on
+  // the low end.
+
+  // Then we do a reverse subtract with the size of the jump table, which
+  // reverses the range (it becomes size through 0, rather than 0 through
+  // size). The main purpose of this is that the upper bound check sets the
+  // same flag as the lower bound check. Lastly, we do this conditionally
+  // on the previous check succeeding.
+
+  // Then we conditionally load, into the pc, the table word at pc offset by
+  // the (reversed) index (times the word size), which branches to the
+  // correct case. NOTE: when we go to read the pc, the value that we get
+  // back is the pc of the current instruction *PLUS 8*. This means that
+  // ldr foo, [pc, +0] reads $pc+8. In other words, there is an empty word
+  // after the branch into the switch table before the table actually
+  // starts. Since the only other unhandled case is the default case (both
+  // out of range high and out of range low), we insert a branch to the
+  // default case into that extra slot, which ensures we don't attempt to
+  // execute the address table.
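+  // Worked example: with cases 10..13 (4 entries), index 12 becomes
+  // 12 - 10 == 2 after the SUBS, and the RSBS gives 3 - 2 == 1, which is
+  // non-negative, so the conditional LDR jumps through (reversed) table
+  // entry 1; for index 9 or 14 the Minus flag ends up set, the LDR is
+  // skipped, and we fall through to the branch to the default case.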
+ Label* defaultcase = skipTrivialBlocks(mir->getDefault())->lir()->label();
+
+ ScratchRegisterScope scratch(masm);
+
+ int32_t cases = mir->numCases();
+  // Subtract the lowest case value from the index.
+ masm.ma_sub(index, Imm32(mir->low()), index, scratch, SetCC);
+ masm.ma_rsb(index, Imm32(cases - 1), index, scratch, SetCC,
+ Assembler::NotSigned);
+ // Inhibit pools within the following sequence because we are indexing into
+ // a pc relative table. The region will have one instruction for ma_ldr, one
+ // for ma_b, and each table case takes one word.
+ AutoForbidPoolsAndNops afp(&masm, 1 + 1 + cases);
+ masm.ma_ldr(DTRAddr(pc, DtrRegImmShift(index, LSL, 2)), pc, Offset,
+ Assembler::NotSigned);
+ masm.ma_b(defaultcase);
+
+ // To fill in the CodeLabels for the case entries, we need to first generate
+ // the case entries (we don't yet know their offsets in the instruction
+ // stream).
+ OutOfLineTableSwitch* ool = new (alloc()) OutOfLineTableSwitch(alloc(), mir);
+ for (int32_t i = 0; i < cases; i++) {
+ CodeLabel cl;
+ masm.writeCodePointer(&cl);
+ masm.propagateOOM(ool->addCodeLabel(cl));
+ }
+ addOutOfLineCode(ool, mir);
+}
+
+void CodeGenerator::visitMathD(LMathD* math) {
+ FloatRegister src1 = ToFloatRegister(math->getOperand(0));
+ FloatRegister src2 = ToFloatRegister(math->getOperand(1));
+ FloatRegister output = ToFloatRegister(math->getDef(0));
+
+ switch (math->jsop()) {
+ case JSOp::Add:
+ masm.ma_vadd(src1, src2, output);
+ break;
+ case JSOp::Sub:
+ masm.ma_vsub(src1, src2, output);
+ break;
+ case JSOp::Mul:
+ masm.ma_vmul(src1, src2, output);
+ break;
+ case JSOp::Div:
+ masm.ma_vdiv(src1, src2, output);
+ break;
+ default:
+ MOZ_CRASH("unexpected opcode");
+ }
+}
+
+void CodeGenerator::visitMathF(LMathF* math) {
+ FloatRegister src1 = ToFloatRegister(math->getOperand(0));
+ FloatRegister src2 = ToFloatRegister(math->getOperand(1));
+ FloatRegister output = ToFloatRegister(math->getDef(0));
+
+ switch (math->jsop()) {
+ case JSOp::Add:
+ masm.ma_vadd_f32(src1, src2, output);
+ break;
+ case JSOp::Sub:
+ masm.ma_vsub_f32(src1, src2, output);
+ break;
+ case JSOp::Mul:
+ masm.ma_vmul_f32(src1, src2, output);
+ break;
+ case JSOp::Div:
+ masm.ma_vdiv_f32(src1, src2, output);
+ break;
+ default:
+ MOZ_CRASH("unexpected opcode");
+ }
+}
+
+void CodeGenerator::visitTruncateDToInt32(LTruncateDToInt32* ins) {
+ emitTruncateDouble(ToFloatRegister(ins->input()), ToRegister(ins->output()),
+ ins->mir());
+}
+
+void CodeGenerator::visitWasmBuiltinTruncateDToInt32(
+ LWasmBuiltinTruncateDToInt32* ins) {
+ emitTruncateDouble(ToFloatRegister(ins->getOperand(0)),
+ ToRegister(ins->getDef(0)), ins->mir());
+}
+
+void CodeGenerator::visitTruncateFToInt32(LTruncateFToInt32* ins) {
+ emitTruncateFloat32(ToFloatRegister(ins->input()), ToRegister(ins->output()),
+ ins->mir());
+}
+
+void CodeGenerator::visitWasmBuiltinTruncateFToInt32(
+ LWasmBuiltinTruncateFToInt32* ins) {
+ emitTruncateFloat32(ToFloatRegister(ins->getOperand(0)),
+ ToRegister(ins->getDef(0)), ins->mir());
+}
+
+ValueOperand CodeGeneratorARM::ToValue(LInstruction* ins, size_t pos) {
+ Register typeReg = ToRegister(ins->getOperand(pos + TYPE_INDEX));
+ Register payloadReg = ToRegister(ins->getOperand(pos + PAYLOAD_INDEX));
+ return ValueOperand(typeReg, payloadReg);
+}
+
+ValueOperand CodeGeneratorARM::ToTempValue(LInstruction* ins, size_t pos) {
+ Register typeReg = ToRegister(ins->getTemp(pos + TYPE_INDEX));
+ Register payloadReg = ToRegister(ins->getTemp(pos + PAYLOAD_INDEX));
+ return ValueOperand(typeReg, payloadReg);
+}
+
+void CodeGenerator::visitValue(LValue* value) {
+ const ValueOperand out = ToOutValue(value);
+
+ masm.moveValue(value->value(), out);
+}
+
+void CodeGenerator::visitBox(LBox* box) {
+ const LDefinition* type = box->getDef(TYPE_INDEX);
+
+ MOZ_ASSERT(!box->getOperand(0)->isConstant());
+
+ // On arm, the input operand and the output payload have the same virtual
+ // register. All that needs to be written is the type tag for the type
+ // definition.
+ masm.ma_mov(Imm32(MIRTypeToTag(box->type())), ToRegister(type));
+}
+
+void CodeGenerator::visitBoxFloatingPoint(LBoxFloatingPoint* box) {
+ const AnyRegister in = ToAnyRegister(box->getOperand(0));
+ const ValueOperand out = ToOutValue(box);
+
+ masm.moveValue(TypedOrValueRegister(box->type(), in), out);
+}
+
+void CodeGenerator::visitUnbox(LUnbox* unbox) {
+ // Note that for unbox, the type and payload indexes are switched on the
+ // inputs.
+ MUnbox* mir = unbox->mir();
+ Register type = ToRegister(unbox->type());
+ Register payload = ToRegister(unbox->payload());
+ Register output = ToRegister(unbox->output());
+
+ mozilla::Maybe<ScratchRegisterScope> scratch;
+ scratch.emplace(masm);
+
+ JSValueTag tag = MIRTypeToTag(mir->type());
+ if (mir->fallible()) {
+ masm.ma_cmp(type, Imm32(tag), *scratch);
+ bailoutIf(Assembler::NotEqual, unbox->snapshot());
+ } else {
+#ifdef DEBUG
+ Label ok;
+ masm.ma_cmp(type, Imm32(tag), *scratch);
+ masm.ma_b(&ok, Assembler::Equal);
+ scratch.reset();
+ masm.assumeUnreachable("Infallible unbox type mismatch");
+ masm.bind(&ok);
+#endif
+ }
+
+  // Note: If spectreValueMasking is disabled, then this instruction will
+  // default to a no-op as long as the lowering allocates the same register
+  // for the output and the payload.
+ masm.unboxNonDouble(ValueOperand(type, payload), output,
+ ValueTypeFromMIRType(mir->type()));
+}
+
+void CodeGenerator::visitDouble(LDouble* ins) {
+ const LDefinition* out = ins->getDef(0);
+ masm.loadConstantDouble(ins->value(), ToFloatRegister(out));
+}
+
+void CodeGenerator::visitFloat32(LFloat32* ins) {
+ const LDefinition* out = ins->getDef(0);
+ masm.loadConstantFloat32(ins->value(), ToFloatRegister(out));
+}
+
+void CodeGeneratorARM::splitTagForTest(const ValueOperand& value,
+ ScratchTagScope& tag) {
+ MOZ_ASSERT(value.typeReg() == tag);
+}
+
+void CodeGenerator::visitTestDAndBranch(LTestDAndBranch* test) {
+ const LAllocation* opd = test->input();
+ masm.ma_vcmpz(ToFloatRegister(opd));
+ masm.as_vmrs(pc);
+
+ MBasicBlock* ifTrue = test->ifTrue();
+ MBasicBlock* ifFalse = test->ifFalse();
+  // If the compare set the Z bit, then the result is definitely false.
+ jumpToBlock(ifFalse, Assembler::Zero);
+  // It is also false if one of the operands is NaN, which is reported as
+  // Overflow.
+ jumpToBlock(ifFalse, Assembler::Overflow);
+ jumpToBlock(ifTrue);
+}
+
+void CodeGenerator::visitTestFAndBranch(LTestFAndBranch* test) {
+ const LAllocation* opd = test->input();
+ masm.ma_vcmpz_f32(ToFloatRegister(opd));
+ masm.as_vmrs(pc);
+
+ MBasicBlock* ifTrue = test->ifTrue();
+ MBasicBlock* ifFalse = test->ifFalse();
+  // If the compare set the Z bit, then the result is definitely false.
+ jumpToBlock(ifFalse, Assembler::Zero);
+  // It is also false if one of the operands is NaN, which is reported as
+  // Overflow.
+ jumpToBlock(ifFalse, Assembler::Overflow);
+ jumpToBlock(ifTrue);
+}
+
+void CodeGenerator::visitCompareD(LCompareD* comp) {
+ FloatRegister lhs = ToFloatRegister(comp->left());
+ FloatRegister rhs = ToFloatRegister(comp->right());
+
+ Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop());
+ masm.compareDouble(lhs, rhs);
+ masm.emitSet(Assembler::ConditionFromDoubleCondition(cond),
+ ToRegister(comp->output()));
+}
+
+void CodeGenerator::visitCompareF(LCompareF* comp) {
+ FloatRegister lhs = ToFloatRegister(comp->left());
+ FloatRegister rhs = ToFloatRegister(comp->right());
+
+ Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop());
+ masm.compareFloat(lhs, rhs);
+ masm.emitSet(Assembler::ConditionFromDoubleCondition(cond),
+ ToRegister(comp->output()));
+}
+
+void CodeGenerator::visitCompareDAndBranch(LCompareDAndBranch* comp) {
+ FloatRegister lhs = ToFloatRegister(comp->left());
+ FloatRegister rhs = ToFloatRegister(comp->right());
+
+ Assembler::DoubleCondition cond =
+ JSOpToDoubleCondition(comp->cmpMir()->jsop());
+ masm.compareDouble(lhs, rhs);
+ emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(),
+ comp->ifFalse());
+}
+
+void CodeGenerator::visitCompareFAndBranch(LCompareFAndBranch* comp) {
+ FloatRegister lhs = ToFloatRegister(comp->left());
+ FloatRegister rhs = ToFloatRegister(comp->right());
+
+ Assembler::DoubleCondition cond =
+ JSOpToDoubleCondition(comp->cmpMir()->jsop());
+ masm.compareFloat(lhs, rhs);
+ emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(),
+ comp->ifFalse());
+}
+
+void CodeGenerator::visitBitAndAndBranch(LBitAndAndBranch* baab) {
+ // LBitAndAndBranch only represents single-word ANDs, hence it can't be
+ // 64-bit here.
+ MOZ_ASSERT(!baab->is64());
+ Register regL = ToRegister(baab->left());
+ if (baab->right()->isConstant()) {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_tst(regL, Imm32(ToInt32(baab->right())), scratch);
+ } else {
+ masm.ma_tst(regL, ToRegister(baab->right()));
+ }
+ emitBranch(baab->cond(), baab->ifTrue(), baab->ifFalse());
+}
+
+void CodeGenerator::visitWasmUint32ToDouble(LWasmUint32ToDouble* lir) {
+ masm.convertUInt32ToDouble(ToRegister(lir->input()),
+ ToFloatRegister(lir->output()));
+}
+
+void CodeGenerator::visitWasmUint32ToFloat32(LWasmUint32ToFloat32* lir) {
+ masm.convertUInt32ToFloat32(ToRegister(lir->input()),
+ ToFloatRegister(lir->output()));
+}
+
+void CodeGenerator::visitNotI(LNotI* ins) {
+ // It is hard to optimize !x, so just do it the basic way for now.
+ masm.as_cmp(ToRegister(ins->input()), Imm8(0));
+ masm.emitSet(Assembler::Equal, ToRegister(ins->output()));
+}
+
+void CodeGenerator::visitNotI64(LNotI64* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ Register output = ToRegister(lir->output());
+
+ masm.ma_orr(input.low, input.high, output);
+ masm.as_cmp(output, Imm8(0));
+ masm.emitSet(Assembler::Equal, output);
+}
+
+void CodeGenerator::visitNotD(LNotD* ins) {
+  // Since this operation is a logical NOT, we want to set a bit if the
+  // double is falsy, i.e. 0.0, -0.0 or NaN. When comparing with 0, an
+  // input of 0 will set the Z bit (30) and NaN will set the V bit (28) of
+  // the APSR.
+ FloatRegister opd = ToFloatRegister(ins->input());
+ Register dest = ToRegister(ins->output());
+
+ // Do the compare.
+ masm.ma_vcmpz(opd);
+  // TODO: There are three variations here whose performance should be
+  // compared.
+ bool nocond = true;
+ if (nocond) {
+ // Load the value into the dest register.
+ masm.as_vmrs(dest);
+ masm.ma_lsr(Imm32(28), dest, dest);
+    // Z is APSR bit 30 == 28 + 2, so after the shift it sits at bit 2;
+    // OR in dest >> 2 to fold it onto V at bit 0.
+ masm.ma_alu(dest, lsr(dest, 2), dest, OpOrr);
+ masm.as_and(dest, dest, Imm8(1));
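+    // dest is now 1 iff the input was 0.0, -0.0, or NaN.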
+ } else {
+ masm.as_vmrs(pc);
+ masm.ma_mov(Imm32(0), dest);
+ masm.ma_mov(Imm32(1), dest, Assembler::Equal);
+ masm.ma_mov(Imm32(1), dest, Assembler::Overflow);
+ }
+}
+
+void CodeGenerator::visitNotF(LNotF* ins) {
+  // Since this operation is a logical NOT, we want to set a bit if the
+  // float is falsy, i.e. 0.0, -0.0 or NaN. When comparing with 0, an
+  // input of 0 will set the Z bit (30) and NaN will set the V bit (28) of
+  // the APSR.
+ FloatRegister opd = ToFloatRegister(ins->input());
+ Register dest = ToRegister(ins->output());
+
+ // Do the compare.
+ masm.ma_vcmpz_f32(opd);
+  // TODO: There are three variations here whose performance should be
+  // compared.
+ bool nocond = true;
+ if (nocond) {
+ // Load the value into the dest register.
+ masm.as_vmrs(dest);
+ masm.ma_lsr(Imm32(28), dest, dest);
+    // Z is APSR bit 30 == 28 + 2, so after the shift it sits at bit 2;
+    // OR in dest >> 2 to fold it onto V at bit 0.
+ masm.ma_alu(dest, lsr(dest, 2), dest, OpOrr);
+ masm.as_and(dest, dest, Imm8(1));
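+    // dest is now 1 iff the input was 0.0, -0.0, or NaN.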
+ } else {
+ masm.as_vmrs(pc);
+ masm.ma_mov(Imm32(0), dest);
+ masm.ma_mov(Imm32(1), dest, Assembler::Equal);
+ masm.ma_mov(Imm32(1), dest, Assembler::Overflow);
+ }
+}
+
+void CodeGeneratorARM::generateInvalidateEpilogue() {
+ // Ensure that there is enough space in the buffer for the OsiPoint patching
+ // to occur. Otherwise, we could overwrite the invalidation epilogue.
+ for (size_t i = 0; i < sizeof(void*); i += Assembler::NopSize()) {
+ masm.nop();
+ }
+
+ masm.bind(&invalidate_);
+
+ // Push the return address of the point that we bailed out at onto the stack.
+ masm.Push(lr);
+
+ // Push the Ion script onto the stack (when we determine what that pointer
+ // is).
+ invalidateEpilogueData_ = masm.pushWithPatch(ImmWord(uintptr_t(-1)));
+
+ // Jump to the invalidator which will replace the current frame.
+ TrampolinePtr thunk = gen->jitRuntime()->getInvalidationThunk();
+ masm.jump(thunk);
+}
+
+void CodeGenerator::visitCompareExchangeTypedArrayElement(
+ LCompareExchangeTypedArrayElement* lir) {
+ Register elements = ToRegister(lir->elements());
+ AnyRegister output = ToAnyRegister(lir->output());
+ Register temp =
+ lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp());
+
+ Register oldval = ToRegister(lir->oldval());
+ Register newval = ToRegister(lir->newval());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval,
+ newval, temp, output);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval,
+ newval, temp, output);
+ }
+}
+
+void CodeGenerator::visitAtomicExchangeTypedArrayElement(
+ LAtomicExchangeTypedArrayElement* lir) {
+ Register elements = ToRegister(lir->elements());
+ AnyRegister output = ToAnyRegister(lir->output());
+ Register temp =
+ lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp());
+
+ Register value = ToRegister(lir->value());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp,
+ output);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp,
+ output);
+ }
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinop(
+ LAtomicTypedArrayElementBinop* lir) {
+ MOZ_ASSERT(!lir->mir()->isForEffect());
+
+ AnyRegister output = ToAnyRegister(lir->output());
+ Register elements = ToRegister(lir->elements());
+ Register flagTemp = ToRegister(lir->temp1());
+ Register outTemp =
+ lir->temp2()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp2());
+ Register value = ToRegister(lir->value());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address mem = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicFetchOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp, outTemp,
+ output);
+ } else {
+ BaseIndex mem(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicFetchOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp, outTemp,
+ output);
+ }
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect(
+ LAtomicTypedArrayElementBinopForEffect* lir) {
+ MOZ_ASSERT(lir->mir()->isForEffect());
+
+ Register elements = ToRegister(lir->elements());
+ Register flagTemp = ToRegister(lir->flagTemp());
+ Register value = ToRegister(lir->value());
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address mem = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicEffectOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp);
+ } else {
+ BaseIndex mem(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicEffectOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp);
+ }
+}
+
+void CodeGenerator::visitAtomicLoad64(LAtomicLoad64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register temp = ToRegister(lir->temp());
+ Register64 temp64 = ToRegister64(lir->temp64());
+ Register out = ToRegister(lir->output());
+
+ const MLoadUnboxedScalar* mir = lir->mir();
+
+ Scalar::Type storageType = mir->storageType();
+
+ if (lir->index()->isConstant()) {
+ Address source =
+ ToAddress(elements, lir->index(), storageType, mir->offsetAdjustment());
+ masm.atomicLoad64(Synchronization::Load(), source, temp64);
+ } else {
+ BaseIndex source(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(storageType), mir->offsetAdjustment());
+ masm.atomicLoad64(Synchronization::Load(), source, temp64);
+ }
+
+ emitCreateBigInt(lir, storageType, temp64, out, temp);
+}
+
+void CodeGenerator::visitAtomicStore64(LAtomicStore64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = ToRegister64(lir->temp2());
+
+ Scalar::Type writeType = lir->mir()->writeType();
+
+ masm.loadBigInt64(value, temp1);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), writeType);
+ masm.atomicStore64(Synchronization::Store(), dest, temp1, temp2);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(writeType));
+ masm.atomicStore64(Synchronization::Store(), dest, temp1, temp2);
+ }
+}
+
+void CodeGenerator::visitCompareExchangeTypedArrayElement64(
+ LCompareExchangeTypedArrayElement64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register oldval = ToRegister(lir->oldval());
+ Register newval = ToRegister(lir->newval());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = ToRegister64(lir->temp2());
+ Register64 temp3 = ToRegister64(lir->temp3());
+ Register out = ToRegister(lir->output());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ masm.loadBigInt64(oldval, temp1);
+ masm.loadBigInt64(newval, temp2);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.compareExchange64(Synchronization::Full(), dest, temp1, temp2, temp3);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.compareExchange64(Synchronization::Full(), dest, temp1, temp2, temp3);
+ }
+
+ emitCreateBigInt(lir, arrayType, temp3, out, temp1.scratchReg());
+}
+
+void CodeGenerator::visitAtomicExchangeTypedArrayElement64(
+ LAtomicExchangeTypedArrayElement64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register temp2 = ToRegister(lir->temp2());
+ Register out = ToRegister(lir->output());
+ Register64 temp64 = Register64(temp2, out);
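+  // temp64 deliberately aliases the output register: it is dead before
+  // emitCreateBigInt writes the boxed result into out.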
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ masm.loadBigInt64(value, temp64);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicExchange64(Synchronization::Full(), dest, temp64, temp1);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicExchange64(Synchronization::Full(), dest, temp64, temp1);
+ }
+
+ emitCreateBigInt(lir, arrayType, temp1, out, temp2);
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinop64(
+ LAtomicTypedArrayElementBinop64* lir) {
+ MOZ_ASSERT(!lir->mir()->isForEffect());
+
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = ToRegister64(lir->temp2());
+ Register64 temp3 = ToRegister64(lir->temp3());
+ Register out = ToRegister(lir->output());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+ AtomicOp atomicOp = lir->mir()->operation();
+
+ masm.loadBigInt64(value, temp1);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicFetchOp64(Synchronization::Full(), atomicOp, temp1, dest, temp2,
+ temp3);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicFetchOp64(Synchronization::Full(), atomicOp, temp1, dest, temp2,
+ temp3);
+ }
+
+ emitCreateBigInt(lir, arrayType, temp3, out, temp2.scratchReg());
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect64(
+ LAtomicTypedArrayElementBinopForEffect64* lir) {
+ MOZ_ASSERT(lir->mir()->isForEffect());
+
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = ToRegister64(lir->temp2());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+ AtomicOp atomicOp = lir->mir()->operation();
+
+ masm.loadBigInt64(value, temp1);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicEffectOp64(Synchronization::Full(), atomicOp, temp1, dest,
+ temp2);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicEffectOp64(Synchronization::Full(), atomicOp, temp1, dest,
+ temp2);
+ }
+}
+
+void CodeGenerator::visitWasmSelect(LWasmSelect* ins) {
+ MIRType mirType = ins->mir()->type();
+
+ Register cond = ToRegister(ins->condExpr());
+ masm.as_cmp(cond, Imm8(0));
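+  // The true expression is already in the output register; the conditional
+  // moves below replace it with falseExpr only when cond was zero.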
+
+ if (mirType == MIRType::Int32 || mirType == MIRType::RefOrNull) {
+ Register falseExpr = ToRegister(ins->falseExpr());
+ Register out = ToRegister(ins->output());
+ MOZ_ASSERT(ToRegister(ins->trueExpr()) == out,
+ "true expr input is reused for output");
+ masm.ma_mov(falseExpr, out, LeaveCC, Assembler::Zero);
+ return;
+ }
+
+ FloatRegister out = ToFloatRegister(ins->output());
+ MOZ_ASSERT(ToFloatRegister(ins->trueExpr()) == out,
+ "true expr input is reused for output");
+
+ FloatRegister falseExpr = ToFloatRegister(ins->falseExpr());
+
+ if (mirType == MIRType::Double) {
+ masm.moveDouble(falseExpr, out, Assembler::Zero);
+ } else if (mirType == MIRType::Float32) {
+ masm.moveFloat32(falseExpr, out, Assembler::Zero);
+ } else {
+ MOZ_CRASH("unhandled type in visitWasmSelect!");
+ }
+}
+
+// We expect to handle only the case where compare is {U,}Int32 and select is
+// {U,}Int32, and the "true" input is reused for the output.
+void CodeGenerator::visitWasmCompareAndSelect(LWasmCompareAndSelect* ins) {
+ bool cmpIs32bit = ins->compareType() == MCompare::Compare_Int32 ||
+ ins->compareType() == MCompare::Compare_UInt32;
+ bool selIs32bit = ins->mir()->type() == MIRType::Int32;
+
+ MOZ_RELEASE_ASSERT(
+ cmpIs32bit && selIs32bit,
+ "CodeGenerator::visitWasmCompareAndSelect: unexpected types");
+
+ Register trueExprAndDest = ToRegister(ins->output());
+ MOZ_ASSERT(ToRegister(ins->ifTrueExpr()) == trueExprAndDest,
+ "true expr input is reused for output");
+
+ Assembler::Condition cond = Assembler::InvertCondition(
+ JSOpToCondition(ins->compareType(), ins->jsop()));
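+  // The condition is inverted so that cmp32Move32 overwrites the output
+  // (the reused true expression) with falseExpr exactly when the original
+  // comparison fails.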
+ const LAllocation* rhs = ins->rightExpr();
+ const LAllocation* falseExpr = ins->ifFalseExpr();
+ Register lhs = ToRegister(ins->leftExpr());
+
+ masm.cmp32Move32(cond, lhs, ToRegister(rhs), ToRegister(falseExpr),
+ trueExprAndDest);
+}
+
+void CodeGenerator::visitWasmReinterpret(LWasmReinterpret* lir) {
+ MOZ_ASSERT(gen->compilingWasm());
+ MWasmReinterpret* ins = lir->mir();
+
+ MIRType to = ins->type();
+ DebugOnly<MIRType> from = ins->input()->type();
+
+ switch (to) {
+ case MIRType::Int32:
+ MOZ_ASSERT(static_cast<MIRType>(from) == MIRType::Float32);
+ masm.ma_vxfer(ToFloatRegister(lir->input()), ToRegister(lir->output()));
+ break;
+ case MIRType::Float32:
+ MOZ_ASSERT(static_cast<MIRType>(from) == MIRType::Int32);
+ masm.ma_vxfer(ToRegister(lir->input()), ToFloatRegister(lir->output()));
+ break;
+ case MIRType::Double:
+ case MIRType::Int64:
+ MOZ_CRASH("not handled by this LIR opcode");
+ default:
+ MOZ_CRASH("unexpected WasmReinterpret");
+ }
+}
+
+void CodeGenerator::visitAsmJSLoadHeap(LAsmJSLoadHeap* ins) {
+ const MAsmJSLoadHeap* mir = ins->mir();
+
+ const LAllocation* ptr = ins->ptr();
+ const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
+
+ bool isSigned;
+ int size;
+ bool isFloat = false;
+ switch (mir->accessType()) {
+ case Scalar::Int8:
+ isSigned = true;
+ size = 8;
+ break;
+ case Scalar::Uint8:
+ isSigned = false;
+ size = 8;
+ break;
+ case Scalar::Int16:
+ isSigned = true;
+ size = 16;
+ break;
+ case Scalar::Uint16:
+ isSigned = false;
+ size = 16;
+ break;
+ case Scalar::Int32:
+ case Scalar::Uint32:
+ isSigned = true;
+ size = 32;
+ break;
+ case Scalar::Float64:
+ isFloat = true;
+ size = 64;
+ break;
+ case Scalar::Float32:
+ isFloat = true;
+ size = 32;
+ break;
+ default:
+ MOZ_CRASH("unexpected array type");
+ }
+
+ if (ptr->isConstant()) {
+ MOZ_ASSERT(!mir->needsBoundsCheck());
+ int32_t ptrImm = ptr->toConstant()->toInt32();
+ MOZ_ASSERT(ptrImm >= 0);
+ if (isFloat) {
+ ScratchRegisterScope scratch(masm);
+ VFPRegister vd(ToFloatRegister(ins->output()));
+ if (size == 32) {
+ masm.ma_vldr(Address(HeapReg, ptrImm), vd.singleOverlay(), scratch,
+ Assembler::Always);
+ } else {
+ masm.ma_vldr(Address(HeapReg, ptrImm), vd, scratch, Assembler::Always);
+ }
+ } else {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_dataTransferN(IsLoad, size, isSigned, HeapReg, Imm32(ptrImm),
+ ToRegister(ins->output()), scratch, Offset,
+ Assembler::Always);
+ }
+ } else {
+ Register ptrReg = ToRegister(ptr);
+ if (isFloat) {
+ FloatRegister output = ToFloatRegister(ins->output());
+ if (size == 32) {
+ output = output.singleOverlay();
+ }
+
+ Assembler::Condition cond = Assembler::Always;
+ if (mir->needsBoundsCheck()) {
+ Register boundsCheckLimitReg = ToRegister(boundsCheckLimit);
+ masm.as_cmp(ptrReg, O2Reg(boundsCheckLimitReg));
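+        // Out-of-bounds asm.js float loads produce NaN rather than
+        // trapping; the load below is predicated on Below, so it is
+        // skipped when the pointer is out of range.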
+ if (size == 32) {
+ masm.ma_vimm_f32(GenericNaN(), output, Assembler::AboveOrEqual);
+ } else {
+ masm.ma_vimm(GenericNaN(), output, Assembler::AboveOrEqual);
+ }
+ cond = Assembler::Below;
+ }
+
+ ScratchRegisterScope scratch(masm);
+ masm.ma_vldr(output, HeapReg, ptrReg, scratch, 0, cond);
+ } else {
+ Register output = ToRegister(ins->output());
+
+ Assembler::Condition cond = Assembler::Always;
+ if (mir->needsBoundsCheck()) {
+ Register boundsCheckLimitReg = ToRegister(boundsCheckLimit);
+ masm.as_cmp(ptrReg, O2Reg(boundsCheckLimitReg));
+ masm.ma_mov(Imm32(0), output, Assembler::AboveOrEqual);
+ cond = Assembler::Below;
+ }
+
+ ScratchRegisterScope scratch(masm);
+ masm.ma_dataTransferN(IsLoad, size, isSigned, HeapReg, ptrReg, output,
+ scratch, Offset, cond);
+ }
+ }
+}
+
+void CodeGenerator::visitWasmHeapBase(LWasmHeapBase* ins) {
+ MOZ_ASSERT(ins->instance()->isBogus());
+ masm.movePtr(HeapReg, ToRegister(ins->output()));
+}
+
+template <typename T>
+void CodeGeneratorARM::emitWasmLoad(T* lir) {
+ const MWasmLoad* mir = lir->mir();
+ MIRType resultType = mir->type();
+ Register ptr;
+
+ if (mir->access().offset() || mir->access().type() == Scalar::Int64) {
+ ptr = ToRegister(lir->ptrCopy());
+ } else {
+ MOZ_ASSERT(lir->ptrCopy()->isBogusTemp());
+ ptr = ToRegister(lir->ptr());
+ }
+
+ if (resultType == MIRType::Int64) {
+ masm.wasmLoadI64(mir->access(), HeapReg, ptr, ptr, ToOutRegister64(lir));
+ } else {
+ masm.wasmLoad(mir->access(), HeapReg, ptr, ptr,
+ ToAnyRegister(lir->output()));
+ }
+}
+
+void CodeGenerator::visitWasmLoad(LWasmLoad* lir) { emitWasmLoad(lir); }
+
+void CodeGenerator::visitWasmLoadI64(LWasmLoadI64* lir) { emitWasmLoad(lir); }
+
+void CodeGenerator::visitWasmAddOffset(LWasmAddOffset* lir) {
+ MWasmAddOffset* mir = lir->mir();
+ Register base = ToRegister(lir->base());
+ Register out = ToRegister(lir->output());
+
+ ScratchRegisterScope scratch(masm);
+ masm.ma_add(base, Imm32(mir->offset()), out, scratch, SetCC);
+ OutOfLineAbortingWasmTrap* ool = new (alloc())
+ OutOfLineAbortingWasmTrap(mir->bytecodeOffset(), wasm::Trap::OutOfBounds);
+ addOutOfLineCode(ool, mir);
+ masm.ma_b(ool->entry(), Assembler::CarrySet);
+}
+
+void CodeGenerator::visitWasmAddOffset64(LWasmAddOffset64* lir) {
+ MWasmAddOffset* mir = lir->mir();
+ Register64 base = ToRegister64(lir->base());
+ Register64 out = ToOutRegister64(lir);
+ MOZ_ASSERT(base.low != out.high && base.high != out.low);
+
+ ScratchRegisterScope scratch(masm);
+ masm.ma_add(base.low, Imm32(mir->offset()), out.low, scratch, SetCC);
+ masm.ma_adc(base.high, Imm32(mir->offset() >> 32), out.high, scratch, SetCC);
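+  // CarrySet after the high-word ADC means the 64-bit address computation
+  // wrapped, which the trap below reports as out-of-bounds.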
+ OutOfLineAbortingWasmTrap* ool = new (alloc())
+ OutOfLineAbortingWasmTrap(mir->bytecodeOffset(), wasm::Trap::OutOfBounds);
+ addOutOfLineCode(ool, mir);
+ masm.ma_b(ool->entry(), Assembler::CarrySet);
+}
+
+template <typename T>
+void CodeGeneratorARM::emitWasmStore(T* lir) {
+ const MWasmStore* mir = lir->mir();
+ Scalar::Type accessType = mir->access().type();
+ Register ptr;
+
+ // Maybe add the offset.
+ if (mir->access().offset() || accessType == Scalar::Int64) {
+ ptr = ToRegister(lir->ptrCopy());
+ } else {
+ MOZ_ASSERT(lir->ptrCopy()->isBogusTemp());
+ ptr = ToRegister(lir->ptr());
+ }
+
+ if (accessType == Scalar::Int64) {
+ masm.wasmStoreI64(mir->access(),
+ ToRegister64(lir->getInt64Operand(lir->ValueIndex)),
+ HeapReg, ptr, ptr);
+ } else {
+ masm.wasmStore(mir->access(),
+ ToAnyRegister(lir->getOperand(lir->ValueIndex)), HeapReg,
+ ptr, ptr);
+ }
+}
+
+void CodeGenerator::visitWasmStore(LWasmStore* lir) { emitWasmStore(lir); }
+
+void CodeGenerator::visitWasmStoreI64(LWasmStoreI64* lir) {
+ emitWasmStore(lir);
+}
+
+void CodeGenerator::visitAsmJSStoreHeap(LAsmJSStoreHeap* ins) {
+ const MAsmJSStoreHeap* mir = ins->mir();
+
+ const LAllocation* ptr = ins->ptr();
+ const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
+
+ bool isSigned;
+ int size;
+ bool isFloat = false;
+ switch (mir->accessType()) {
+ case Scalar::Int8:
+ case Scalar::Uint8:
+ isSigned = false;
+ size = 8;
+ break;
+ case Scalar::Int16:
+ case Scalar::Uint16:
+ isSigned = false;
+ size = 16;
+ break;
+ case Scalar::Int32:
+ case Scalar::Uint32:
+ isSigned = true;
+ size = 32;
+ break;
+ case Scalar::Float64:
+ isFloat = true;
+ size = 64;
+ break;
+ case Scalar::Float32:
+ isFloat = true;
+ size = 32;
+ break;
+ default:
+ MOZ_CRASH("unexpected array type");
+ }
+
+ if (ptr->isConstant()) {
+ MOZ_ASSERT(!mir->needsBoundsCheck());
+ int32_t ptrImm = ptr->toConstant()->toInt32();
+ MOZ_ASSERT(ptrImm >= 0);
+ if (isFloat) {
+ VFPRegister vd(ToFloatRegister(ins->value()));
+ Address addr(HeapReg, ptrImm);
+ if (size == 32) {
+ masm.storeFloat32(vd, addr);
+ } else {
+ masm.storeDouble(vd, addr);
+ }
+ } else {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_dataTransferN(IsStore, size, isSigned, HeapReg, Imm32(ptrImm),
+ ToRegister(ins->value()), scratch, Offset,
+ Assembler::Always);
+ }
+ } else {
+ Register ptrReg = ToRegister(ptr);
+
+ Assembler::Condition cond = Assembler::Always;
+ if (mir->needsBoundsCheck()) {
+ Register boundsCheckLimitReg = ToRegister(boundsCheckLimit);
+ masm.as_cmp(ptrReg, O2Reg(boundsCheckLimitReg));
+ cond = Assembler::Below;
+ }
+
+ if (isFloat) {
+ ScratchRegisterScope scratch(masm);
+ FloatRegister value = ToFloatRegister(ins->value());
+ if (size == 32) {
+ value = value.singleOverlay();
+ }
+
+ masm.ma_vstr(value, HeapReg, ptrReg, scratch, 0, Assembler::Below);
+ } else {
+ ScratchRegisterScope scratch(masm);
+ Register value = ToRegister(ins->value());
+ masm.ma_dataTransferN(IsStore, size, isSigned, HeapReg, ptrReg, value,
+ scratch, Offset, cond);
+ }
+ }
+}
+
+void CodeGenerator::visitWasmCompareExchangeHeap(
+ LWasmCompareExchangeHeap* ins) {
+ MWasmCompareExchangeHeap* mir = ins->mir();
+
+ const LAllocation* ptr = ins->ptr();
+ Register ptrReg = ToRegister(ptr);
+ BaseIndex srcAddr(HeapReg, ptrReg, TimesOne, mir->access().offset());
+
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ Register oldval = ToRegister(ins->oldValue());
+ Register newval = ToRegister(ins->newValue());
+ Register out = ToRegister(ins->output());
+
+ masm.wasmCompareExchange(mir->access(), srcAddr, oldval, newval, out);
+}
+
+void CodeGenerator::visitWasmAtomicExchangeHeap(LWasmAtomicExchangeHeap* ins) {
+ MWasmAtomicExchangeHeap* mir = ins->mir();
+
+ Register ptrReg = ToRegister(ins->ptr());
+ Register value = ToRegister(ins->value());
+ Register output = ToRegister(ins->output());
+ BaseIndex srcAddr(HeapReg, ptrReg, TimesOne, mir->access().offset());
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ masm.wasmAtomicExchange(mir->access(), srcAddr, value, output);
+}
+
+void CodeGenerator::visitWasmAtomicBinopHeap(LWasmAtomicBinopHeap* ins) {
+ MWasmAtomicBinopHeap* mir = ins->mir();
+ MOZ_ASSERT(mir->hasUses());
+
+ Register ptrReg = ToRegister(ins->ptr());
+ Register flagTemp = ToRegister(ins->flagTemp());
+ Register output = ToRegister(ins->output());
+ const LAllocation* value = ins->value();
+ AtomicOp op = mir->operation();
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ BaseIndex srcAddr(HeapReg, ptrReg, TimesOne, mir->access().offset());
+ masm.wasmAtomicFetchOp(mir->access(), op, ToRegister(value), srcAddr,
+ flagTemp, output);
+}
+
+void CodeGenerator::visitWasmAtomicBinopHeapForEffect(
+ LWasmAtomicBinopHeapForEffect* ins) {
+ MWasmAtomicBinopHeap* mir = ins->mir();
+ MOZ_ASSERT(!mir->hasUses());
+
+ Register ptrReg = ToRegister(ins->ptr());
+ Register flagTemp = ToRegister(ins->flagTemp());
+ const LAllocation* value = ins->value();
+ AtomicOp op = mir->operation();
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ BaseIndex srcAddr(HeapReg, ptrReg, TimesOne, mir->access().offset());
+ masm.wasmAtomicEffectOp(mir->access(), op, ToRegister(value), srcAddr,
+ flagTemp);
+}
+
+void CodeGenerator::visitWasmStackArg(LWasmStackArg* ins) {
+ const MWasmStackArg* mir = ins->mir();
+ Address dst(StackPointer, mir->spOffset());
+ ScratchRegisterScope scratch(masm);
+ SecondScratchRegisterScope scratch2(masm);
+
+ if (ins->arg()->isConstant()) {
+ masm.ma_mov(Imm32(ToInt32(ins->arg())), scratch);
+ masm.ma_str(scratch, dst, scratch2);
+ } else {
+ if (ins->arg()->isGeneralReg()) {
+ masm.ma_str(ToRegister(ins->arg()), dst, scratch);
+ } else {
+ masm.ma_vstr(ToFloatRegister(ins->arg()), dst, scratch);
+ }
+ }
+}
+
+void CodeGenerator::visitUDiv(LUDiv* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register output = ToRegister(ins->output());
+
+ Label done;
+ generateUDivModZeroCheck(rhs, output, &done, ins->snapshot(), ins->mir());
+
+ masm.ma_udiv(lhs, rhs, output);
+
+ // Check for large unsigned result - represent as double.
+ if (!ins->mir()->isTruncated()) {
+ MOZ_ASSERT(ins->mir()->fallible());
+ masm.as_cmp(output, Imm8(0));
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+ }
+
+ // Check for non-zero remainder if not truncating to int.
+ if (!ins->mir()->canTruncateRemainder()) {
+ MOZ_ASSERT(ins->mir()->fallible());
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_mul(rhs, output, scratch);
+ masm.ma_cmp(scratch, lhs);
+ }
+ bailoutIf(Assembler::NotEqual, ins->snapshot());
+ }
+
+ if (done.used()) {
+ masm.bind(&done);
+ }
+}
+
+void CodeGenerator::visitUMod(LUMod* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register output = ToRegister(ins->output());
+
+ Label done;
+ generateUDivModZeroCheck(rhs, output, &done, ins->snapshot(), ins->mir());
+
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_umod(lhs, rhs, output, scratch);
+ }
+
+ // Check for large unsigned result - represent as double.
+ if (!ins->mir()->isTruncated()) {
+ MOZ_ASSERT(ins->mir()->fallible());
+ masm.as_cmp(output, Imm8(0));
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+ }
+
+ if (done.used()) {
+ masm.bind(&done);
+ }
+}
+
+template <class T>
+void CodeGeneratorARM::generateUDivModZeroCheck(Register rhs, Register output,
+ Label* done,
+ LSnapshot* snapshot, T* mir) {
+ if (!mir) {
+ return;
+ }
+ if (mir->canBeDivideByZero()) {
+ masm.as_cmp(rhs, Imm8(0));
+ if (mir->isTruncated()) {
+ if (mir->trapOnError()) {
+ Label nonZero;
+ masm.ma_b(&nonZero, Assembler::NotEqual);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+ masm.bind(&nonZero);
+ } else {
+ Label skip;
+ masm.ma_b(&skip, Assembler::NotEqual);
+        // Infinity|0 == 0 and NaN|0 == 0, so a truncated result is just 0.
+ masm.ma_mov(Imm32(0), output);
+ masm.ma_b(done);
+ masm.bind(&skip);
+ }
+ } else {
+ // Bailout for divide by zero
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Equal, snapshot);
+ }
+ }
+}
+
+void CodeGenerator::visitSoftUDivOrMod(LSoftUDivOrMod* ins) {
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register output = ToRegister(ins->output());
+
+ MOZ_ASSERT(lhs == r0);
+ MOZ_ASSERT(rhs == r1);
+ MOZ_ASSERT(output == r0);
+
+ Label done;
+ MDiv* div = ins->mir()->isDiv() ? ins->mir()->toDiv() : nullptr;
+ MMod* mod = !div ? ins->mir()->toMod() : nullptr;
+
+ generateUDivModZeroCheck(rhs, output, &done, ins->snapshot(), div);
+ generateUDivModZeroCheck(rhs, output, &done, ins->snapshot(), mod);
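+  // Exactly one of div/mod is non-null; generateUDivModZeroCheck is a
+  // no-op when passed nullptr, so only one zero check is emitted.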
+
+ if (gen->compilingWasm()) {
+ masm.Push(InstanceReg);
+ int32_t framePushedAfterInstance = masm.framePushed();
+ masm.setupWasmABICall();
+ masm.passABIArg(lhs);
+ masm.passABIArg(rhs);
+ wasm::BytecodeOffset bytecodeOffset =
+ (div ? div->bytecodeOffset() : mod->bytecodeOffset());
+ int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance;
+ masm.callWithABI(bytecodeOffset, wasm::SymbolicAddress::aeabi_uidivmod,
+ mozilla::Some(instanceOffset));
+ masm.Pop(InstanceReg);
+ } else {
+ using Fn = int64_t (*)(int, int);
+ masm.setupAlignedABICall();
+ masm.passABIArg(lhs);
+ masm.passABIArg(rhs);
+ masm.callWithABI<Fn, __aeabi_uidivmod>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther);
+ }
+
+ if (mod) {
+ MOZ_ASSERT(output == r0, "output should not be r1 for mod");
+ masm.move32(r1, output);
+ }
+
+ // uidivmod returns the quotient in r0, and the remainder in r1.
+ if (div && !div->canTruncateRemainder()) {
+ MOZ_ASSERT(div->fallible());
+ masm.as_cmp(r1, Imm8(0));
+ bailoutIf(Assembler::NonZero, ins->snapshot());
+ }
+
+ // Bailout for big unsigned results
+ if ((div && !div->isTruncated()) || (mod && !mod->isTruncated())) {
+ DebugOnly<bool> isFallible =
+ (div && div->fallible()) || (mod && mod->fallible());
+ MOZ_ASSERT(isFallible);
+ masm.as_cmp(output, Imm8(0));
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+ }
+
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitEffectiveAddress(LEffectiveAddress* ins) {
+ const MEffectiveAddress* mir = ins->mir();
+ Register base = ToRegister(ins->base());
+ Register index = ToRegister(ins->index());
+ Register output = ToRegister(ins->output());
+
+ ScratchRegisterScope scratch(masm);
+
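+  // output = base + (index << scale) + displacement, computed in two steps.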
+ masm.as_add(output, base, lsl(index, mir->scale()));
+ masm.ma_add(Imm32(mir->displacement()), output, scratch);
+}
+
+void CodeGenerator::visitNegI(LNegI* ins) {
+ Register input = ToRegister(ins->input());
+ masm.ma_neg(input, ToRegister(ins->output()));
+}
+
+void CodeGenerator::visitNegI64(LNegI64* ins) {
+ Register64 input = ToRegister64(ins->getInt64Operand(0));
+ MOZ_ASSERT(input == ToOutRegister64(ins));
+ masm.neg64(input);
+}
+
+void CodeGenerator::visitNegD(LNegD* ins) {
+ FloatRegister input = ToFloatRegister(ins->input());
+ masm.ma_vneg(input, ToFloatRegister(ins->output()));
+}
+
+void CodeGenerator::visitNegF(LNegF* ins) {
+ FloatRegister input = ToFloatRegister(ins->input());
+ masm.ma_vneg_f32(input, ToFloatRegister(ins->output()));
+}
+
+void CodeGenerator::visitMemoryBarrier(LMemoryBarrier* ins) {
+ masm.memoryBarrier(ins->type());
+}
+
+void CodeGenerator::visitWasmTruncateToInt32(LWasmTruncateToInt32* lir) {
+ auto input = ToFloatRegister(lir->input());
+ auto output = ToRegister(lir->output());
+
+ MWasmTruncateToInt32* mir = lir->mir();
+ MIRType fromType = mir->input()->type();
+
+ OutOfLineWasmTruncateCheck* ool = nullptr;
+ Label* oolEntry = nullptr;
+ if (!lir->mir()->isSaturating()) {
+ ool = new (alloc())
+ OutOfLineWasmTruncateCheck(mir, input, Register::Invalid());
+ addOutOfLineCode(ool, mir);
+ oolEntry = ool->entry();
+ }
+
+ masm.wasmTruncateToInt32(input, output, fromType, mir->isUnsigned(),
+ mir->isSaturating(), oolEntry);
+
+ if (!lir->mir()->isSaturating()) {
+ masm.bind(ool->rejoin());
+ }
+}
+
+void CodeGenerator::visitWasmTruncateToInt64(LWasmTruncateToInt64* lir) {
+ MOZ_ASSERT(gen->compilingWasm());
+ MOZ_ASSERT(ToRegister(lir->instance()) == InstanceReg);
+ masm.Push(InstanceReg);
+ int32_t framePushedAfterInstance = masm.framePushed();
+
+ FloatRegister input = ToFloatRegister(lir->input());
+ FloatRegister inputDouble = input;
+ Register64 output = ToOutRegister64(lir);
+
+ MWasmBuiltinTruncateToInt64* mir = lir->mir();
+ MIRType fromType = mir->input()->type();
+
+ OutOfLineWasmTruncateCheck* ool = nullptr;
+ if (!lir->mir()->isSaturating()) {
+ ool = new (alloc())
+ OutOfLineWasmTruncateCheck(mir, input, Register64::Invalid());
+ addOutOfLineCode(ool, mir);
+ }
+
+ ScratchDoubleScope fpscratch(masm);
+ if (fromType == MIRType::Float32) {
+ inputDouble = fpscratch;
+ masm.convertFloat32ToDouble(input, inputDouble);
+ }
+
+ masm.Push(input);
+
+ masm.setupWasmABICall();
+ masm.passABIArg(inputDouble, MoveOp::DOUBLE);
+
+ int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance;
+ if (lir->mir()->isSaturating()) {
+ if (lir->mir()->isUnsigned()) {
+ masm.callWithABI(mir->bytecodeOffset(),
+ wasm::SymbolicAddress::SaturatingTruncateDoubleToUint64,
+ mozilla::Some(instanceOffset));
+ } else {
+ masm.callWithABI(mir->bytecodeOffset(),
+ wasm::SymbolicAddress::SaturatingTruncateDoubleToInt64,
+ mozilla::Some(instanceOffset));
+ }
+ } else {
+ if (lir->mir()->isUnsigned()) {
+ masm.callWithABI(mir->bytecodeOffset(),
+ wasm::SymbolicAddress::TruncateDoubleToUint64,
+ mozilla::Some(instanceOffset));
+ } else {
+ masm.callWithABI(mir->bytecodeOffset(),
+ wasm::SymbolicAddress::TruncateDoubleToInt64,
+ mozilla::Some(instanceOffset));
+ }
+ }
+
+ masm.Pop(input);
+ masm.Pop(InstanceReg);
+
+ // TruncateDoubleTo{UI,I}nt64 returns 0x8000000000000000 to indicate
+ // exceptional results, so check for that and produce the appropriate
+ // traps. The Saturating form always returns a normal value and never
+ // needs traps.
+ if (!lir->mir()->isSaturating()) {
+ ScratchRegisterScope scratch(masm);
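+    // The second cmp is predicated on Equal from the first, so the branch
+    // is taken only when both halves match the 0x8000000000000000 sentinel.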
+ masm.ma_cmp(output.high, Imm32(0x80000000), scratch);
+ masm.as_cmp(output.low, Imm8(0x00000000), Assembler::Equal);
+ masm.ma_b(ool->entry(), Assembler::Equal);
+
+ masm.bind(ool->rejoin());
+ }
+
+ MOZ_ASSERT(ReturnReg64 == output);
+}
+
+void CodeGeneratorARM::visitOutOfLineWasmTruncateCheck(
+ OutOfLineWasmTruncateCheck* ool) {
+ // On ARM, saturating truncation codegen handles saturating itself rather than
+ // relying on out-of-line fixup code.
+ if (ool->isSaturating()) {
+ return;
+ }
+
+ masm.outOfLineWasmTruncateToIntCheck(ool->input(), ool->fromType(),
+ ool->toType(), ool->isUnsigned(),
+ ool->rejoin(), ool->bytecodeOffset());
+}
+
+void CodeGenerator::visitInt64ToFloatingPointCall(
+ LInt64ToFloatingPointCall* lir) {
+ MOZ_ASSERT(gen->compilingWasm());
+ MOZ_ASSERT(ToRegister(lir->getOperand(LInt64ToFloatingPointCall::Instance)) ==
+ InstanceReg);
+ masm.Push(InstanceReg);
+ int32_t framePushedAfterInstance = masm.framePushed();
+
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+
+ MBuiltinInt64ToFloatingPoint* mir = lir->mir();
+ MIRType toType = mir->type();
+
+ masm.setupWasmABICall();
+ masm.passABIArg(input.high);
+ masm.passABIArg(input.low);
+
+ bool isUnsigned = mir->isUnsigned();
+ wasm::SymbolicAddress callee =
+ toType == MIRType::Float32
+ ? (isUnsigned ? wasm::SymbolicAddress::Uint64ToFloat32
+ : wasm::SymbolicAddress::Int64ToFloat32)
+ : (isUnsigned ? wasm::SymbolicAddress::Uint64ToDouble
+ : wasm::SymbolicAddress::Int64ToDouble);
+
+ int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance;
+ MoveOp::Type result =
+ toType == MIRType::Float32 ? MoveOp::FLOAT32 : MoveOp::DOUBLE;
+ masm.callWithABI(mir->bytecodeOffset(), callee, mozilla::Some(instanceOffset),
+ result);
+
+ DebugOnly<FloatRegister> output(ToFloatRegister(lir->output()));
+ MOZ_ASSERT_IF(toType == MIRType::Double, output.value == ReturnDoubleReg);
+ MOZ_ASSERT_IF(toType == MIRType::Float32, output.value == ReturnFloat32Reg);
+
+ masm.Pop(InstanceReg);
+}
+
+void CodeGenerator::visitCopySignF(LCopySignF* ins) {
+ FloatRegister lhs = ToFloatRegister(ins->getOperand(0));
+ FloatRegister rhs = ToFloatRegister(ins->getOperand(1));
+ FloatRegister output = ToFloatRegister(ins->getDef(0));
+
+ Register lhsi = ToRegister(ins->getTemp(0));
+ Register rhsi = ToRegister(ins->getTemp(1));
+
+ masm.ma_vxfer(lhs, lhsi);
+ masm.ma_vxfer(rhs, rhsi);
+
+ ScratchRegisterScope scratch(masm);
+
+ // Clear lhs's sign.
+ masm.ma_and(Imm32(INT32_MAX), lhsi, lhsi, scratch);
+
+ // Keep rhs's sign.
+ masm.ma_and(Imm32(INT32_MIN), rhsi, rhsi, scratch);
+
+ // Combine.
+ masm.ma_orr(lhsi, rhsi, rhsi);
+
+ masm.ma_vxfer(rhsi, output);
+}
+
+void CodeGenerator::visitCopySignD(LCopySignD* ins) {
+ FloatRegister lhs = ToFloatRegister(ins->getOperand(0));
+ FloatRegister rhs = ToFloatRegister(ins->getOperand(1));
+ FloatRegister output = ToFloatRegister(ins->getDef(0));
+
+ Register lhsi = ToRegister(ins->getTemp(0));
+ Register rhsi = ToRegister(ins->getTemp(1));
+
+ // Manipulate high words of double inputs.
+ masm.as_vxfer(lhsi, InvalidReg, lhs, Assembler::FloatToCore,
+ Assembler::Always, 1);
+ masm.as_vxfer(rhsi, InvalidReg, rhs, Assembler::FloatToCore,
+ Assembler::Always, 1);
+
+ ScratchRegisterScope scratch(masm);
+
+ // Clear lhs's sign.
+ masm.ma_and(Imm32(INT32_MAX), lhsi, lhsi, scratch);
+
+ // Keep rhs's sign.
+ masm.ma_and(Imm32(INT32_MIN), rhsi, rhsi, scratch);
+
+ // Combine.
+ masm.ma_orr(lhsi, rhsi, rhsi);
+
+ // Reconstruct the output.
+ masm.as_vxfer(lhsi, InvalidReg, lhs, Assembler::FloatToCore,
+ Assembler::Always, 0);
+ masm.ma_vxfer(lhsi, rhsi, output);
+}
+
+void CodeGenerator::visitWrapInt64ToInt32(LWrapInt64ToInt32* lir) {
+ const LInt64Allocation& input = lir->getInt64Operand(0);
+ Register output = ToRegister(lir->output());
+
+ if (lir->mir()->bottomHalf()) {
+ masm.move32(ToRegister(input.low()), output);
+ } else {
+ masm.move32(ToRegister(input.high()), output);
+ }
+}
+
+void CodeGenerator::visitExtendInt32ToInt64(LExtendInt32ToInt64* lir) {
+ Register64 output = ToOutRegister64(lir);
+ MOZ_ASSERT(ToRegister(lir->input()) == output.low);
+
+ if (lir->mir()->isUnsigned()) {
+ masm.ma_mov(Imm32(0), output.high);
+ } else {
+ masm.ma_asr(Imm32(31), output.low, output.high);
+ }
+}
+
+void CodeGenerator::visitSignExtendInt64(LSignExtendInt64* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ Register64 output = ToOutRegister64(lir);
+ switch (lir->mode()) {
+ case MSignExtendInt64::Byte:
+ masm.move8SignExtend(input.low, output.low);
+ break;
+ case MSignExtendInt64::Half:
+ masm.move16SignExtend(input.low, output.low);
+ break;
+ case MSignExtendInt64::Word:
+ masm.move32(input.low, output.low);
+ break;
+ }
+ masm.ma_asr(Imm32(31), output.low, output.high);
+}
+
+void CodeGenerator::visitWasmExtendU32Index(LWasmExtendU32Index*) {
+ MOZ_CRASH("64-bit only");
+}
+
+void CodeGenerator::visitWasmWrapU32Index(LWasmWrapU32Index* lir) {
+ // Generates no code on this platform because we just return the low part of
+ // the input register pair.
+ MOZ_ASSERT(ToRegister(lir->input()) == ToRegister(lir->output()));
+}
+
+void CodeGenerator::visitDivOrModI64(LDivOrModI64* lir) {
+ MOZ_ASSERT(gen->compilingWasm());
+ MOZ_ASSERT(ToRegister(lir->getOperand(LDivOrModI64::Instance)) ==
+ InstanceReg);
+ masm.Push(InstanceReg);
+ int32_t framePushedAfterInstance = masm.framePushed();
+
+ Register64 lhs = ToRegister64(lir->getInt64Operand(LDivOrModI64::Lhs));
+ Register64 rhs = ToRegister64(lir->getInt64Operand(LDivOrModI64::Rhs));
+ Register64 output = ToOutRegister64(lir);
+
+ MOZ_ASSERT(output == ReturnReg64);
+
+ Label done;
+
+ // Handle divide by zero.
+ if (lir->canBeDivideByZero()) {
+ Label nonZero;
+    // We can use InstanceReg as a temp register because it was preserved
+    // above.
+ masm.branchTest64(Assembler::NonZero, rhs, rhs, InstanceReg, &nonZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, lir->bytecodeOffset());
+ masm.bind(&nonZero);
+ }
+
+ auto* mir = lir->mir();
+
+ // Handle an integer overflow exception from INT64_MIN / -1.
+ if (lir->canBeNegativeOverflow()) {
+ Label notmin;
+ masm.branch64(Assembler::NotEqual, lhs, Imm64(INT64_MIN), &notmin);
+ masm.branch64(Assembler::NotEqual, rhs, Imm64(-1), &notmin);
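+    // Here lhs == INT64_MIN and rhs == -1: INT64_MIN % -1 is 0, while
+    // INT64_MIN / -1 overflows and must trap.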
+ if (mir->isWasmBuiltinModI64()) {
+ masm.xor64(output, output);
+ } else {
+ masm.wasmTrap(wasm::Trap::IntegerOverflow, lir->bytecodeOffset());
+ }
+ masm.jump(&done);
+ masm.bind(&notmin);
+ }
+
+ masm.setupWasmABICall();
+ masm.passABIArg(lhs.high);
+ masm.passABIArg(lhs.low);
+ masm.passABIArg(rhs.high);
+ masm.passABIArg(rhs.low);
+
+ int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance;
+ if (mir->isWasmBuiltinModI64()) {
+ masm.callWithABI(lir->bytecodeOffset(), wasm::SymbolicAddress::ModI64,
+ mozilla::Some(instanceOffset));
+ } else {
+ masm.callWithABI(lir->bytecodeOffset(), wasm::SymbolicAddress::DivI64,
+ mozilla::Some(instanceOffset));
+ }
+
+ MOZ_ASSERT(ReturnReg64 == output);
+
+ masm.bind(&done);
+ masm.Pop(InstanceReg);
+}
+
+void CodeGenerator::visitUDivOrModI64(LUDivOrModI64* lir) {
+ MOZ_ASSERT(gen->compilingWasm());
+ MOZ_ASSERT(ToRegister(lir->getOperand(LDivOrModI64::Instance)) ==
+ InstanceReg);
+ masm.Push(InstanceReg);
+ int32_t framePushedAfterInstance = masm.framePushed();
+
+ Register64 lhs = ToRegister64(lir->getInt64Operand(LDivOrModI64::Lhs));
+ Register64 rhs = ToRegister64(lir->getInt64Operand(LDivOrModI64::Rhs));
+
+ MOZ_ASSERT(ToOutRegister64(lir) == ReturnReg64);
+
+ // Prevent divide by zero.
+ if (lir->canBeDivideByZero()) {
+ Label nonZero;
+    // We can use InstanceReg as a temp register because it was preserved
+    // above.
+ masm.branchTest64(Assembler::NonZero, rhs, rhs, InstanceReg, &nonZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, lir->bytecodeOffset());
+ masm.bind(&nonZero);
+ }
+
+ masm.setupWasmABICall();
+ masm.passABIArg(lhs.high);
+ masm.passABIArg(lhs.low);
+ masm.passABIArg(rhs.high);
+ masm.passABIArg(rhs.low);
+
+ MDefinition* mir = lir->mir();
+ int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance;
+ if (mir->isWasmBuiltinModI64()) {
+ masm.callWithABI(lir->bytecodeOffset(), wasm::SymbolicAddress::UModI64,
+ mozilla::Some(instanceOffset));
+ } else {
+ masm.callWithABI(lir->bytecodeOffset(), wasm::SymbolicAddress::UDivI64,
+ mozilla::Some(instanceOffset));
+ }
+ masm.Pop(InstanceReg);
+}
+
+void CodeGenerator::visitCompareI64(LCompareI64* lir) {
+ MCompare* mir = lir->mir();
+ MOZ_ASSERT(mir->compareType() == MCompare::Compare_Int64 ||
+ mir->compareType() == MCompare::Compare_UInt64);
+
+ const LInt64Allocation lhs = lir->getInt64Operand(LCompareI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LCompareI64::Rhs);
+ Register64 lhsRegs = ToRegister64(lhs);
+ Register output = ToRegister(lir->output());
+
+ bool isSigned = mir->compareType() == MCompare::Compare_Int64;
+ Assembler::Condition condition = JSOpToCondition(lir->jsop(), isSigned);
+ Label done;
+
+ masm.move32(Imm32(1), output);
+
+ if (IsConstant(rhs)) {
+ Imm64 imm = Imm64(ToInt64(rhs));
+ masm.branch64(condition, lhsRegs, imm, &done);
+ } else {
+ Register64 rhsRegs = ToRegister64(rhs);
+ masm.branch64(condition, lhsRegs, rhsRegs, &done);
+ }
+
+ masm.move32(Imm32(0), output);
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitCompareI64AndBranch(LCompareI64AndBranch* lir) {
+ MCompare* mir = lir->cmpMir();
+ MOZ_ASSERT(mir->compareType() == MCompare::Compare_Int64 ||
+ mir->compareType() == MCompare::Compare_UInt64);
+
+ const LInt64Allocation lhs = lir->getInt64Operand(LCompareI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LCompareI64::Rhs);
+ Register64 lhsRegs = ToRegister64(lhs);
+
+ bool isSigned = mir->compareType() == MCompare::Compare_Int64;
+ Assembler::Condition condition = JSOpToCondition(lir->jsop(), isSigned);
+
+ Label* trueLabel = getJumpLabelForBranch(lir->ifTrue());
+ Label* falseLabel = getJumpLabelForBranch(lir->ifFalse());
+
+ if (isNextBlock(lir->ifFalse()->lir())) {
+ falseLabel = nullptr;
+ } else if (isNextBlock(lir->ifTrue()->lir())) {
+ condition = Assembler::InvertCondition(condition);
+ trueLabel = falseLabel;
+ falseLabel = nullptr;
+ }
+
+ if (IsConstant(rhs)) {
+ Imm64 imm = Imm64(ToInt64(rhs));
+ masm.branch64(condition, lhsRegs, imm, trueLabel, falseLabel);
+ } else {
+ Register64 rhsRegs = ToRegister64(rhs);
+ masm.branch64(condition, lhsRegs, rhsRegs, trueLabel, falseLabel);
+ }
+}
+
+void CodeGenerator::visitShiftI64(LShiftI64* lir) {
+ const LInt64Allocation lhs = lir->getInt64Operand(LShiftI64::Lhs);
+ LAllocation* rhs = lir->getOperand(LShiftI64::Rhs);
+
+ MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
+
+ if (rhs->isConstant()) {
+ int32_t shift = int32_t(rhs->toConstant()->toInt64() & 0x3F);
+ switch (lir->bitop()) {
+ case JSOp::Lsh:
+ if (shift) {
+ masm.lshift64(Imm32(shift), ToRegister64(lhs));
+ }
+ break;
+ case JSOp::Rsh:
+ if (shift) {
+ masm.rshift64Arithmetic(Imm32(shift), ToRegister64(lhs));
+ }
+ break;
+ case JSOp::Ursh:
+ if (shift) {
+ masm.rshift64(Imm32(shift), ToRegister64(lhs));
+ }
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+ return;
+ }
+
+ switch (lir->bitop()) {
+ case JSOp::Lsh:
+ masm.lshift64(ToRegister(rhs), ToRegister64(lhs));
+ break;
+ case JSOp::Rsh:
+ masm.rshift64Arithmetic(ToRegister(rhs), ToRegister64(lhs));
+ break;
+ case JSOp::Ursh:
+ masm.rshift64(ToRegister(rhs), ToRegister64(lhs));
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+}
+
+void CodeGenerator::visitBitOpI64(LBitOpI64* lir) {
+ const LInt64Allocation lhs = lir->getInt64Operand(LBitOpI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LBitOpI64::Rhs);
+
+ MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
+
+ switch (lir->bitop()) {
+ case JSOp::BitOr:
+ if (IsConstant(rhs)) {
+ masm.or64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
+ } else {
+ masm.or64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
+ }
+ break;
+ case JSOp::BitXor:
+ if (IsConstant(rhs)) {
+ masm.xor64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
+ } else {
+ masm.xor64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
+ }
+ break;
+ case JSOp::BitAnd:
+ if (IsConstant(rhs)) {
+ masm.and64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
+ } else {
+ masm.and64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
+ }
+ break;
+ default:
+ MOZ_CRASH("unexpected binary opcode");
+ }
+}
+
+void CodeGenerator::visitRotateI64(LRotateI64* lir) {
+ MRotate* mir = lir->mir();
+ LAllocation* count = lir->count();
+
+ Register64 input = ToRegister64(lir->input());
+ Register64 output = ToOutRegister64(lir);
+ Register temp = ToTempRegisterOrInvalid(lir->temp());
+
+ if (count->isConstant()) {
+ int32_t c = int32_t(count->toConstant()->toInt64() & 0x3F);
+ if (!c) {
+ masm.move64(input, output);
+ return;
+ }
+ if (mir->isLeftRotate()) {
+ masm.rotateLeft64(Imm32(c), input, output, temp);
+ } else {
+ masm.rotateRight64(Imm32(c), input, output, temp);
+ }
+ } else {
+ if (mir->isLeftRotate()) {
+ masm.rotateLeft64(ToRegister(count), input, output, temp);
+ } else {
+ masm.rotateRight64(ToRegister(count), input, output, temp);
+ }
+ }
+}
+
+void CodeGenerator::visitWasmStackArgI64(LWasmStackArgI64* ins) {
+ const MWasmStackArg* mir = ins->mir();
+ Address dst(StackPointer, mir->spOffset());
+ if (IsConstant(ins->arg())) {
+ masm.store64(Imm64(ToInt64(ins->arg())), dst);
+ } else {
+ masm.store64(ToRegister64(ins->arg()), dst);
+ }
+}
+
+void CodeGenerator::visitWasmSelectI64(LWasmSelectI64* lir) {
+ Register cond = ToRegister(lir->condExpr());
+ const LInt64Allocation falseExpr = lir->falseExpr();
+
+ Register64 out = ToOutRegister64(lir);
+ MOZ_ASSERT(ToRegister64(lir->trueExpr()) == out,
+ "true expr is reused for input");
+
+ masm.as_cmp(cond, Imm8(0));
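+  // out already holds trueExpr; the moves/loads below are predicated on
+  // Equal, so they overwrite out with falseExpr only when cond is zero.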
+ if (falseExpr.low().isRegister()) {
+ masm.ma_mov(ToRegister(falseExpr.low()), out.low, LeaveCC,
+ Assembler::Equal);
+ masm.ma_mov(ToRegister(falseExpr.high()), out.high, LeaveCC,
+ Assembler::Equal);
+ } else {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_ldr(ToAddress(falseExpr.low()), out.low, scratch, Offset,
+ Assembler::Equal);
+ masm.ma_ldr(ToAddress(falseExpr.high()), out.high, scratch, Offset,
+ Assembler::Equal);
+ }
+}
+
+void CodeGenerator::visitWasmReinterpretFromI64(LWasmReinterpretFromI64* lir) {
+ MOZ_ASSERT(lir->mir()->type() == MIRType::Double);
+ MOZ_ASSERT(lir->mir()->input()->type() == MIRType::Int64);
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ FloatRegister output = ToFloatRegister(lir->output());
+
+ masm.ma_vxfer(input.low, input.high, output);
+}
+
+void CodeGenerator::visitWasmReinterpretToI64(LWasmReinterpretToI64* lir) {
+ MOZ_ASSERT(lir->mir()->type() == MIRType::Int64);
+ MOZ_ASSERT(lir->mir()->input()->type() == MIRType::Double);
+ FloatRegister input = ToFloatRegister(lir->getOperand(0));
+ Register64 output = ToOutRegister64(lir);
+
+ masm.ma_vxfer(input, output.low, output.high);
+}
+
+void CodeGenerator::visitPopcntI64(LPopcntI64* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ Register64 output = ToOutRegister64(lir);
+ Register temp = ToRegister(lir->getTemp(0));
+
+ masm.popcnt64(input, output, temp);
+}
+
+void CodeGenerator::visitClzI64(LClzI64* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ Register64 output = ToOutRegister64(lir);
+
+ masm.clz64(input, output.low);
+ masm.move32(Imm32(0), output.high);
+}
+
+void CodeGenerator::visitCtzI64(LCtzI64* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ Register64 output = ToOutRegister64(lir);
+
+ masm.ctz64(input, output.low);
+ masm.move32(Imm32(0), output.high);
+}
+
+void CodeGenerator::visitBitNotI64(LBitNotI64* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ MOZ_ASSERT(input == ToOutRegister64(lir));
+ masm.ma_mvn(input.high, input.high);
+ masm.ma_mvn(input.low, input.low);
+}
+
+void CodeGenerator::visitTestI64AndBranch(LTestI64AndBranch* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+
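+  // A 64-bit value is non-zero if either half is non-zero, so test the high
+  // word first and fall through to the low word.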
+ masm.as_cmp(input.high, Imm8(0));
+ jumpToBlock(lir->ifTrue(), Assembler::NonZero);
+ masm.as_cmp(input.low, Imm8(0));
+ emitBranch(Assembler::NonZero, lir->ifTrue(), lir->ifFalse());
+}
+
+void CodeGenerator::visitWasmAtomicLoadI64(LWasmAtomicLoadI64* lir) {
+ Register ptr = ToRegister(lir->ptr());
+ Register64 output = ToOutRegister64(lir);
+ Register64 tmp(InvalidReg, InvalidReg);
+
+ BaseIndex addr(HeapReg, ptr, TimesOne, lir->mir()->access().offset());
+ masm.wasmAtomicLoad64(lir->mir()->access(), addr, tmp, output);
+}
+
+void CodeGenerator::visitWasmAtomicStoreI64(LWasmAtomicStoreI64* lir) {
+ Register ptr = ToRegister(lir->ptr());
+ Register64 value = ToRegister64(lir->value());
+ Register64 tmp(ToRegister(lir->tmpHigh()), ToRegister(lir->tmpLow()));
+
+ BaseIndex addr(HeapReg, ptr, TimesOne, lir->mir()->access().offset());
+ masm.wasmAtomicExchange64(lir->mir()->access(), addr, value, tmp);
+}
+
+void CodeGenerator::visitWasmCompareExchangeI64(LWasmCompareExchangeI64* lir) {
+ Register ptr = ToRegister(lir->ptr());
+ Register64 expected = ToRegister64(lir->expected());
+ Register64 replacement = ToRegister64(lir->replacement());
+ Register64 out = ToOutRegister64(lir);
+
+ BaseIndex addr(HeapReg, ptr, TimesOne, lir->mir()->access().offset());
+ masm.wasmCompareExchange64(lir->mir()->access(), addr, expected, replacement,
+ out);
+}
+
+void CodeGenerator::visitWasmAtomicBinopI64(LWasmAtomicBinopI64* lir) {
+ Register ptr = ToRegister(lir->ptr());
+ Register64 value = ToRegister64(lir->value());
+ Register64 out = ToOutRegister64(lir);
+
+ BaseIndex addr(HeapReg, ptr, TimesOne, lir->access().offset());
+ Register64 tmp(ToRegister(lir->tmpHigh()), ToRegister(lir->tmpLow()));
+ masm.wasmAtomicFetchOp64(lir->access(), lir->operation(), value, addr, tmp,
+ out);
+}
+
+void CodeGenerator::visitWasmAtomicExchangeI64(LWasmAtomicExchangeI64* lir) {
+ Register ptr = ToRegister(lir->ptr());
+ Register64 value = ToRegister64(lir->value());
+ Register64 out = ToOutRegister64(lir);
+
+ BaseIndex addr(HeapReg, ptr, TimesOne, lir->access().offset());
+ masm.wasmAtomicExchange64(lir->access(), addr, value, out);
+}
+
+void CodeGenerator::visitNearbyInt(LNearbyInt*) { MOZ_CRASH("NYI"); }
+
+void CodeGenerator::visitNearbyIntF(LNearbyIntF*) { MOZ_CRASH("NYI"); }
+
+void CodeGenerator::visitSimd128(LSimd128* ins) { MOZ_CRASH("No SIMD"); }
+
+void CodeGenerator::visitWasmTernarySimd128(LWasmTernarySimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmBinarySimd128WithConstant(
+ LWasmBinarySimd128WithConstant* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmVariableShiftSimd128(
+ LWasmVariableShiftSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmConstantShiftSimd128(
+ LWasmConstantShiftSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmSignReplicationSimd128(
+ LWasmSignReplicationSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmShuffleSimd128(LWasmShuffleSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmReplaceLaneSimd128(LWasmReplaceLaneSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmReplaceInt64LaneSimd128(
+ LWasmReplaceInt64LaneSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmScalarToSimd128(LWasmScalarToSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmInt64ToSimd128(LWasmInt64ToSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmUnarySimd128(LWasmUnarySimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmReduceSimd128(LWasmReduceSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmReduceAndBranchSimd128(
+ LWasmReduceAndBranchSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmReduceSimd128ToInt64(
+ LWasmReduceSimd128ToInt64* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmLoadLaneSimd128(LWasmLoadLaneSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmStoreLaneSimd128(LWasmStoreLaneSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
diff --git a/js/src/jit/arm/CodeGenerator-arm.h b/js/src/jit/arm/CodeGenerator-arm.h
new file mode 100644
index 0000000000..f7cf2b263e
--- /dev/null
+++ b/js/src/jit/arm/CodeGenerator-arm.h
@@ -0,0 +1,172 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_CodeGenerator_arm_h
+#define jit_arm_CodeGenerator_arm_h
+
+#include "jit/arm/Assembler-arm.h"
+#include "jit/shared/CodeGenerator-shared.h"
+#include "js/ScalarType.h" // js::Scalar::Type
+
+namespace js {
+namespace jit {
+
+class CodeGeneratorARM;
+class OutOfLineBailout;
+class OutOfLineTableSwitch;
+
+using OutOfLineWasmTruncateCheck =
+ OutOfLineWasmTruncateCheckBase<CodeGeneratorARM>;
+
+class CodeGeneratorARM : public CodeGeneratorShared {
+ friend class MoveResolverARM;
+
+ protected:
+ CodeGeneratorARM(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm);
+
+ NonAssertingLabel deoptLabel_;
+
+ MoveOperand toMoveOperand(LAllocation a) const;
+
+ void bailoutIf(Assembler::Condition condition, LSnapshot* snapshot);
+ void bailoutFrom(Label* label, LSnapshot* snapshot);
+ void bailout(LSnapshot* snapshot);
+
+ template <typename T1, typename T2>
+ void bailoutCmpPtr(Assembler::Condition c, T1 lhs, T2 rhs,
+ LSnapshot* snapshot) {
+ masm.cmpPtr(lhs, rhs);
+ bailoutIf(c, snapshot);
+ }
+ void bailoutTestPtr(Assembler::Condition c, Register lhs, Register rhs,
+ LSnapshot* snapshot) {
+ masm.testPtr(lhs, rhs);
+ bailoutIf(c, snapshot);
+ }
+ template <typename T1, typename T2>
+ void bailoutCmp32(Assembler::Condition c, T1 lhs, T2 rhs,
+ LSnapshot* snapshot) {
+ masm.cmp32(lhs, rhs);
+ bailoutIf(c, snapshot);
+ }
+ template <typename T1, typename T2>
+ void bailoutTest32(Assembler::Condition c, T1 lhs, T2 rhs,
+ LSnapshot* snapshot) {
+ masm.test32(lhs, rhs);
+ bailoutIf(c, snapshot);
+ }
+ void bailoutIfFalseBool(Register reg, LSnapshot* snapshot) {
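+    // Only the boolean's low byte is significant, hence the 0xFF mask.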
+ masm.test32(reg, Imm32(0xFF));
+ bailoutIf(Assembler::Zero, snapshot);
+ }
+
+ template <class T>
+ void generateUDivModZeroCheck(Register rhs, Register output, Label* done,
+ LSnapshot* snapshot, T* mir);
+
+ bool generateOutOfLineCode();
+
+ // Emits a branch that directs control flow to the true block if |cond| is
+ // true, and the false block if |cond| is false.
+ void emitBranch(Assembler::Condition cond, MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse);
+
+ void testNullEmitBranch(Assembler::Condition cond, const ValueOperand& value,
+ MBasicBlock* ifTrue, MBasicBlock* ifFalse) {
+ cond = masm.testNull(cond, value);
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+ void testUndefinedEmitBranch(Assembler::Condition cond,
+ const ValueOperand& value, MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse) {
+ cond = masm.testUndefined(cond, value);
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+ void testObjectEmitBranch(Assembler::Condition cond,
+ const ValueOperand& value, MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse) {
+ cond = masm.testObject(cond, value);
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+ void testZeroEmitBranch(Assembler::Condition cond, Register reg,
+ MBasicBlock* ifTrue, MBasicBlock* ifFalse) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ masm.cmpPtr(reg, ImmWord(0));
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+
+ void emitTableSwitchDispatch(MTableSwitch* mir, Register index,
+ Register base);
+
+ void emitBigIntDiv(LBigIntDiv* ins, Register dividend, Register divisor,
+ Register output, Label* fail);
+ void emitBigIntMod(LBigIntMod* ins, Register dividend, Register divisor,
+ Register output, Label* fail);
+
+ template <typename T>
+ void emitWasmLoad(T* ins);
+ template <typename T>
+ void emitWasmUnalignedLoad(T* ins);
+ template <typename T>
+ void emitWasmStore(T* ins);
+ template <typename T>
+ void emitWasmUnalignedStore(T* ins);
+
+ ValueOperand ToValue(LInstruction* ins, size_t pos);
+ ValueOperand ToTempValue(LInstruction* ins, size_t pos);
+
+ Register64 ToOperandOrRegister64(const LInt64Allocation input);
+
+ // Functions for LTestVAndBranch.
+ void splitTagForTest(const ValueOperand& value, ScratchTagScope& tag);
+
+ void divICommon(MDiv* mir, Register lhs, Register rhs, Register output,
+ LSnapshot* snapshot, Label& done);
+ void modICommon(MMod* mir, Register lhs, Register rhs, Register output,
+ LSnapshot* snapshot, Label& done);
+
+ void generateInvalidateEpilogue();
+
+ // Generating a result.
+ template <typename S, typename T>
+ void atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
+ const S& value, const T& mem,
+ Register flagTemp, Register outTemp,
+ AnyRegister output);
+
+ // Generating no result.
+ template <typename S, typename T>
+ void atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
+ const S& value, const T& mem,
+ Register flagTemp);
+
+ public:
+ void visitOutOfLineBailout(OutOfLineBailout* ool);
+ void visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool);
+ void visitOutOfLineWasmTruncateCheck(OutOfLineWasmTruncateCheck* ool);
+};
+
+typedef CodeGeneratorARM CodeGeneratorSpecific;
+
+// An out-of-line bailout thunk.
+class OutOfLineBailout : public OutOfLineCodeBase<CodeGeneratorARM> {
+ protected: // Silence Clang warning.
+ LSnapshot* snapshot_;
+ uint32_t frameSize_;
+
+ public:
+ OutOfLineBailout(LSnapshot* snapshot, uint32_t frameSize)
+ : snapshot_(snapshot), frameSize_(frameSize) {}
+
+ void accept(CodeGeneratorARM* codegen) override;
+
+ LSnapshot* snapshot() const { return snapshot_; }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_CodeGenerator_arm_h */
diff --git a/js/src/jit/arm/DoubleEntryTable.tbl b/js/src/jit/arm/DoubleEntryTable.tbl
new file mode 100644
index 0000000000..2e9e8c4a34
--- /dev/null
+++ b/js/src/jit/arm/DoubleEntryTable.tbl
@@ -0,0 +1,257 @@
+/* THIS FILE IS AUTOMATICALLY GENERATED BY gen-double-encoder-table.py. */
+ { 0x40000000, { 0, 0, 0 } },
+ { 0x40010000, { 1, 0, 0 } },
+ { 0x40020000, { 2, 0, 0 } },
+ { 0x40030000, { 3, 0, 0 } },
+ { 0x40040000, { 4, 0, 0 } },
+ { 0x40050000, { 5, 0, 0 } },
+ { 0x40060000, { 6, 0, 0 } },
+ { 0x40070000, { 7, 0, 0 } },
+ { 0x40080000, { 8, 0, 0 } },
+ { 0x40090000, { 9, 0, 0 } },
+ { 0x400a0000, { 10, 0, 0 } },
+ { 0x400b0000, { 11, 0, 0 } },
+ { 0x400c0000, { 12, 0, 0 } },
+ { 0x400d0000, { 13, 0, 0 } },
+ { 0x400e0000, { 14, 0, 0 } },
+ { 0x400f0000, { 15, 0, 0 } },
+ { 0x40100000, { 0, 1, 0 } },
+ { 0x40110000, { 1, 1, 0 } },
+ { 0x40120000, { 2, 1, 0 } },
+ { 0x40130000, { 3, 1, 0 } },
+ { 0x40140000, { 4, 1, 0 } },
+ { 0x40150000, { 5, 1, 0 } },
+ { 0x40160000, { 6, 1, 0 } },
+ { 0x40170000, { 7, 1, 0 } },
+ { 0x40180000, { 8, 1, 0 } },
+ { 0x40190000, { 9, 1, 0 } },
+ { 0x401a0000, { 10, 1, 0 } },
+ { 0x401b0000, { 11, 1, 0 } },
+ { 0x401c0000, { 12, 1, 0 } },
+ { 0x401d0000, { 13, 1, 0 } },
+ { 0x401e0000, { 14, 1, 0 } },
+ { 0x401f0000, { 15, 1, 0 } },
+ { 0x40200000, { 0, 2, 0 } },
+ { 0x40210000, { 1, 2, 0 } },
+ { 0x40220000, { 2, 2, 0 } },
+ { 0x40230000, { 3, 2, 0 } },
+ { 0x40240000, { 4, 2, 0 } },
+ { 0x40250000, { 5, 2, 0 } },
+ { 0x40260000, { 6, 2, 0 } },
+ { 0x40270000, { 7, 2, 0 } },
+ { 0x40280000, { 8, 2, 0 } },
+ { 0x40290000, { 9, 2, 0 } },
+ { 0x402a0000, { 10, 2, 0 } },
+ { 0x402b0000, { 11, 2, 0 } },
+ { 0x402c0000, { 12, 2, 0 } },
+ { 0x402d0000, { 13, 2, 0 } },
+ { 0x402e0000, { 14, 2, 0 } },
+ { 0x402f0000, { 15, 2, 0 } },
+ { 0x40300000, { 0, 3, 0 } },
+ { 0x40310000, { 1, 3, 0 } },
+ { 0x40320000, { 2, 3, 0 } },
+ { 0x40330000, { 3, 3, 0 } },
+ { 0x40340000, { 4, 3, 0 } },
+ { 0x40350000, { 5, 3, 0 } },
+ { 0x40360000, { 6, 3, 0 } },
+ { 0x40370000, { 7, 3, 0 } },
+ { 0x40380000, { 8, 3, 0 } },
+ { 0x40390000, { 9, 3, 0 } },
+ { 0x403a0000, { 10, 3, 0 } },
+ { 0x403b0000, { 11, 3, 0 } },
+ { 0x403c0000, { 12, 3, 0 } },
+ { 0x403d0000, { 13, 3, 0 } },
+ { 0x403e0000, { 14, 3, 0 } },
+ { 0x403f0000, { 15, 3, 0 } },
+ { 0x3fc00000, { 0, 4, 0 } },
+ { 0x3fc10000, { 1, 4, 0 } },
+ { 0x3fc20000, { 2, 4, 0 } },
+ { 0x3fc30000, { 3, 4, 0 } },
+ { 0x3fc40000, { 4, 4, 0 } },
+ { 0x3fc50000, { 5, 4, 0 } },
+ { 0x3fc60000, { 6, 4, 0 } },
+ { 0x3fc70000, { 7, 4, 0 } },
+ { 0x3fc80000, { 8, 4, 0 } },
+ { 0x3fc90000, { 9, 4, 0 } },
+ { 0x3fca0000, { 10, 4, 0 } },
+ { 0x3fcb0000, { 11, 4, 0 } },
+ { 0x3fcc0000, { 12, 4, 0 } },
+ { 0x3fcd0000, { 13, 4, 0 } },
+ { 0x3fce0000, { 14, 4, 0 } },
+ { 0x3fcf0000, { 15, 4, 0 } },
+ { 0x3fd00000, { 0, 5, 0 } },
+ { 0x3fd10000, { 1, 5, 0 } },
+ { 0x3fd20000, { 2, 5, 0 } },
+ { 0x3fd30000, { 3, 5, 0 } },
+ { 0x3fd40000, { 4, 5, 0 } },
+ { 0x3fd50000, { 5, 5, 0 } },
+ { 0x3fd60000, { 6, 5, 0 } },
+ { 0x3fd70000, { 7, 5, 0 } },
+ { 0x3fd80000, { 8, 5, 0 } },
+ { 0x3fd90000, { 9, 5, 0 } },
+ { 0x3fda0000, { 10, 5, 0 } },
+ { 0x3fdb0000, { 11, 5, 0 } },
+ { 0x3fdc0000, { 12, 5, 0 } },
+ { 0x3fdd0000, { 13, 5, 0 } },
+ { 0x3fde0000, { 14, 5, 0 } },
+ { 0x3fdf0000, { 15, 5, 0 } },
+ { 0x3fe00000, { 0, 6, 0 } },
+ { 0x3fe10000, { 1, 6, 0 } },
+ { 0x3fe20000, { 2, 6, 0 } },
+ { 0x3fe30000, { 3, 6, 0 } },
+ { 0x3fe40000, { 4, 6, 0 } },
+ { 0x3fe50000, { 5, 6, 0 } },
+ { 0x3fe60000, { 6, 6, 0 } },
+ { 0x3fe70000, { 7, 6, 0 } },
+ { 0x3fe80000, { 8, 6, 0 } },
+ { 0x3fe90000, { 9, 6, 0 } },
+ { 0x3fea0000, { 10, 6, 0 } },
+ { 0x3feb0000, { 11, 6, 0 } },
+ { 0x3fec0000, { 12, 6, 0 } },
+ { 0x3fed0000, { 13, 6, 0 } },
+ { 0x3fee0000, { 14, 6, 0 } },
+ { 0x3fef0000, { 15, 6, 0 } },
+ { 0x3ff00000, { 0, 7, 0 } },
+ { 0x3ff10000, { 1, 7, 0 } },
+ { 0x3ff20000, { 2, 7, 0 } },
+ { 0x3ff30000, { 3, 7, 0 } },
+ { 0x3ff40000, { 4, 7, 0 } },
+ { 0x3ff50000, { 5, 7, 0 } },
+ { 0x3ff60000, { 6, 7, 0 } },
+ { 0x3ff70000, { 7, 7, 0 } },
+ { 0x3ff80000, { 8, 7, 0 } },
+ { 0x3ff90000, { 9, 7, 0 } },
+ { 0x3ffa0000, { 10, 7, 0 } },
+ { 0x3ffb0000, { 11, 7, 0 } },
+ { 0x3ffc0000, { 12, 7, 0 } },
+ { 0x3ffd0000, { 13, 7, 0 } },
+ { 0x3ffe0000, { 14, 7, 0 } },
+ { 0x3fff0000, { 15, 7, 0 } },
+ { 0xc0000000, { 0, 8, 0 } },
+ { 0xc0010000, { 1, 8, 0 } },
+ { 0xc0020000, { 2, 8, 0 } },
+ { 0xc0030000, { 3, 8, 0 } },
+ { 0xc0040000, { 4, 8, 0 } },
+ { 0xc0050000, { 5, 8, 0 } },
+ { 0xc0060000, { 6, 8, 0 } },
+ { 0xc0070000, { 7, 8, 0 } },
+ { 0xc0080000, { 8, 8, 0 } },
+ { 0xc0090000, { 9, 8, 0 } },
+ { 0xc00a0000, { 10, 8, 0 } },
+ { 0xc00b0000, { 11, 8, 0 } },
+ { 0xc00c0000, { 12, 8, 0 } },
+ { 0xc00d0000, { 13, 8, 0 } },
+ { 0xc00e0000, { 14, 8, 0 } },
+ { 0xc00f0000, { 15, 8, 0 } },
+ { 0xc0100000, { 0, 9, 0 } },
+ { 0xc0110000, { 1, 9, 0 } },
+ { 0xc0120000, { 2, 9, 0 } },
+ { 0xc0130000, { 3, 9, 0 } },
+ { 0xc0140000, { 4, 9, 0 } },
+ { 0xc0150000, { 5, 9, 0 } },
+ { 0xc0160000, { 6, 9, 0 } },
+ { 0xc0170000, { 7, 9, 0 } },
+ { 0xc0180000, { 8, 9, 0 } },
+ { 0xc0190000, { 9, 9, 0 } },
+ { 0xc01a0000, { 10, 9, 0 } },
+ { 0xc01b0000, { 11, 9, 0 } },
+ { 0xc01c0000, { 12, 9, 0 } },
+ { 0xc01d0000, { 13, 9, 0 } },
+ { 0xc01e0000, { 14, 9, 0 } },
+ { 0xc01f0000, { 15, 9, 0 } },
+ { 0xc0200000, { 0, 10, 0 } },
+ { 0xc0210000, { 1, 10, 0 } },
+ { 0xc0220000, { 2, 10, 0 } },
+ { 0xc0230000, { 3, 10, 0 } },
+ { 0xc0240000, { 4, 10, 0 } },
+ { 0xc0250000, { 5, 10, 0 } },
+ { 0xc0260000, { 6, 10, 0 } },
+ { 0xc0270000, { 7, 10, 0 } },
+ { 0xc0280000, { 8, 10, 0 } },
+ { 0xc0290000, { 9, 10, 0 } },
+ { 0xc02a0000, { 10, 10, 0 } },
+ { 0xc02b0000, { 11, 10, 0 } },
+ { 0xc02c0000, { 12, 10, 0 } },
+ { 0xc02d0000, { 13, 10, 0 } },
+ { 0xc02e0000, { 14, 10, 0 } },
+ { 0xc02f0000, { 15, 10, 0 } },
+ { 0xc0300000, { 0, 11, 0 } },
+ { 0xc0310000, { 1, 11, 0 } },
+ { 0xc0320000, { 2, 11, 0 } },
+ { 0xc0330000, { 3, 11, 0 } },
+ { 0xc0340000, { 4, 11, 0 } },
+ { 0xc0350000, { 5, 11, 0 } },
+ { 0xc0360000, { 6, 11, 0 } },
+ { 0xc0370000, { 7, 11, 0 } },
+ { 0xc0380000, { 8, 11, 0 } },
+ { 0xc0390000, { 9, 11, 0 } },
+ { 0xc03a0000, { 10, 11, 0 } },
+ { 0xc03b0000, { 11, 11, 0 } },
+ { 0xc03c0000, { 12, 11, 0 } },
+ { 0xc03d0000, { 13, 11, 0 } },
+ { 0xc03e0000, { 14, 11, 0 } },
+ { 0xc03f0000, { 15, 11, 0 } },
+ { 0xbfc00000, { 0, 12, 0 } },
+ { 0xbfc10000, { 1, 12, 0 } },
+ { 0xbfc20000, { 2, 12, 0 } },
+ { 0xbfc30000, { 3, 12, 0 } },
+ { 0xbfc40000, { 4, 12, 0 } },
+ { 0xbfc50000, { 5, 12, 0 } },
+ { 0xbfc60000, { 6, 12, 0 } },
+ { 0xbfc70000, { 7, 12, 0 } },
+ { 0xbfc80000, { 8, 12, 0 } },
+ { 0xbfc90000, { 9, 12, 0 } },
+ { 0xbfca0000, { 10, 12, 0 } },
+ { 0xbfcb0000, { 11, 12, 0 } },
+ { 0xbfcc0000, { 12, 12, 0 } },
+ { 0xbfcd0000, { 13, 12, 0 } },
+ { 0xbfce0000, { 14, 12, 0 } },
+ { 0xbfcf0000, { 15, 12, 0 } },
+ { 0xbfd00000, { 0, 13, 0 } },
+ { 0xbfd10000, { 1, 13, 0 } },
+ { 0xbfd20000, { 2, 13, 0 } },
+ { 0xbfd30000, { 3, 13, 0 } },
+ { 0xbfd40000, { 4, 13, 0 } },
+ { 0xbfd50000, { 5, 13, 0 } },
+ { 0xbfd60000, { 6, 13, 0 } },
+ { 0xbfd70000, { 7, 13, 0 } },
+ { 0xbfd80000, { 8, 13, 0 } },
+ { 0xbfd90000, { 9, 13, 0 } },
+ { 0xbfda0000, { 10, 13, 0 } },
+ { 0xbfdb0000, { 11, 13, 0 } },
+ { 0xbfdc0000, { 12, 13, 0 } },
+ { 0xbfdd0000, { 13, 13, 0 } },
+ { 0xbfde0000, { 14, 13, 0 } },
+ { 0xbfdf0000, { 15, 13, 0 } },
+ { 0xbfe00000, { 0, 14, 0 } },
+ { 0xbfe10000, { 1, 14, 0 } },
+ { 0xbfe20000, { 2, 14, 0 } },
+ { 0xbfe30000, { 3, 14, 0 } },
+ { 0xbfe40000, { 4, 14, 0 } },
+ { 0xbfe50000, { 5, 14, 0 } },
+ { 0xbfe60000, { 6, 14, 0 } },
+ { 0xbfe70000, { 7, 14, 0 } },
+ { 0xbfe80000, { 8, 14, 0 } },
+ { 0xbfe90000, { 9, 14, 0 } },
+ { 0xbfea0000, { 10, 14, 0 } },
+ { 0xbfeb0000, { 11, 14, 0 } },
+ { 0xbfec0000, { 12, 14, 0 } },
+ { 0xbfed0000, { 13, 14, 0 } },
+ { 0xbfee0000, { 14, 14, 0 } },
+ { 0xbfef0000, { 15, 14, 0 } },
+ { 0xbff00000, { 0, 15, 0 } },
+ { 0xbff10000, { 1, 15, 0 } },
+ { 0xbff20000, { 2, 15, 0 } },
+ { 0xbff30000, { 3, 15, 0 } },
+ { 0xbff40000, { 4, 15, 0 } },
+ { 0xbff50000, { 5, 15, 0 } },
+ { 0xbff60000, { 6, 15, 0 } },
+ { 0xbff70000, { 7, 15, 0 } },
+ { 0xbff80000, { 8, 15, 0 } },
+ { 0xbff90000, { 9, 15, 0 } },
+ { 0xbffa0000, { 10, 15, 0 } },
+ { 0xbffb0000, { 11, 15, 0 } },
+ { 0xbffc0000, { 12, 15, 0 } },
+ { 0xbffd0000, { 13, 15, 0 } },
+ { 0xbffe0000, { 14, 15, 0 } },
+ { 0xbfff0000, { 15, 15, 0 } },
diff --git a/js/src/jit/arm/LIR-arm.h b/js/src/jit/arm/LIR-arm.h
new file mode 100644
index 0000000000..395b285c93
--- /dev/null
+++ b/js/src/jit/arm/LIR-arm.h
@@ -0,0 +1,511 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_LIR_arm_h
+#define jit_arm_LIR_arm_h
+
+namespace js {
+namespace jit {
+
+class LBoxFloatingPoint : public LInstructionHelper<2, 1, 1> {
+ MIRType type_;
+
+ public:
+ LIR_HEADER(BoxFloatingPoint);
+
+ LBoxFloatingPoint(const LAllocation& in, const LDefinition& temp,
+ MIRType type)
+ : LInstructionHelper(classOpcode), type_(type) {
+ setOperand(0, in);
+ setTemp(0, temp);
+ }
+
+ MIRType type() const { return type_; }
+ const char* extraName() const { return StringFromMIRType(type_); }
+};
+
+class LUnbox : public LInstructionHelper<1, 2, 0> {
+ public:
+ LIR_HEADER(Unbox);
+
+ LUnbox() : LInstructionHelper(classOpcode) {}
+
+ MUnbox* mir() const { return mir_->toUnbox(); }
+ const LAllocation* payload() { return getOperand(0); }
+ const LAllocation* type() { return getOperand(1); }
+ const char* extraName() const { return StringFromMIRType(mir()->type()); }
+};
+
+class LUnboxFloatingPoint : public LInstructionHelper<1, 2, 0> {
+ MIRType type_;
+
+ public:
+ LIR_HEADER(UnboxFloatingPoint);
+
+ static const size_t Input = 0;
+
+ LUnboxFloatingPoint(const LBoxAllocation& input, MIRType type)
+ : LInstructionHelper(classOpcode), type_(type) {
+ setBoxOperand(Input, input);
+ }
+
+ MUnbox* mir() const { return mir_->toUnbox(); }
+
+ MIRType type() const { return type_; }
+ const char* extraName() const { return StringFromMIRType(type_); }
+};
+
+// Convert a 32-bit unsigned integer to a double.
+class LWasmUint32ToDouble : public LInstructionHelper<1, 1, 0> {
+ public:
+ LIR_HEADER(WasmUint32ToDouble)
+
+ explicit LWasmUint32ToDouble(const LAllocation& input)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, input);
+ }
+};
+
+// Convert a 32-bit unsigned integer to a float32.
+class LWasmUint32ToFloat32 : public LInstructionHelper<1, 1, 0> {
+ public:
+ LIR_HEADER(WasmUint32ToFloat32)
+
+ explicit LWasmUint32ToFloat32(const LAllocation& input)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, input);
+ }
+};
+
+class LDivI : public LBinaryMath<1> {
+ public:
+ LIR_HEADER(DivI);
+
+ LDivI(const LAllocation& lhs, const LAllocation& rhs, const LDefinition& temp)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ setTemp(0, temp);
+ }
+
+ MDiv* mir() const { return mir_->toDiv(); }
+};
+
+class LDivOrModI64
+ : public LCallInstructionHelper<INT64_PIECES, INT64_PIECES * 2 + 1, 0> {
+ public:
+ LIR_HEADER(DivOrModI64)
+
+ static const size_t Lhs = 0;
+ static const size_t Rhs = INT64_PIECES;
+ static const size_t Instance = 2 * INT64_PIECES;
+
+ LDivOrModI64(const LInt64Allocation& lhs, const LInt64Allocation& rhs,
+ const LAllocation& instance)
+ : LCallInstructionHelper(classOpcode) {
+ setInt64Operand(Lhs, lhs);
+ setInt64Operand(Rhs, rhs);
+ setOperand(Instance, instance);
+ }
+
+ MDefinition* mir() const {
+ MOZ_ASSERT(mir_->isWasmBuiltinDivI64() || mir_->isWasmBuiltinModI64());
+ return mir_;
+ }
+ bool canBeDivideByZero() const {
+ if (mir_->isWasmBuiltinModI64()) {
+ return mir_->toWasmBuiltinModI64()->canBeDivideByZero();
+ }
+ return mir_->toWasmBuiltinDivI64()->canBeDivideByZero();
+ }
+ bool canBeNegativeOverflow() const {
+ if (mir_->isWasmBuiltinModI64()) {
+ return mir_->toWasmBuiltinModI64()->canBeNegativeDividend();
+ }
+ return mir_->toWasmBuiltinDivI64()->canBeNegativeOverflow();
+ }
+ wasm::BytecodeOffset bytecodeOffset() const {
+ MOZ_ASSERT(mir_->isWasmBuiltinDivI64() || mir_->isWasmBuiltinModI64());
+ if (mir_->isWasmBuiltinModI64()) {
+ return mir_->toWasmBuiltinModI64()->bytecodeOffset();
+ }
+ return mir_->toWasmBuiltinDivI64()->bytecodeOffset();
+ }
+};
+
+class LUDivOrModI64
+ : public LCallInstructionHelper<INT64_PIECES, INT64_PIECES * 2 + 1, 0> {
+ public:
+ LIR_HEADER(UDivOrModI64)
+
+ static const size_t Lhs = 0;
+ static const size_t Rhs = INT64_PIECES;
+ static const size_t Instance = 2 * INT64_PIECES;
+
+ LUDivOrModI64(const LInt64Allocation& lhs, const LInt64Allocation& rhs,
+ const LAllocation& instance)
+ : LCallInstructionHelper(classOpcode) {
+ setInt64Operand(Lhs, lhs);
+ setInt64Operand(Rhs, rhs);
+ setOperand(Instance, instance);
+ }
+
+ MDefinition* mir() const {
+ MOZ_ASSERT(mir_->isWasmBuiltinDivI64() || mir_->isWasmBuiltinModI64());
+ return mir_;
+ }
+ bool canBeDivideByZero() const {
+ if (mir_->isWasmBuiltinModI64()) {
+ return mir_->toWasmBuiltinModI64()->canBeDivideByZero();
+ }
+ return mir_->toWasmBuiltinDivI64()->canBeDivideByZero();
+ }
+ bool canBeNegativeOverflow() const {
+ if (mir_->isWasmBuiltinModI64()) {
+ return mir_->toWasmBuiltinModI64()->canBeNegativeDividend();
+ }
+ return mir_->toWasmBuiltinDivI64()->canBeNegativeOverflow();
+ }
+ wasm::BytecodeOffset bytecodeOffset() const {
+ MOZ_ASSERT(mir_->isWasmBuiltinDivI64() || mir_->isWasmBuiltinModI64());
+ if (mir_->isWasmBuiltinModI64()) {
+ return mir_->toWasmBuiltinModI64()->bytecodeOffset();
+ }
+ return mir_->toWasmBuiltinDivI64()->bytecodeOffset();
+ }
+};
+
+// LSoftDivI is a software divide for ARM cores that don't support a hardware
+// divide instruction, implemented as a C++ native call.
+class LSoftDivI : public LBinaryCallInstructionHelper<1, 0> {
+ public:
+ LIR_HEADER(SoftDivI);
+
+ LSoftDivI(const LAllocation& lhs, const LAllocation& rhs)
+ : LBinaryCallInstructionHelper(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ }
+
+ MDiv* mir() const { return mir_->toDiv(); }
+};
+
+class LDivPowTwoI : public LInstructionHelper<1, 1, 0> {
+ const int32_t shift_;
+
+ public:
+ LIR_HEADER(DivPowTwoI)
+
+ LDivPowTwoI(const LAllocation& lhs, int32_t shift)
+ : LInstructionHelper(classOpcode), shift_(shift) {
+ setOperand(0, lhs);
+ }
+
+ const LAllocation* numerator() { return getOperand(0); }
+
+ int32_t shift() { return shift_; }
+
+ MDiv* mir() const { return mir_->toDiv(); }
+};
+
+class LModI : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(ModI);
+
+ LModI(const LAllocation& lhs, const LAllocation& rhs)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ }
+
+ MMod* mir() const { return mir_->toMod(); }
+};
+
+class LSoftModI : public LBinaryCallInstructionHelper<1, 1> {
+ public:
+ LIR_HEADER(SoftModI);
+
+ LSoftModI(const LAllocation& lhs, const LAllocation& rhs,
+ const LDefinition& temp)
+ : LBinaryCallInstructionHelper(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ setTemp(0, temp);
+ }
+
+ const LDefinition* callTemp() { return getTemp(0); }
+
+ MMod* mir() const { return mir_->toMod(); }
+};
+
+class LModPowTwoI : public LInstructionHelper<1, 1, 0> {
+ const int32_t shift_;
+
+ public:
+ LIR_HEADER(ModPowTwoI);
+ int32_t shift() { return shift_; }
+
+ LModPowTwoI(const LAllocation& lhs, int32_t shift)
+ : LInstructionHelper(classOpcode), shift_(shift) {
+ setOperand(0, lhs);
+ }
+
+ MMod* mir() const { return mir_->toMod(); }
+};
+
+class LModMaskI : public LInstructionHelper<1, 1, 2> {
+ const int32_t shift_;
+
+ public:
+ LIR_HEADER(ModMaskI);
+
+ LModMaskI(const LAllocation& lhs, const LDefinition& temp1,
+ const LDefinition& temp2, int32_t shift)
+ : LInstructionHelper(classOpcode), shift_(shift) {
+ setOperand(0, lhs);
+ setTemp(0, temp1);
+ setTemp(1, temp2);
+ }
+
+ int32_t shift() const { return shift_; }
+
+ MMod* mir() const { return mir_->toMod(); }
+};
+
+// Table switch dispatching on an integer index.
+class LTableSwitch : public LInstructionHelper<0, 1, 1> {
+ public:
+ LIR_HEADER(TableSwitch);
+
+ LTableSwitch(const LAllocation& in, const LDefinition& inputCopy,
+ MTableSwitch* ins)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, in);
+ setTemp(0, inputCopy);
+ setMir(ins);
+ }
+
+ MTableSwitch* mir() const { return mir_->toTableSwitch(); }
+
+ const LAllocation* index() { return getOperand(0); }
+ const LDefinition* tempInt() { return getTemp(0); }
+  // Provided only so shared CodeGenerator code can use the same accessor
+  // names on every platform; ARM needs no pointer temp.
+ const LDefinition* tempPointer() { return nullptr; }
+};
+
+// Table switch dispatching on a boxed Value, which must first be unboxed to
+// an integer.
+class LTableSwitchV : public LInstructionHelper<0, BOX_PIECES, 2> {
+ public:
+ LIR_HEADER(TableSwitchV);
+
+ LTableSwitchV(const LBoxAllocation& input, const LDefinition& inputCopy,
+ const LDefinition& floatCopy, MTableSwitch* ins)
+ : LInstructionHelper(classOpcode) {
+ setBoxOperand(InputValue, input);
+ setTemp(0, inputCopy);
+ setTemp(1, floatCopy);
+ setMir(ins);
+ }
+
+ MTableSwitch* mir() const { return mir_->toTableSwitch(); }
+
+ static const size_t InputValue = 0;
+
+ const LDefinition* tempInt() { return getTemp(0); }
+ const LDefinition* tempFloat() { return getTemp(1); }
+ const LDefinition* tempPointer() { return nullptr; }
+};
+
+class LMulI : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(MulI);
+
+ LMulI() : LBinaryMath(classOpcode) {}
+
+ MMul* mir() { return mir_->toMul(); }
+};
+
+class LUDiv : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(UDiv);
+
+ LUDiv() : LBinaryMath(classOpcode) {}
+
+ MDiv* mir() { return mir_->toDiv(); }
+};
+
+class LUMod : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(UMod);
+
+ LUMod() : LBinaryMath(classOpcode) {}
+
+ MMod* mir() { return mir_->toMod(); }
+};
+
+class LSoftUDivOrMod : public LBinaryCallInstructionHelper<1, 0> {
+ public:
+ LIR_HEADER(SoftUDivOrMod);
+
+ LSoftUDivOrMod(const LAllocation& lhs, const LAllocation& rhs)
+ : LBinaryCallInstructionHelper(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ }
+
+ MInstruction* mir() { return mir_->toInstruction(); }
+};
+
+class LWasmTruncateToInt64 : public LCallInstructionHelper<INT64_PIECES, 2, 0> {
+ static const size_t Input = 0;
+ static const size_t Instance = 1;
+
+ public:
+ LIR_HEADER(WasmTruncateToInt64);
+
+ LWasmTruncateToInt64(const LAllocation& in, const LAllocation& instance)
+ : LCallInstructionHelper(classOpcode) {
+ setOperand(Input, in);
+ setOperand(Instance, instance);
+ }
+
+ LAllocation* input() { return getOperand(Input); }
+ LAllocation* instance() { return getOperand(Instance); }
+
+ MWasmBuiltinTruncateToInt64* mir() const {
+ return mir_->toWasmBuiltinTruncateToInt64();
+ }
+};
+
+class LInt64ToFloatingPointCall
+ : public LCallInstructionHelper<1, INT64_PIECES + 1, 0> {
+ public:
+ LIR_HEADER(Int64ToFloatingPointCall);
+
+ static const size_t Input = 0;
+ static const size_t Instance = INT64_PIECES;
+
+ LInt64ToFloatingPointCall(const LInt64Allocation& in,
+ const LAllocation& instance)
+ : LCallInstructionHelper(classOpcode) {
+ setInt64Operand(Input, in);
+ setOperand(Instance, instance);
+ }
+
+ LAllocation* input() { return getOperand(Input); }
+ LAllocation* instance() { return getOperand(Instance); }
+
+ MBuiltinInt64ToFloatingPoint* mir() const {
+ return mir_->toBuiltinInt64ToFloatingPoint();
+ }
+};
+
+class LWasmAtomicLoadI64 : public LInstructionHelper<INT64_PIECES, 1, 0> {
+ public:
+ LIR_HEADER(WasmAtomicLoadI64);
+
+ explicit LWasmAtomicLoadI64(const LAllocation& ptr)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, ptr);
+ }
+
+ MWasmLoad* mir() const { return mir_->toWasmLoad(); }
+ const LAllocation* ptr() { return getOperand(0); }
+};
+
+class LWasmAtomicStoreI64 : public LInstructionHelper<0, 1 + INT64_PIECES, 2> {
+ public:
+ LIR_HEADER(WasmAtomicStoreI64);
+
+ LWasmAtomicStoreI64(const LAllocation& ptr, const LInt64Allocation& value,
+ const LDefinition& tmpLow, const LDefinition& tmpHigh)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, ptr);
+ setInt64Operand(1, value);
+ setTemp(0, tmpLow);
+ setTemp(1, tmpHigh);
+ }
+
+ MWasmStore* mir() const { return mir_->toWasmStore(); }
+ const LAllocation* ptr() { return getOperand(0); }
+ const LInt64Allocation value() { return getInt64Operand(1); }
+ const LDefinition* tmpLow() { return getTemp(0); }
+ const LDefinition* tmpHigh() { return getTemp(1); }
+};
+
+class LWasmCompareExchangeI64
+ : public LInstructionHelper<INT64_PIECES, 1 + 2 * INT64_PIECES, 0> {
+ public:
+ LIR_HEADER(WasmCompareExchangeI64);
+
+ LWasmCompareExchangeI64(const LAllocation& ptr,
+ const LInt64Allocation& expected,
+ const LInt64Allocation& replacement)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, ptr);
+ setInt64Operand(1, expected);
+ setInt64Operand(1 + INT64_PIECES, replacement);
+ }
+
+ MWasmCompareExchangeHeap* mir() const {
+ return mir_->toWasmCompareExchangeHeap();
+ }
+ const LAllocation* ptr() { return getOperand(0); }
+ const LInt64Allocation expected() { return getInt64Operand(1); }
+ const LInt64Allocation replacement() {
+ return getInt64Operand(1 + INT64_PIECES);
+ }
+};
+
+class LWasmAtomicBinopI64
+ : public LInstructionHelper<INT64_PIECES, 1 + INT64_PIECES, 2> {
+ const wasm::MemoryAccessDesc& access_;
+ AtomicOp op_;
+
+ public:
+ LIR_HEADER(WasmAtomicBinopI64);
+
+ LWasmAtomicBinopI64(const LAllocation& ptr, const LInt64Allocation& value,
+ const LDefinition& tmpLow, const LDefinition& tmpHigh,
+ const wasm::MemoryAccessDesc& access, AtomicOp op)
+ : LInstructionHelper(classOpcode), access_(access), op_(op) {
+ setOperand(0, ptr);
+ setInt64Operand(1, value);
+ setTemp(0, tmpLow);
+ setTemp(1, tmpHigh);
+ }
+
+ const LAllocation* ptr() { return getOperand(0); }
+ const LInt64Allocation value() { return getInt64Operand(1); }
+ const wasm::MemoryAccessDesc& access() { return access_; }
+ AtomicOp operation() const { return op_; }
+ const LDefinition* tmpLow() { return getTemp(0); }
+ const LDefinition* tmpHigh() { return getTemp(1); }
+};
+
+class LWasmAtomicExchangeI64
+ : public LInstructionHelper<INT64_PIECES, 1 + INT64_PIECES, 0> {
+ const wasm::MemoryAccessDesc& access_;
+
+ public:
+ LIR_HEADER(WasmAtomicExchangeI64);
+
+ LWasmAtomicExchangeI64(const LAllocation& ptr, const LInt64Allocation& value,
+ const wasm::MemoryAccessDesc& access)
+ : LInstructionHelper(classOpcode), access_(access) {
+ setOperand(0, ptr);
+ setInt64Operand(1, value);
+ }
+
+ const LAllocation* ptr() { return getOperand(0); }
+ const LInt64Allocation value() { return getInt64Operand(1); }
+ const wasm::MemoryAccessDesc& access() { return access_; }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_LIR_arm_h */
diff --git a/js/src/jit/arm/Lowering-arm.cpp b/js/src/jit/arm/Lowering-arm.cpp
new file mode 100644
index 0000000000..e384ee7911
--- /dev/null
+++ b/js/src/jit/arm/Lowering-arm.cpp
@@ -0,0 +1,1223 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm/Lowering-arm.h"
+
+#include "mozilla/MathAlgorithms.h"
+
+#include "jit/arm/Assembler-arm.h"
+#include "jit/Lowering.h"
+#include "jit/MIR.h"
+#include "jit/shared/Lowering-shared-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+using mozilla::FloorLog2;
+
+LBoxAllocation LIRGeneratorARM::useBoxFixed(MDefinition* mir, Register reg1,
+ Register reg2, bool useAtStart) {
+ MOZ_ASSERT(mir->type() == MIRType::Value);
+ MOZ_ASSERT(reg1 != reg2);
+
+ ensureDefined(mir);
+ return LBoxAllocation(LUse(reg1, mir->virtualRegister(), useAtStart),
+ LUse(reg2, VirtualRegisterOfPayload(mir), useAtStart));
+}
+
+LAllocation LIRGeneratorARM::useByteOpRegister(MDefinition* mir) {
+ return useRegister(mir);
+}
+
+LAllocation LIRGeneratorARM::useByteOpRegisterAtStart(MDefinition* mir) {
+ return useRegisterAtStart(mir);
+}
+
+LAllocation LIRGeneratorARM::useByteOpRegisterOrNonDoubleConstant(
+ MDefinition* mir) {
+ return useRegisterOrNonDoubleConstant(mir);
+}
+
+LDefinition LIRGeneratorARM::tempByteOpRegister() { return temp(); }
+
+void LIRGenerator::visitBox(MBox* box) {
+ MDefinition* inner = box->getOperand(0);
+
+ // If the box wrapped a double, it needs a new register.
+ if (IsFloatingPointType(inner->type())) {
+ defineBox(new (alloc()) LBoxFloatingPoint(
+ useRegisterAtStart(inner), tempCopy(inner, 0), inner->type()),
+ box);
+ return;
+ }
+
+ if (box->canEmitAtUses()) {
+ emitAtUses(box);
+ return;
+ }
+
+ if (inner->isConstant()) {
+ defineBox(new (alloc()) LValue(inner->toConstant()->toJSValue()), box);
+ return;
+ }
+
+ LBox* lir = new (alloc()) LBox(use(inner), inner->type());
+
+ // Otherwise, we should not define a new register for the payload portion
+ // of the output, so bypass defineBox().
+ uint32_t vreg = getVirtualRegister();
+
+ // Note that because we're using BogusTemp(), we do not change the type of
+ // the definition. We also do not define the first output as "TYPE",
+ // because it has no corresponding payload at (vreg + 1). Also note that
+ // although we copy the input's original type for the payload half of the
+ // definition, this is only for clarity. BogusTemp() definitions are
+ // ignored.
+ lir->setDef(0, LDefinition(vreg, LDefinition::GENERAL));
+ lir->setDef(1, LDefinition::BogusTemp());
+ box->setVirtualRegister(vreg);
+ add(lir);
+}
+
+void LIRGenerator::visitUnbox(MUnbox* unbox) {
+ MDefinition* inner = unbox->getOperand(0);
+
+ // An unbox on arm reads in a type tag (either in memory or a register) and
+ // a payload. Unlike most instructions consuming a box, we ask for the type
+ // second, so that the result can re-use the first input.
+ MOZ_ASSERT(inner->type() == MIRType::Value);
+
+ ensureDefined(inner);
+
+ if (IsFloatingPointType(unbox->type())) {
+ LUnboxFloatingPoint* lir =
+ new (alloc()) LUnboxFloatingPoint(useBox(inner), unbox->type());
+ if (unbox->fallible()) {
+ assignSnapshot(lir, unbox->bailoutKind());
+ }
+ define(lir, unbox);
+ return;
+ }
+
+  // Swap the order in which we use the box pieces so that we can reuse the
+  // payload register.
+ LUnbox* lir = new (alloc()) LUnbox;
+ lir->setOperand(0, usePayloadInRegisterAtStart(inner));
+ lir->setOperand(1, useType(inner, LUse::REGISTER));
+
+ if (unbox->fallible()) {
+ assignSnapshot(lir, unbox->bailoutKind());
+ }
+
+ // Types and payloads form two separate intervals. If the type becomes dead
+ // before the payload, it could be used as a Value without the type being
+ // recoverable. Unbox's purpose is to eagerly kill the definition of a type
+ // tag, so keeping both alive (for the purpose of gcmaps) is unappealing.
+ // Instead, we create a new virtual register.
+ defineReuseInput(lir, unbox, 0);
+}
+
+void LIRGenerator::visitReturnImpl(MDefinition* opd, bool isGenerator) {
+ MOZ_ASSERT(opd->type() == MIRType::Value);
+
+ LReturn* ins = new (alloc()) LReturn(isGenerator);
+ ins->setOperand(0, LUse(JSReturnReg_Type));
+ ins->setOperand(1, LUse(JSReturnReg_Data));
+ fillBoxUses(ins, 0, opd);
+ add(ins);
+}
+
+void LIRGeneratorARM::defineInt64Phi(MPhi* phi, size_t lirIndex) {
+ LPhi* low = current->getPhi(lirIndex + INT64LOW_INDEX);
+ LPhi* high = current->getPhi(lirIndex + INT64HIGH_INDEX);
+
+ uint32_t lowVreg = getVirtualRegister();
+
+ phi->setVirtualRegister(lowVreg);
+
+ uint32_t highVreg = getVirtualRegister();
+ MOZ_ASSERT(lowVreg + INT64HIGH_INDEX == highVreg + INT64LOW_INDEX);
+
+ low->setDef(0, LDefinition(lowVreg, LDefinition::INT32));
+ high->setDef(0, LDefinition(highVreg, LDefinition::INT32));
+ annotate(high);
+ annotate(low);
+}
+
+void LIRGeneratorARM::lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition,
+ LBlock* block, size_t lirIndex) {
+ MDefinition* operand = phi->getOperand(inputPosition);
+ LPhi* low = block->getPhi(lirIndex + INT64LOW_INDEX);
+ LPhi* high = block->getPhi(lirIndex + INT64HIGH_INDEX);
+ low->setOperand(inputPosition,
+ LUse(operand->virtualRegister() + INT64LOW_INDEX, LUse::ANY));
+ high->setOperand(
+ inputPosition,
+ LUse(operand->virtualRegister() + INT64HIGH_INDEX, LUse::ANY));
+}
+
+// x = !y
+void LIRGeneratorARM::lowerForALU(LInstructionHelper<1, 1, 0>* ins,
+ MDefinition* mir, MDefinition* input) {
+ ins->setOperand(
+ 0, ins->snapshot() ? useRegister(input) : useRegisterAtStart(input));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+// z = x+y
+void LIRGeneratorARM::lowerForALU(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+  // Some operations (e.g. MulI) check their inputs after writing the result,
+  // but only on bailout paths, so inputs may be used at start when there is
+  // no snapshot.
+ ins->setOperand(0,
+ ins->snapshot() ? useRegister(lhs) : useRegisterAtStart(lhs));
+ ins->setOperand(1, ins->snapshot() ? useRegisterOrConstant(rhs)
+ : useRegisterOrConstantAtStart(rhs));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+void LIRGeneratorARM::lowerForALUInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES, 0>* ins, MDefinition* mir,
+ MDefinition* input) {
+ ins->setInt64Operand(0, useInt64RegisterAtStart(input));
+ defineInt64ReuseInput(ins, mir, 0);
+}
+
+void LIRGeneratorARM::lowerForALUInt64(
+ LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs) {
+ ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+ ins->setInt64Operand(INT64_PIECES, useInt64OrConstant(rhs));
+ defineInt64ReuseInput(ins, mir, 0);
+}
+
+void LIRGeneratorARM::lowerForMulInt64(LMulI64* ins, MMul* mir,
+ MDefinition* lhs, MDefinition* rhs) {
+ bool needsTemp = true;
+
+ if (rhs->isConstant()) {
+ int64_t constant = rhs->toConstant()->toInt64();
+ int32_t shift = mozilla::FloorLog2(constant);
+ // See special cases in CodeGeneratorARM::visitMulI64
+ if (constant >= -1 && constant <= 2) {
+ needsTemp = false;
+ }
+ if (constant > 0 && int64_t(1) << shift == constant) {
+ needsTemp = false;
+ }
+ }
+
+ ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+ ins->setInt64Operand(INT64_PIECES, useInt64OrConstant(rhs));
+ if (needsTemp) {
+ ins->setTemp(0, temp());
+ }
+
+ defineInt64ReuseInput(ins, mir, 0);
+}
+
+void LIRGeneratorARM::lowerForCompareI64AndBranch(MTest* mir, MCompare* comp,
+ JSOp op, MDefinition* left,
+ MDefinition* right,
+ MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse) {
+ LCompareI64AndBranch* lir = new (alloc())
+ LCompareI64AndBranch(comp, op, useInt64Register(left),
+ useInt64OrConstant(right), ifTrue, ifFalse);
+ add(lir, mir);
+}
+
+void LIRGeneratorARM::lowerForFPU(LInstructionHelper<1, 1, 0>* ins,
+ MDefinition* mir, MDefinition* input) {
+ ins->setOperand(0, useRegisterAtStart(input));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+template <size_t Temps>
+void LIRGeneratorARM::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ ins->setOperand(0, useRegisterAtStart(lhs));
+ ins->setOperand(1, useRegisterAtStart(rhs));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+template void LIRGeneratorARM::lowerForFPU(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs);
+template void LIRGeneratorARM::lowerForFPU(LInstructionHelper<1, 2, 1>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs);
+
+void LIRGeneratorARM::lowerForBitAndAndBranch(LBitAndAndBranch* baab,
+ MInstruction* mir,
+ MDefinition* lhs,
+ MDefinition* rhs) {
+ baab->setOperand(0, useRegisterAtStart(lhs));
+ baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
+ add(baab, mir);
+}
+
+void LIRGeneratorARM::lowerWasmBuiltinTruncateToInt32(
+ MWasmBuiltinTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);
+
+ if (opd->type() == MIRType::Double) {
+ define(new (alloc()) LWasmBuiltinTruncateDToInt32(
+ useRegister(opd), useFixedAtStart(ins->instance(), InstanceReg),
+ LDefinition::BogusTemp()),
+ ins);
+ return;
+ }
+
+ define(new (alloc()) LWasmBuiltinTruncateFToInt32(
+ useRegister(opd), useFixedAtStart(ins->instance(), InstanceReg),
+ LDefinition::BogusTemp()),
+ ins);
+}
+
+void LIRGeneratorARM::lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition,
+ LBlock* block, size_t lirIndex) {
+ MDefinition* operand = phi->getOperand(inputPosition);
+ LPhi* type = block->getPhi(lirIndex + VREG_TYPE_OFFSET);
+ LPhi* payload = block->getPhi(lirIndex + VREG_DATA_OFFSET);
+ type->setOperand(
+ inputPosition,
+ LUse(operand->virtualRegister() + VREG_TYPE_OFFSET, LUse::ANY));
+ payload->setOperand(inputPosition,
+ LUse(VirtualRegisterOfPayload(operand), LUse::ANY));
+}
+
+void LIRGeneratorARM::lowerForShift(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ ins->setOperand(0, useRegister(lhs));
+ ins->setOperand(1, useRegisterOrConstant(rhs));
+ define(ins, mir);
+}
+
+template <size_t Temps>
+void LIRGeneratorARM::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs) {
+ if (mir->isRotate() && !rhs->isConstant()) {
+ ins->setTemp(0, temp());
+ }
+
+ ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+ ins->setOperand(INT64_PIECES, useRegisterOrConstant(rhs));
+ defineInt64ReuseInput(ins, mir, 0);
+}
+
+template void LIRGeneratorARM::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+template void LIRGeneratorARM::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 1>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+
+void LIRGeneratorARM::lowerDivI(MDiv* div) {
+ if (div->isUnsigned()) {
+ lowerUDiv(div);
+ return;
+ }
+
+ // Division instructions are slow. Division by constant denominators can be
+ // rewritten to use other instructions.
+ if (div->rhs()->isConstant()) {
+ int32_t rhs = div->rhs()->toConstant()->toInt32();
+ // Check for division by a positive power of two, which is an easy and
+ // important case to optimize. Note that other optimizations are also
+ // possible; division by negative powers of two can be optimized in a
+ // similar manner as positive powers of two, and division by other
+ // constants can be optimized by a reciprocal multiplication technique.
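+    // For example, x / 8 lowers to LDivPowTwoI with shift == 3.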
+ int32_t shift = FloorLog2(rhs);
+ if (rhs > 0 && 1 << shift == rhs) {
+ LDivPowTwoI* lir =
+ new (alloc()) LDivPowTwoI(useRegisterAtStart(div->lhs()), shift);
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+ return;
+ }
+ }
+
+ if (HasIDIV()) {
+ LDivI* lir = new (alloc())
+ LDivI(useRegister(div->lhs()), useRegister(div->rhs()), temp());
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+ return;
+ }
+
+ LSoftDivI* lir = new (alloc()) LSoftDivI(useFixedAtStart(div->lhs(), r0),
+ useFixedAtStart(div->rhs(), r1));
+
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+
+ defineReturn(lir, div);
+}
+
+void LIRGeneratorARM::lowerNegI(MInstruction* ins, MDefinition* input) {
+ define(new (alloc()) LNegI(useRegisterAtStart(input)), ins);
+}
+
+void LIRGeneratorARM::lowerNegI64(MInstruction* ins, MDefinition* input) {
+  // Reuse the input. Define + use-at-start would risk the output using the
+  // same register pair as the input but in reverse order. Reusing the input
+  // probably spills less than the alternative, define + use.
+ defineInt64ReuseInput(new (alloc()) LNegI64(useInt64RegisterAtStart(input)),
+ ins, 0);
+}
+
+void LIRGenerator::visitAbs(MAbs* ins) {
+ define(allocateAbs(ins, useRegisterAtStart(ins->input())), ins);
+}
+
+void LIRGeneratorARM::lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs) {
+ LMulI* lir = new (alloc()) LMulI;
+ if (mul->fallible()) {
+ assignSnapshot(lir, mul->bailoutKind());
+ }
+ lowerForALU(lir, mul, lhs, rhs);
+}
+
+void LIRGeneratorARM::lowerModI(MMod* mod) {
+ if (mod->isUnsigned()) {
+ lowerUMod(mod);
+ return;
+ }
+
+ if (mod->rhs()->isConstant()) {
+ int32_t rhs = mod->rhs()->toConstant()->toInt32();
+ int32_t shift = FloorLog2(rhs);
+ if (rhs > 0 && 1 << shift == rhs) {
+ LModPowTwoI* lir =
+ new (alloc()) LModPowTwoI(useRegister(mod->lhs()), shift);
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+ return;
+ }
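+    // Next case, a sketch: an rhs of the form (1 << k) - 1 (e.g. 7 == 0b111)
+    // lets LModMaskI compute the remainder with a masking sequence instead
+    // of a division; the shift + 1 passed below is that k.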
+ if (shift < 31 && (1 << (shift + 1)) - 1 == rhs) {
+ MOZ_ASSERT(rhs);
+ LModMaskI* lir = new (alloc())
+ LModMaskI(useRegister(mod->lhs()), temp(), temp(), shift + 1);
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+ return;
+ }
+ }
+
+ if (HasIDIV()) {
+ LModI* lir =
+ new (alloc()) LModI(useRegister(mod->lhs()), useRegister(mod->rhs()));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+ return;
+ }
+
+ LSoftModI* lir =
+ new (alloc()) LSoftModI(useFixedAtStart(mod->lhs(), r0),
+ useFixedAtStart(mod->rhs(), r1), tempFixed(r2));
+
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+
+ defineReturn(lir, mod);
+}
+
+void LIRGeneratorARM::lowerDivI64(MDiv* div) {
+ MOZ_CRASH("We use MWasmBuiltinDivI64 instead.");
+}
+
+void LIRGeneratorARM::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) {
+ if (div->isUnsigned()) {
+ LUDivOrModI64* lir = new (alloc())
+ LUDivOrModI64(useInt64RegisterAtStart(div->lhs()),
+ useInt64RegisterAtStart(div->rhs()),
+ useFixedAtStart(div->instance(), InstanceReg));
+ defineReturn(lir, div);
+ return;
+ }
+
+ LDivOrModI64* lir = new (alloc()) LDivOrModI64(
+ useInt64RegisterAtStart(div->lhs()), useInt64RegisterAtStart(div->rhs()),
+ useFixedAtStart(div->instance(), InstanceReg));
+ defineReturn(lir, div);
+}
+
+void LIRGeneratorARM::lowerModI64(MMod* mod) {
+ MOZ_CRASH("We use MWasmBuiltinModI64 instead.");
+}
+
+void LIRGeneratorARM::lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod) {
+ if (mod->isUnsigned()) {
+ LUDivOrModI64* lir = new (alloc())
+ LUDivOrModI64(useInt64RegisterAtStart(mod->lhs()),
+ useInt64RegisterAtStart(mod->rhs()),
+ useFixedAtStart(mod->instance(), InstanceReg));
+ defineReturn(lir, mod);
+ return;
+ }
+
+ LDivOrModI64* lir = new (alloc()) LDivOrModI64(
+ useInt64RegisterAtStart(mod->lhs()), useInt64RegisterAtStart(mod->rhs()),
+ useFixedAtStart(mod->instance(), InstanceReg));
+ defineReturn(lir, mod);
+}
+
+void LIRGeneratorARM::lowerUDivI64(MDiv* div) {
+ MOZ_CRASH("We use MWasmBuiltinDivI64 instead.");
+}
+
+void LIRGeneratorARM::lowerUModI64(MMod* mod) {
+ MOZ_CRASH("We use MWasmBuiltinModI64 instead.");
+}
+
+void LIRGenerator::visitPowHalf(MPowHalf* ins) {
+ MDefinition* input = ins->input();
+ MOZ_ASSERT(input->type() == MIRType::Double);
+ LPowHalfD* lir = new (alloc()) LPowHalfD(useRegisterAtStart(input));
+ defineReuseInput(lir, ins, 0);
+}
+
+void LIRGeneratorARM::lowerWasmSelectI(MWasmSelect* select) {
+ auto* lir = new (alloc())
+ LWasmSelect(useRegisterAtStart(select->trueExpr()),
+ useAny(select->falseExpr()), useRegister(select->condExpr()));
+ defineReuseInput(lir, select, LWasmSelect::TrueExprIndex);
+}
+
+void LIRGeneratorARM::lowerWasmSelectI64(MWasmSelect* select) {
+ auto* lir = new (alloc()) LWasmSelectI64(
+ useInt64RegisterAtStart(select->trueExpr()),
+ useInt64(select->falseExpr()), useRegister(select->condExpr()));
+ defineInt64ReuseInput(lir, select, LWasmSelectI64::TrueExprIndex);
+}
+
+LTableSwitch* LIRGeneratorARM::newLTableSwitch(const LAllocation& in,
+ const LDefinition& inputCopy,
+ MTableSwitch* tableswitch) {
+ return new (alloc()) LTableSwitch(in, inputCopy, tableswitch);
+}
+
+LTableSwitchV* LIRGeneratorARM::newLTableSwitchV(MTableSwitch* tableswitch) {
+ return new (alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)), temp(),
+ tempDouble(), tableswitch);
+}
+
+void LIRGeneratorARM::lowerUrshD(MUrsh* mir) {
+ MDefinition* lhs = mir->lhs();
+ MDefinition* rhs = mir->rhs();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Int32);
+ MOZ_ASSERT(rhs->type() == MIRType::Int32);
+
+ LUrshD* lir = new (alloc())
+ LUrshD(useRegister(lhs), useRegisterOrConstant(rhs), temp());
+ define(lir, mir);
+}
+
+void LIRGeneratorARM::lowerPowOfTwoI(MPow* mir) {
+ int32_t base = mir->input()->toConstant()->toInt32();
+ MDefinition* power = mir->power();
+
+ auto* lir = new (alloc()) LPowOfTwoI(useRegister(power), base);
+ assignSnapshot(lir, mir->bailoutKind());
+ define(lir, mir);
+}
+
+void LIRGeneratorARM::lowerBigIntLsh(MBigIntLsh* ins) {
+ auto* lir = new (alloc()) LBigIntLsh(
+ useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), temp(), temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM::lowerBigIntRsh(MBigIntRsh* ins) {
+ auto* lir = new (alloc()) LBigIntRsh(
+ useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), temp(), temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM::lowerBigIntDiv(MBigIntDiv* ins) {
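+  // Without hardware division the codegen presumably falls back to a
+  // runtime call (cf. the bundled aeabi_idivmod), whose results land in
+  // r0/r1, hence the fixed temps on the soft path.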
+ LDefinition temp1, temp2;
+ if (HasIDIV()) {
+ temp1 = temp();
+ temp2 = temp();
+ } else {
+ temp1 = tempFixed(r0);
+ temp2 = tempFixed(r1);
+ }
+ auto* lir = new (alloc()) LBigIntDiv(useRegister(ins->lhs()),
+ useRegister(ins->rhs()), temp1, temp2);
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM::lowerBigIntMod(MBigIntMod* ins) {
+ LDefinition temp1, temp2;
+ if (HasIDIV()) {
+ temp1 = temp();
+ temp2 = temp();
+ } else {
+ temp1 = tempFixed(r0);
+ temp2 = tempFixed(r1);
+ }
+ auto* lir = new (alloc()) LBigIntMod(useRegister(ins->lhs()),
+ useRegister(ins->rhs()), temp1, temp2);
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGenerator::visitWasmNeg(MWasmNeg* ins) {
+ if (ins->type() == MIRType::Int32) {
+ define(new (alloc()) LNegI(useRegisterAtStart(ins->input())), ins);
+ } else if (ins->type() == MIRType::Float32) {
+ define(new (alloc()) LNegF(useRegisterAtStart(ins->input())), ins);
+ } else {
+ MOZ_ASSERT(ins->type() == MIRType::Double);
+ define(new (alloc()) LNegD(useRegisterAtStart(ins->input())), ins);
+ }
+}
+
+void LIRGeneratorARM::lowerUDiv(MDiv* div) {
+ MDefinition* lhs = div->getOperand(0);
+ MDefinition* rhs = div->getOperand(1);
+
+ if (HasIDIV()) {
+ LUDiv* lir = new (alloc()) LUDiv;
+ lir->setOperand(0, useRegister(lhs));
+ lir->setOperand(1, useRegister(rhs));
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+ return;
+ }
+
+ LSoftUDivOrMod* lir = new (alloc())
+ LSoftUDivOrMod(useFixedAtStart(lhs, r0), useFixedAtStart(rhs, r1));
+
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+
+ defineReturn(lir, div);
+}
+
+void LIRGeneratorARM::lowerUMod(MMod* mod) {
+ MDefinition* lhs = mod->getOperand(0);
+ MDefinition* rhs = mod->getOperand(1);
+
+ if (HasIDIV()) {
+ LUMod* lir = new (alloc()) LUMod;
+ lir->setOperand(0, useRegister(lhs));
+ lir->setOperand(1, useRegister(rhs));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+ return;
+ }
+
+ LSoftUDivOrMod* lir = new (alloc())
+ LSoftUDivOrMod(useFixedAtStart(lhs, r0), useFixedAtStart(rhs, r1));
+
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+
+ defineReturn(lir, mod);
+}
+
+void LIRGenerator::visitWasmUnsignedToDouble(MWasmUnsignedToDouble* ins) {
+ MOZ_ASSERT(ins->input()->type() == MIRType::Int32);
+ LWasmUint32ToDouble* lir =
+ new (alloc()) LWasmUint32ToDouble(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmUnsignedToFloat32(MWasmUnsignedToFloat32* ins) {
+ MOZ_ASSERT(ins->input()->type() == MIRType::Int32);
+ LWasmUint32ToFloat32* lir =
+ new (alloc()) LWasmUint32ToFloat32(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmHeapBase(MWasmHeapBase* ins) {
+ auto* lir = new (alloc()) LWasmHeapBase(LAllocation());
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmLoad(MWasmLoad* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ if (ins->access().type() == Scalar::Int64 && ins->access().isAtomic()) {
+ auto* lir = new (alloc()) LWasmAtomicLoadI64(useRegisterAtStart(base));
+ defineInt64Fixed(lir, ins,
+ LInt64Allocation(LAllocation(AnyRegister(IntArgReg1)),
+ LAllocation(AnyRegister(IntArgReg0))));
+ return;
+ }
+
+ LAllocation ptr = useRegisterAtStart(base);
+
+ if (ins->type() == MIRType::Int64) {
+ auto* lir = new (alloc()) LWasmLoadI64(ptr);
+ if (ins->access().offset() || ins->access().type() == Scalar::Int64) {
+ lir->setTemp(0, tempCopy(base, 0));
+ }
+ defineInt64(lir, ins);
+ return;
+ }
+
+ auto* lir = new (alloc()) LWasmLoad(ptr);
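+  // A writable copy of the base is taken when an offset must be folded in:
+  // `ptr` is allocated at-start and may alias the output, so (presumably)
+  // the code generator adds the offset into this temp instead of clobbering
+  // the input.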
+ if (ins->access().offset()) {
+ lir->setTemp(0, tempCopy(base, 0));
+ }
+
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmStore(MWasmStore* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ if (ins->access().type() == Scalar::Int64 && ins->access().isAtomic()) {
+ auto* lir = new (alloc()) LWasmAtomicStoreI64(
+ useRegister(base),
+ useInt64Fixed(ins->value(), Register64(IntArgReg1, IntArgReg0)),
+ tempFixed(IntArgReg2), tempFixed(IntArgReg3));
+ add(lir, ins);
+ return;
+ }
+
+ LAllocation ptr = useRegisterAtStart(base);
+
+ if (ins->value()->type() == MIRType::Int64) {
+ LInt64Allocation value = useInt64RegisterAtStart(ins->value());
+ auto* lir = new (alloc()) LWasmStoreI64(ptr, value);
+ if (ins->access().offset() || ins->access().type() == Scalar::Int64) {
+ lir->setTemp(0, tempCopy(base, 0));
+ }
+ add(lir, ins);
+ return;
+ }
+
+ LAllocation value = useRegisterAtStart(ins->value());
+ auto* lir = new (alloc()) LWasmStore(ptr, value);
+
+ if (ins->access().offset()) {
+ lir->setTemp(0, tempCopy(base, 0));
+ }
+
+ add(lir, ins);
+}
+
+void LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+  // On ARM it is best to keep 'base' in a register if a bounds check is
+  // needed.
+ LAllocation baseAlloc;
+ LAllocation limitAlloc;
+
+ if (base->isConstant() && !ins->needsBoundsCheck()) {
+ // A bounds check is only skipped for a positive index.
+ MOZ_ASSERT(base->toConstant()->toInt32() >= 0);
+ baseAlloc = LAllocation(base->toConstant());
+ } else {
+ baseAlloc = useRegisterAtStart(base);
+ if (ins->needsBoundsCheck()) {
+ MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
+ MOZ_ASSERT(boundsCheckLimit->type() == MIRType::Int32);
+ limitAlloc = useRegisterAtStart(boundsCheckLimit);
+ }
+ }
+
+ define(new (alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, LAllocation()),
+ ins);
+}
+
+void LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ LAllocation baseAlloc;
+ LAllocation limitAlloc;
+
+ if (base->isConstant() && !ins->needsBoundsCheck()) {
+ MOZ_ASSERT(base->toConstant()->toInt32() >= 0);
+ baseAlloc = LAllocation(base->toConstant());
+ } else {
+ baseAlloc = useRegisterAtStart(base);
+ if (ins->needsBoundsCheck()) {
+ MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
+ MOZ_ASSERT(boundsCheckLimit->type() == MIRType::Int32);
+ limitAlloc = useRegisterAtStart(boundsCheckLimit);
+ }
+ }
+
+ add(new (alloc()) LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()),
+ limitAlloc, LAllocation()),
+ ins);
+}
+
+void LIRGeneratorARM::lowerTruncateDToInt32(MTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Double);
+
+ define(new (alloc())
+ LTruncateDToInt32(useRegister(opd), LDefinition::BogusTemp()),
+ ins);
+}
+
+void LIRGeneratorARM::lowerTruncateFToInt32(MTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Float32);
+
+ define(new (alloc())
+ LTruncateFToInt32(useRegister(opd), LDefinition::BogusTemp()),
+ ins);
+}
+
+void LIRGenerator::visitAtomicExchangeTypedArrayElement(
+ MAtomicExchangeTypedArrayElement* ins) {
+ MOZ_ASSERT(HasLDSTREXBHD());
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
+ const LAllocation value = useRegister(ins->value());
+
+ if (Scalar::isBigIntType(ins->arrayType())) {
+ // The two register pairs must be distinct.
+ LInt64Definition temp1 = tempInt64Fixed(Register64(IntArgReg3, IntArgReg2));
+ LDefinition temp2 = tempFixed(IntArgReg1);
+
+ auto* lir = new (alloc()) LAtomicExchangeTypedArrayElement64(
+ elements, index, value, temp1, temp2);
+ defineFixed(lir, ins, LAllocation(AnyRegister(IntArgReg0)));
+ assignSafepoint(lir, ins);
+ return;
+ }
+
+ MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);
+
+ // If the target is a floating register then we need a temp at the
+ // CodeGenerator level for creating the result.
+
+ LDefinition tempDef = LDefinition::BogusTemp();
+ if (ins->arrayType() == Scalar::Uint32) {
+ MOZ_ASSERT(ins->type() == MIRType::Double);
+ tempDef = temp();
+ }
+
+ LAtomicExchangeTypedArrayElement* lir = new (alloc())
+ LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);
+
+ define(lir, ins);
+}
+
+void LIRGenerator::visitAtomicTypedArrayElementBinop(
+ MAtomicTypedArrayElementBinop* ins) {
+ MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
+ const LAllocation value = useRegister(ins->value());
+
+ if (Scalar::isBigIntType(ins->arrayType())) {
+ // Wasm additionally pins the value register to `FetchOpVal64`, but it's
+ // unclear why this was deemed necessary.
+ LInt64Definition temp1 = tempInt64();
+ LInt64Definition temp2 = tempInt64Fixed(FetchOpTmp64);
+
+ if (ins->isForEffect()) {
+ auto* lir = new (alloc()) LAtomicTypedArrayElementBinopForEffect64(
+ elements, index, value, temp1, temp2);
+ add(lir, ins);
+ return;
+ }
+
+ LInt64Definition temp3 = tempInt64Fixed(FetchOpOut64);
+
+ auto* lir = new (alloc()) LAtomicTypedArrayElementBinop64(
+ elements, index, value, temp1, temp2, temp3);
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+ return;
+ }
+
+ if (ins->isForEffect()) {
+ LAtomicTypedArrayElementBinopForEffect* lir = new (alloc())
+ LAtomicTypedArrayElementBinopForEffect(elements, index, value,
+ /* flagTemp= */ temp());
+ add(lir, ins);
+ return;
+ }
+
+ // For a Uint32Array with a known double result we need a temp for
+ // the intermediate output.
+ //
+ // Optimization opportunity (bug 1077317): We can do better by
+ // allowing 'value' to remain as an imm32 if it is small enough to
+ // fit in an instruction.
+
+ LDefinition flagTemp = temp();
+ LDefinition outTemp = LDefinition::BogusTemp();
+
+ if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
+ outTemp = temp();
+ }
+
+ // On arm, map flagTemp to temp1 and outTemp to temp2, at least for now.
+
+ LAtomicTypedArrayElementBinop* lir = new (alloc())
+ LAtomicTypedArrayElementBinop(elements, index, value, flagTemp, outTemp);
+ define(lir, ins);
+}
+
+void LIRGenerator::visitCompareExchangeTypedArrayElement(
+ MCompareExchangeTypedArrayElement* ins) {
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
+
+ const LAllocation newval = useRegister(ins->newval());
+ const LAllocation oldval = useRegister(ins->oldval());
+
+ if (Scalar::isBigIntType(ins->arrayType())) {
+ // The three register pairs must be distinct.
+ LInt64Definition temp1 = tempInt64Fixed(CmpXchgOld64);
+ LInt64Definition temp2 = tempInt64Fixed(CmpXchgNew64);
+ LInt64Definition temp3 = tempInt64Fixed(CmpXchgOut64);
+
+ auto* lir = new (alloc()) LCompareExchangeTypedArrayElement64(
+ elements, index, oldval, newval, temp1, temp2, temp3);
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+ return;
+ }
+
+ // If the target is a floating register then we need a temp at the
+ // CodeGenerator level for creating the result.
+ //
+ // Optimization opportunity (bug 1077317): We could do better by
+ // allowing oldval to remain an immediate, if it is small enough
+ // to fit in an instruction.
+
+ LDefinition tempDef = LDefinition::BogusTemp();
+ if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
+ tempDef = temp();
+ }
+
+ LCompareExchangeTypedArrayElement* lir =
+ new (alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval,
+ newval, tempDef);
+
+ define(lir, ins);
+}
+
+void LIRGeneratorARM::lowerAtomicLoad64(MLoadUnboxedScalar* ins) {
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->storageType());
+
+ auto* lir = new (alloc())
+ LAtomicLoad64(elements, index, temp(),
+ tempInt64Fixed(Register64(IntArgReg1, IntArgReg0)));
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM::lowerAtomicStore64(MStoreUnboxedScalar* ins) {
+ LUse elements = useRegister(ins->elements());
+ LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->writeType());
+ LAllocation value = useRegister(ins->value());
+ LInt64Definition temp1 = tempInt64Fixed(Register64(IntArgReg1, IntArgReg0));
+ LInt64Definition temp2 = tempInt64Fixed(Register64(IntArgReg3, IntArgReg2));
+
+ add(new (alloc()) LAtomicStore64(elements, index, value, temp1, temp2), ins);
+}
+
+void LIRGenerator::visitWasmCompareExchangeHeap(MWasmCompareExchangeHeap* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ if (ins->access().type() == Scalar::Int64) {
+ // The three register pairs must be distinct.
+ auto* lir = new (alloc()) LWasmCompareExchangeI64(
+ useRegister(base), useInt64Fixed(ins->oldValue(), CmpXchgOld64),
+ useInt64Fixed(ins->newValue(), CmpXchgNew64));
+ defineInt64Fixed(lir, ins,
+ LInt64Allocation(LAllocation(AnyRegister(CmpXchgOutHi)),
+ LAllocation(AnyRegister(CmpXchgOutLo))));
+ return;
+ }
+
+ MOZ_ASSERT(ins->access().type() < Scalar::Float32);
+ MOZ_ASSERT(HasLDSTREXBHD(), "by HasPlatformSupport() constraints");
+
+ LWasmCompareExchangeHeap* lir = new (alloc())
+ LWasmCompareExchangeHeap(useRegister(base), useRegister(ins->oldValue()),
+ useRegister(ins->newValue()));
+
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmAtomicExchangeHeap(MWasmAtomicExchangeHeap* ins) {
+ MOZ_ASSERT(ins->base()->type() == MIRType::Int32);
+
+ if (ins->access().type() == Scalar::Int64) {
+ auto* lir = new (alloc()) LWasmAtomicExchangeI64(
+ useRegister(ins->base()), useInt64Fixed(ins->value(), XchgNew64),
+ ins->access());
+ defineInt64Fixed(lir, ins,
+ LInt64Allocation(LAllocation(AnyRegister(XchgOutHi)),
+ LAllocation(AnyRegister(XchgOutLo))));
+ return;
+ }
+
+ MOZ_ASSERT(ins->access().type() < Scalar::Float32);
+ MOZ_ASSERT(HasLDSTREXBHD(), "by HasPlatformSupport() constraints");
+
+ const LAllocation base = useRegister(ins->base());
+ const LAllocation value = useRegister(ins->value());
+ define(new (alloc()) LWasmAtomicExchangeHeap(base, value), ins);
+}
+
+void LIRGenerator::visitWasmAtomicBinopHeap(MWasmAtomicBinopHeap* ins) {
+ if (ins->access().type() == Scalar::Int64) {
+ auto* lir = new (alloc()) LWasmAtomicBinopI64(
+ useRegister(ins->base()), useInt64Fixed(ins->value(), FetchOpVal64),
+ tempFixed(FetchOpTmpLo), tempFixed(FetchOpTmpHi), ins->access(),
+ ins->operation());
+ defineInt64Fixed(lir, ins,
+ LInt64Allocation(LAllocation(AnyRegister(FetchOpOutHi)),
+ LAllocation(AnyRegister(FetchOpOutLo))));
+ return;
+ }
+
+ MOZ_ASSERT(ins->access().type() < Scalar::Float32);
+ MOZ_ASSERT(HasLDSTREXBHD(), "by HasPlatformSupport() constraints");
+
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ if (!ins->hasUses()) {
+ LWasmAtomicBinopHeapForEffect* lir =
+ new (alloc()) LWasmAtomicBinopHeapForEffect(useRegister(base),
+ useRegister(ins->value()),
+ /* flagTemp= */ temp());
+ add(lir, ins);
+ return;
+ }
+
+ LWasmAtomicBinopHeap* lir = new (alloc())
+ LWasmAtomicBinopHeap(useRegister(base), useRegister(ins->value()),
+ /* temp = */ LDefinition::BogusTemp(),
+ /* flagTemp= */ temp());
+ define(lir, ins);
+}
+
+void LIRGenerator::visitSubstr(MSubstr* ins) {
+ LSubstr* lir = new (alloc())
+ LSubstr(useRegister(ins->string()), useRegister(ins->begin()),
+ useRegister(ins->length()), temp(), temp(), tempByteOpRegister());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGenerator::visitWasmTruncateToInt64(MWasmTruncateToInt64* ins) {
+ MOZ_CRASH("We don't use MWasmTruncateToInt64 for arm");
+}
+
+void LIRGeneratorARM::lowerWasmBuiltinTruncateToInt64(
+ MWasmBuiltinTruncateToInt64* ins) {
+ MDefinition* opd = ins->input();
+ MDefinition* instance = ins->instance();
+ MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);
+
+ defineReturn(new (alloc())
+ LWasmTruncateToInt64(useRegisterAtStart(opd),
+ useFixedAtStart(instance, InstanceReg)),
+ ins);
+}
+
+void LIRGenerator::visitInt64ToFloatingPoint(MInt64ToFloatingPoint* ins) {
+ MOZ_CRASH("We use BuiltinInt64ToFloatingPoint instead.");
+}
+
+void LIRGeneratorARM::lowerBuiltinInt64ToFloatingPoint(
+ MBuiltinInt64ToFloatingPoint* ins) {
+ MOZ_ASSERT(ins->type() == MIRType::Double || ins->type() == MIRType::Float32);
+
+ auto* lir = new (alloc())
+ LInt64ToFloatingPointCall(useInt64RegisterAtStart(ins->input()),
+ useFixedAtStart(ins->instance(), InstanceReg));
+ defineReturn(lir, ins);
+}
+
+void LIRGenerator::visitCopySign(MCopySign* ins) {
+ MDefinition* lhs = ins->lhs();
+ MDefinition* rhs = ins->rhs();
+
+ MOZ_ASSERT(IsFloatingPointType(lhs->type()));
+ MOZ_ASSERT(lhs->type() == rhs->type());
+ MOZ_ASSERT(lhs->type() == ins->type());
+
+ LInstructionHelper<1, 2, 2>* lir;
+ if (lhs->type() == MIRType::Double) {
+ lir = new (alloc()) LCopySignD();
+ } else {
+ lir = new (alloc()) LCopySignF();
+ }
+
+ lir->setTemp(0, temp());
+ lir->setTemp(1, temp());
+
+ lowerForFPU(lir, ins, lhs, rhs);
+}
+
+void LIRGenerator::visitExtendInt32ToInt64(MExtendInt32ToInt64* ins) {
+ auto* lir =
+ new (alloc()) LExtendInt32ToInt64(useRegisterAtStart(ins->input()));
+ defineInt64(lir, ins);
+
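+  // Overwrite the low-word definition so that it must reuse the input
+  // register: the low 32 bits of the extended value are the input itself,
+  // and the high word is computed from it.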
+ LDefinition def(LDefinition::GENERAL, LDefinition::MUST_REUSE_INPUT);
+ def.setReusedInput(0);
+ def.setVirtualRegister(ins->virtualRegister());
+
+ lir->setDef(0, def);
+}
+
+void LIRGenerator::visitSignExtendInt64(MSignExtendInt64* ins) {
+ defineInt64(new (alloc())
+ LSignExtendInt64(useInt64RegisterAtStart(ins->input())),
+ ins);
+}
+
+// On ARM the only specialized cases are those where both the compare and
+// the select operate on {U,}Int32.
+bool LIRGeneratorShared::canSpecializeWasmCompareAndSelect(
+ MCompare::CompareType compTy, MIRType insTy) {
+ return insTy == MIRType::Int32 && (compTy == MCompare::Compare_Int32 ||
+ compTy == MCompare::Compare_UInt32);
+}
+
+void LIRGeneratorShared::lowerWasmCompareAndSelect(MWasmSelect* ins,
+ MDefinition* lhs,
+ MDefinition* rhs,
+ MCompare::CompareType compTy,
+ JSOp jsop) {
+ MOZ_ASSERT(canSpecializeWasmCompareAndSelect(compTy, ins->type()));
+ auto* lir = new (alloc()) LWasmCompareAndSelect(
+ useRegister(lhs), useRegister(rhs), compTy, jsop,
+ useRegisterAtStart(ins->trueExpr()), useRegister(ins->falseExpr()));
+ defineReuseInput(lir, ins, LWasmCompareAndSelect::IfTrueExprIndex);
+}
+
+void LIRGenerator::visitWasmTernarySimd128(MWasmTernarySimd128* ins) {
+ MOZ_CRASH("ternary SIMD NYI");
+}
+
+void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
+ MOZ_CRASH("binary SIMD NYI");
+}
+
+#ifdef ENABLE_WASM_SIMD
+bool MWasmTernarySimd128::specializeBitselectConstantMaskAsShuffle(
+ int8_t shuffle[16]) {
+ return false;
+}
+bool MWasmTernarySimd128::canRelaxBitselect() { return false; }
+
+bool MWasmBinarySimd128::canPmaddubsw() { return false; }
+#endif
+
+bool MWasmBinarySimd128::specializeForConstantRhs() {
+  // There are probably many cases worth specializing here.
+ return false;
+}
+
+void LIRGenerator::visitWasmBinarySimd128WithConstant(
+ MWasmBinarySimd128WithConstant* ins) {
+ MOZ_CRASH("binary SIMD with constant NYI");
+}
+
+void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
+ MOZ_CRASH("shift SIMD NYI");
+}
+
+void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
+ MOZ_CRASH("shuffle SIMD NYI");
+}
+
+void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) {
+ MOZ_CRASH("replace-lane SIMD NYI");
+}
+
+void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) {
+ MOZ_CRASH("scalar-to-SIMD NYI");
+}
+
+void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
+ MOZ_CRASH("unary SIMD NYI");
+}
+
+void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) {
+ MOZ_CRASH("reduce-SIMD NYI");
+}
+
+void LIRGenerator::visitWasmLoadLaneSimd128(MWasmLoadLaneSimd128* ins) {
+ MOZ_CRASH("load-lane SIMD NYI");
+}
+
+void LIRGenerator::visitWasmStoreLaneSimd128(MWasmStoreLaneSimd128* ins) {
+ MOZ_CRASH("store-lane SIMD NYI");
+}
diff --git a/js/src/jit/arm/Lowering-arm.h b/js/src/jit/arm/Lowering-arm.h
new file mode 100644
index 0000000000..3f03d22941
--- /dev/null
+++ b/js/src/jit/arm/Lowering-arm.h
@@ -0,0 +1,118 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_Lowering_arm_h
+#define jit_arm_Lowering_arm_h
+
+#include "jit/shared/Lowering-shared.h"
+
+namespace js {
+namespace jit {
+
+class LIRGeneratorARM : public LIRGeneratorShared {
+ protected:
+ LIRGeneratorARM(MIRGenerator* gen, MIRGraph& graph, LIRGraph& lirGraph)
+ : LIRGeneratorShared(gen, graph, lirGraph) {}
+
+ // Returns a box allocation with type set to reg1 and payload set to reg2.
+ LBoxAllocation useBoxFixed(MDefinition* mir, Register reg1, Register reg2,
+ bool useAtStart = false);
+
+  // x86 has constraints on which registers can be used for 1-byte stores
+  // and loads; on ARM any register is okay.
+ LAllocation useByteOpRegister(MDefinition* mir);
+ LAllocation useByteOpRegisterAtStart(MDefinition* mir);
+ LAllocation useByteOpRegisterOrNonDoubleConstant(MDefinition* mir);
+ LDefinition tempByteOpRegister();
+
+ inline LDefinition tempToUnbox() { return LDefinition::BogusTemp(); }
+
+ bool needTempForPostBarrier() { return false; }
+
+ void lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block,
+ size_t lirIndex);
+ void lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block,
+ size_t lirIndex);
+ void defineInt64Phi(MPhi* phi, size_t lirIndex);
+
+ void lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
+ MDefinition* lhs, MDefinition* rhs);
+ void lowerUrshD(MUrsh* mir);
+
+ void lowerPowOfTwoI(MPow* mir);
+
+ void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
+ MDefinition* input);
+ void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
+ MDefinition* lhs, MDefinition* rhs);
+
+ void lowerForALUInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES, 0>* ins,
+ MDefinition* mir, MDefinition* input);
+ void lowerForALUInt64(
+ LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+ void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs,
+ MDefinition* rhs);
+ template <size_t Temps>
+ void lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+
+ void lowerForCompareI64AndBranch(MTest* mir, MCompare* comp, JSOp op,
+ MDefinition* left, MDefinition* right,
+ MBasicBlock* ifTrue, MBasicBlock* ifFalse);
+
+ void lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
+ MDefinition* src);
+ template <size_t Temps>
+ void lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
+ MDefinition* lhs, MDefinition* rhs);
+
+ void lowerBuiltinInt64ToFloatingPoint(MBuiltinInt64ToFloatingPoint* ins);
+ void lowerWasmBuiltinTruncateToInt64(MWasmBuiltinTruncateToInt64* ins);
+ void lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
+ MDefinition* lhs, MDefinition* rhs);
+ void lowerWasmBuiltinTruncateToInt32(MWasmBuiltinTruncateToInt32* ins);
+ void lowerTruncateDToInt32(MTruncateToInt32* ins);
+ void lowerTruncateFToInt32(MTruncateToInt32* ins);
+ void lowerDivI(MDiv* div);
+ void lowerModI(MMod* mod);
+ void lowerDivI64(MDiv* div);
+ void lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div);
+ void lowerModI64(MMod* mod);
+ void lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod);
+ void lowerUDivI64(MDiv* div);
+ void lowerUModI64(MMod* mod);
+ void lowerNegI(MInstruction* ins, MDefinition* input);
+ void lowerNegI64(MInstruction* ins, MDefinition* input);
+ void lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs);
+ void lowerUDiv(MDiv* div);
+ void lowerUMod(MMod* mod);
+ void lowerWasmSelectI(MWasmSelect* select);
+ void lowerWasmSelectI64(MWasmSelect* select);
+
+ void lowerBigIntLsh(MBigIntLsh* ins);
+ void lowerBigIntRsh(MBigIntRsh* ins);
+ void lowerBigIntDiv(MBigIntDiv* ins);
+ void lowerBigIntMod(MBigIntMod* ins);
+
+ void lowerAtomicLoad64(MLoadUnboxedScalar* ins);
+ void lowerAtomicStore64(MStoreUnboxedScalar* ins);
+
+ LTableSwitch* newLTableSwitch(const LAllocation& in,
+ const LDefinition& inputCopy,
+ MTableSwitch* ins);
+ LTableSwitchV* newLTableSwitchV(MTableSwitch* ins);
+
+ void lowerPhi(MPhi* phi);
+};
+
+typedef LIRGeneratorARM LIRGeneratorSpecific;
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_Lowering_arm_h */
diff --git a/js/src/jit/arm/MacroAssembler-arm-inl.h b/js/src/jit/arm/MacroAssembler-arm-inl.h
new file mode 100644
index 0000000000..94d323207e
--- /dev/null
+++ b/js/src/jit/arm/MacroAssembler-arm-inl.h
@@ -0,0 +1,2582 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_MacroAssembler_arm_inl_h
+#define jit_arm_MacroAssembler_arm_inl_h
+
+#include "jit/arm/MacroAssembler-arm.h"
+
+namespace js {
+namespace jit {
+
+//{{{ check_macroassembler_style
+
+void MacroAssembler::move64(Register64 src, Register64 dest) {
+ move32(src.low, dest.low);
+ move32(src.high, dest.high);
+}
+
+void MacroAssembler::move64(Imm64 imm, Register64 dest) {
+ move32(Imm32(imm.value & 0xFFFFFFFFL), dest.low);
+ move32(Imm32((imm.value >> 32) & 0xFFFFFFFFL), dest.high);
+}
+
+void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) {
+ ma_vxfer(src, dest);
+}
+
+void MacroAssembler::moveGPRToFloat32(Register src, FloatRegister dest) {
+ ma_vxfer(src, dest);
+}
+
+void MacroAssembler::move8SignExtend(Register src, Register dest) {
+ as_sxtb(dest, src, 0);
+}
+
+void MacroAssembler::move16SignExtend(Register src, Register dest) {
+ as_sxth(dest, src, 0);
+}
+
+void MacroAssembler::moveDoubleToGPR64(FloatRegister src, Register64 dest) {
+ ma_vxfer(src, dest.low, dest.high);
+}
+
+void MacroAssembler::moveGPR64ToDouble(Register64 src, FloatRegister dest) {
+ ma_vxfer(src.low, src.high, dest);
+}
+
+void MacroAssembler::move64To32(Register64 src, Register dest) {
+ if (src.low != dest) {
+ move32(src.low, dest);
+ }
+}
+
+void MacroAssembler::move32To64ZeroExtend(Register src, Register64 dest) {
+ if (src != dest.low) {
+ move32(src, dest.low);
+ }
+ move32(Imm32(0), dest.high);
+}
+
+void MacroAssembler::move8To64SignExtend(Register src, Register64 dest) {
+ as_sxtb(dest.low, src, 0);
+ ma_asr(Imm32(31), dest.low, dest.high);
+}
+
+void MacroAssembler::move16To64SignExtend(Register src, Register64 dest) {
+ as_sxth(dest.low, src, 0);
+ ma_asr(Imm32(31), dest.low, dest.high);
+}
+
+void MacroAssembler::move32To64SignExtend(Register src, Register64 dest) {
+ if (src != dest.low) {
+ move32(src, dest.low);
+ }
+ ma_asr(Imm32(31), dest.low, dest.high);
+}
+
+void MacroAssembler::move32SignExtendToPtr(Register src, Register dest) {
+ move32(src, dest);
+}
+
+void MacroAssembler::move32ZeroExtendToPtr(Register src, Register dest) {
+ move32(src, dest);
+}
+
+// ===============================================================
+// Load instructions
+
+void MacroAssembler::load32SignExtendToPtr(const Address& src, Register dest) {
+ load32(src, dest);
+}
+
+void MacroAssembler::loadAbiReturnAddress(Register dest) { movePtr(lr, dest); }
+
+// ===============================================================
+// Logical instructions
+
+void MacroAssembler::not32(Register reg) { ma_mvn(reg, reg); }
+
+void MacroAssembler::notPtr(Register reg) { ma_mvn(reg, reg); }
+
+void MacroAssembler::and32(Register src, Register dest) {
+ ma_and(src, dest, SetCC);
+}
+
+void MacroAssembler::and32(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_and(imm, dest, scratch, SetCC);
+}
+
+void MacroAssembler::and32(Imm32 imm, const Address& dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(dest, scratch, scratch2);
+ ma_and(imm, scratch, scratch2);
+ ma_str(scratch, dest, scratch2);
+}
+
+void MacroAssembler::and32(const Address& src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(src, scratch, scratch2);
+ ma_and(scratch, dest, SetCC);
+}
+
+void MacroAssembler::andPtr(Register src, Register dest) { ma_and(src, dest); }
+
+void MacroAssembler::andPtr(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_and(imm, dest, scratch);
+}
+
+void MacroAssembler::and64(Imm64 imm, Register64 dest) {
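+  // An all-ones half is an identity mask for AND and can be skipped,
+  // just as or64/xor64 below skip all-zero halves.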
+ if (imm.low().value != int32_t(0xFFFFFFFF)) {
+ and32(imm.low(), dest.low);
+ }
+ if (imm.hi().value != int32_t(0xFFFFFFFF)) {
+ and32(imm.hi(), dest.high);
+ }
+}
+
+void MacroAssembler::or64(Imm64 imm, Register64 dest) {
+ if (imm.low().value) {
+ or32(imm.low(), dest.low);
+ }
+ if (imm.hi().value) {
+ or32(imm.hi(), dest.high);
+ }
+}
+
+void MacroAssembler::xor64(Imm64 imm, Register64 dest) {
+ if (imm.low().value) {
+ xor32(imm.low(), dest.low);
+ }
+ if (imm.hi().value) {
+ xor32(imm.hi(), dest.high);
+ }
+}
+
+void MacroAssembler::or32(Register src, Register dest) { ma_orr(src, dest); }
+
+void MacroAssembler::or32(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_orr(imm, dest, scratch);
+}
+
+void MacroAssembler::or32(Imm32 imm, const Address& dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(dest, scratch, scratch2);
+ ma_orr(imm, scratch, scratch2);
+ ma_str(scratch, dest, scratch2);
+}
+
+void MacroAssembler::orPtr(Register src, Register dest) { ma_orr(src, dest); }
+
+void MacroAssembler::orPtr(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_orr(imm, dest, scratch);
+}
+
+void MacroAssembler::and64(Register64 src, Register64 dest) {
+ and32(src.low, dest.low);
+ and32(src.high, dest.high);
+}
+
+void MacroAssembler::or64(Register64 src, Register64 dest) {
+ or32(src.low, dest.low);
+ or32(src.high, dest.high);
+}
+
+void MacroAssembler::xor64(Register64 src, Register64 dest) {
+ ma_eor(src.low, dest.low);
+ ma_eor(src.high, dest.high);
+}
+
+void MacroAssembler::xor32(Register src, Register dest) {
+ ma_eor(src, dest, SetCC);
+}
+
+void MacroAssembler::xor32(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_eor(imm, dest, scratch, SetCC);
+}
+
+void MacroAssembler::xor32(Imm32 imm, const Address& dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(dest, scratch, scratch2);
+ ma_eor(imm, scratch, scratch2);
+ ma_str(scratch, dest, scratch2);
+}
+
+void MacroAssembler::xor32(const Address& src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(src, scratch, scratch2);
+ ma_eor(scratch, dest, SetCC);
+}
+
+void MacroAssembler::xorPtr(Register src, Register dest) { ma_eor(src, dest); }
+
+void MacroAssembler::xorPtr(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_eor(imm, dest, scratch);
+}
+
+// ===============================================================
+// Swap instructions
+
+void MacroAssembler::byteSwap16SignExtend(Register reg) { as_revsh(reg, reg); }
+
+void MacroAssembler::byteSwap16ZeroExtend(Register reg) {
+ as_rev16(reg, reg);
+ as_uxth(reg, reg, 0);
+}
+
+void MacroAssembler::byteSwap32(Register reg) { as_rev(reg, reg); }
+
+void MacroAssembler::byteSwap64(Register64 reg) {
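+  // Byte-reverse each 32-bit half, then swap the halves: a 64-bit bswap
+  // reverses the byte order across the whole pair.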
+ as_rev(reg.high, reg.high);
+ as_rev(reg.low, reg.low);
+
+ ScratchRegisterScope scratch(*this);
+ ma_mov(reg.high, scratch);
+ ma_mov(reg.low, reg.high);
+ ma_mov(scratch, reg.low);
+}
+
+// ===============================================================
+// Arithmetic functions
+
+void MacroAssembler::add32(Register src, Register dest) {
+ ma_add(src, dest, SetCC);
+}
+
+void MacroAssembler::add32(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_add(imm, dest, scratch, SetCC);
+}
+
+void MacroAssembler::add32(Imm32 imm, const Address& dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(dest, scratch, scratch2);
+ ma_add(imm, scratch, scratch2, SetCC);
+ ma_str(scratch, dest, scratch2);
+}
+
+void MacroAssembler::addPtr(Register src, Register dest) { ma_add(src, dest); }
+
+void MacroAssembler::addPtr(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_add(imm, dest, scratch);
+}
+
+void MacroAssembler::addPtr(ImmWord imm, Register dest) {
+ addPtr(Imm32(imm.value), dest);
+}
+
+void MacroAssembler::addPtr(Imm32 imm, const Address& dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(dest, scratch, scratch2);
+ ma_add(imm, scratch, scratch2);
+ ma_str(scratch, dest, scratch2);
+}
+
+void MacroAssembler::addPtr(const Address& src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(src, scratch, scratch2);
+ ma_add(scratch, dest, SetCC);
+}
+
+void MacroAssembler::add64(Register64 src, Register64 dest) {
+ ma_add(src.low, dest.low, SetCC);
+ ma_adc(src.high, dest.high);
+}
+
+void MacroAssembler::add64(Imm32 imm, Register64 dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_add(imm, dest.low, scratch, SetCC);
+ as_adc(dest.high, dest.high, Imm8(0), LeaveCC);
+}
+
+void MacroAssembler::add64(Imm64 imm, Register64 dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_add(imm.low(), dest.low, scratch, SetCC);
+ ma_adc(imm.hi(), dest.high, scratch, LeaveCC);
+}
+
+CodeOffset MacroAssembler::sub32FromStackPtrWithPatch(Register dest) {
+ ScratchRegisterScope scratch(*this);
+ CodeOffset offs = CodeOffset(currentOffset());
+ ma_movPatchable(Imm32(0), scratch, Always);
+ ma_sub(getStackPointer(), scratch, dest);
+ return offs;
+}
+
+void MacroAssembler::patchSub32FromStackPtr(CodeOffset offset, Imm32 imm) {
+ ScratchRegisterScope scratch(*this);
+ BufferInstructionIterator iter(BufferOffset(offset.offset()), &m_buffer);
+ iter.maybeSkipAutomaticInstructions();
+ ma_mov_patch(imm, scratch, Always, HasMOVWT() ? L_MOVWT : L_LDR, iter);
+}
+
+void MacroAssembler::addDouble(FloatRegister src, FloatRegister dest) {
+ ma_vadd(dest, src, dest);
+}
+
+void MacroAssembler::addFloat32(FloatRegister src, FloatRegister dest) {
+ ma_vadd_f32(dest, src, dest);
+}
+
+void MacroAssembler::sub32(Register src, Register dest) {
+ ma_sub(src, dest, SetCC);
+}
+
+void MacroAssembler::sub32(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_sub(imm, dest, scratch, SetCC);
+}
+
+void MacroAssembler::sub32(const Address& src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(src, scratch, scratch2);
+ ma_sub(scratch, dest, SetCC);
+}
+
+void MacroAssembler::subPtr(Register src, Register dest) { ma_sub(src, dest); }
+
+void MacroAssembler::subPtr(Register src, const Address& dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(dest, scratch, scratch2);
+ ma_sub(src, scratch);
+ ma_str(scratch, dest, scratch2);
+}
+
+void MacroAssembler::subPtr(Imm32 imm, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_sub(imm, dest, scratch);
+}
+
+void MacroAssembler::subPtr(const Address& addr, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(addr, scratch, scratch2);
+ ma_sub(scratch, dest);
+}
+
+void MacroAssembler::sub64(Register64 src, Register64 dest) {
+ ma_sub(src.low, dest.low, SetCC);
+ ma_sbc(src.high, dest.high, LeaveCC);
+}
+
+void MacroAssembler::sub64(Imm64 imm, Register64 dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_sub(imm.low(), dest.low, scratch, SetCC);
+ ma_sbc(imm.hi(), dest.high, scratch, LeaveCC);
+}
+
+void MacroAssembler::subDouble(FloatRegister src, FloatRegister dest) {
+ ma_vsub(dest, src, dest);
+}
+
+void MacroAssembler::subFloat32(FloatRegister src, FloatRegister dest) {
+ ma_vsub_f32(dest, src, dest);
+}
+
+void MacroAssembler::mul32(Register rhs, Register srcDest) {
+ as_mul(srcDest, srcDest, rhs);
+}
+
+void MacroAssembler::mul32(Imm32 imm, Register srcDest) {
+ ScratchRegisterScope scratch(*this);
+ move32(imm, scratch);
+ mul32(scratch, srcDest);
+}
+
+void MacroAssembler::mulHighUnsigned32(Imm32 imm, Register src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_umull(src, imm, dest, scratch, scratch);
+}
+
+void MacroAssembler::mulPtr(Register rhs, Register srcDest) {
+ as_mul(srcDest, srcDest, rhs);
+}
+
+void MacroAssembler::mul64(Imm64 imm, const Register64& dest) {
+ // LOW32 = LOW(LOW(dest) * LOW(imm));
+ // HIGH32 = LOW(HIGH(dest) * LOW(imm)) [multiply imm into upper bits]
+ // + LOW(LOW(dest) * HIGH(imm)) [multiply dest into upper bits]
+ // + HIGH(LOW(dest) * LOW(imm)) [carry]
+
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ // HIGH(dest) = LOW(HIGH(dest) * LOW(imm));
+ ma_mov(Imm32(imm.value & 0xFFFFFFFFL), scratch);
+ as_mul(dest.high, dest.high, scratch);
+
+ // high:low = LOW(dest) * LOW(imm);
+ as_umull(scratch2, scratch, dest.low, scratch);
+
+ // HIGH(dest) += high;
+ as_add(dest.high, dest.high, O2Reg(scratch2));
+
+ // HIGH(dest) += LOW(LOW(dest) * HIGH(imm));
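+  // Only a high word of 5 is supported: x * 5 == x + (x << 2). Presumably
+  // this is the only constant current callers need; other values would
+  // take a general multiply.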
+ if (((imm.value >> 32) & 0xFFFFFFFFL) == 5) {
+ as_add(scratch2, dest.low, lsl(dest.low, 2));
+ } else {
+ MOZ_CRASH("Not supported imm");
+ }
+ as_add(dest.high, dest.high, O2Reg(scratch2));
+
+ // LOW(dest) = low;
+ ma_mov(scratch, dest.low);
+}
+
+void MacroAssembler::mul64(Imm64 imm, const Register64& dest,
+ const Register temp) {
+ // LOW32 = LOW(LOW(dest) * LOW(src)); (1)
+ // HIGH32 = LOW(HIGH(dest) * LOW(src)) [multiply src into upper bits] (2)
+ // + LOW(LOW(dest) * HIGH(src)) [multiply dest into upper bits] (3)
+ // + HIGH(LOW(dest) * LOW(src)) [carry] (4)
+
+ MOZ_ASSERT(temp != dest.high && temp != dest.low);
+
+ // Compute mul64
+ ScratchRegisterScope scratch(*this);
+ ma_mul(dest.high, imm.low(), dest.high, scratch); // (2)
+ ma_mul(dest.low, imm.hi(), temp, scratch); // (3)
+ ma_add(dest.high, temp, temp);
+ ma_umull(dest.low, imm.low(), dest.high, dest.low, scratch); // (4) + (1)
+ ma_add(temp, dest.high, dest.high);
+}
+
+void MacroAssembler::mul64(const Register64& src, const Register64& dest,
+ const Register temp) {
+ // LOW32 = LOW(LOW(dest) * LOW(src)); (1)
+ // HIGH32 = LOW(HIGH(dest) * LOW(src)) [multiply src into upper bits] (2)
+ // + LOW(LOW(dest) * HIGH(src)) [multiply dest into upper bits] (3)
+ // + HIGH(LOW(dest) * LOW(src)) [carry] (4)
+
+ MOZ_ASSERT(dest != src);
+ MOZ_ASSERT(dest.low != src.high && dest.high != src.low);
+
+ // Compute mul64
+ ma_mul(dest.high, src.low, dest.high); // (2)
+ ma_mul(src.high, dest.low, temp); // (3)
+ ma_add(dest.high, temp, temp);
+ ma_umull(dest.low, src.low, dest.high, dest.low); // (4) + (1)
+ ma_add(temp, dest.high, dest.high);
+}
+
+void MacroAssembler::mulBy3(Register src, Register dest) {
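+  // dest = src + (src << 1), i.e. 3 * src without a multiply instruction.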
+ as_add(dest, src, lsl(src, 1));
+}
+
+void MacroAssembler::mulFloat32(FloatRegister src, FloatRegister dest) {
+ ma_vmul_f32(dest, src, dest);
+}
+
+void MacroAssembler::mulDouble(FloatRegister src, FloatRegister dest) {
+ ma_vmul(dest, src, dest);
+}
+
+void MacroAssembler::mulDoublePtr(ImmPtr imm, Register temp,
+ FloatRegister dest) {
+ ScratchRegisterScope scratch(*this);
+ ScratchDoubleScope scratchDouble(*this);
+
+ movePtr(imm, scratch);
+ ma_vldr(Operand(Address(scratch, 0)).toVFPAddr(), scratchDouble);
+ mulDouble(scratchDouble, dest);
+}
+
+void MacroAssembler::quotient32(Register rhs, Register srcDest,
+ bool isUnsigned) {
+ MOZ_ASSERT(HasIDIV());
+ if (isUnsigned) {
+ ma_udiv(srcDest, rhs, srcDest);
+ } else {
+ ma_sdiv(srcDest, rhs, srcDest);
+ }
+}
+
+void MacroAssembler::remainder32(Register rhs, Register srcDest,
+ bool isUnsigned) {
+ MOZ_ASSERT(HasIDIV());
+
+ ScratchRegisterScope scratch(*this);
+ if (isUnsigned) {
+ ma_umod(srcDest, rhs, srcDest, scratch);
+ } else {
+ ma_smod(srcDest, rhs, srcDest, scratch);
+ }
+}
+
+void MacroAssembler::divFloat32(FloatRegister src, FloatRegister dest) {
+ ma_vdiv_f32(dest, src, dest);
+}
+
+void MacroAssembler::divDouble(FloatRegister src, FloatRegister dest) {
+ ma_vdiv(dest, src, dest);
+}
+
+void MacroAssembler::inc64(AbsoluteAddress dest) {
+ ScratchRegisterScope scratch(*this);
+
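+  // r0/r1 are spilled around the update, presumably because LDRD/STRD
+  // want an even/odd register pair; the 64-bit value is then bumped with
+  // an add/adc carry chain.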
+ ma_strd(r0, r1, EDtrAddr(sp, EDtrOffImm(-8)), PreIndex);
+
+ ma_mov(Imm32((int32_t)dest.addr), scratch);
+ ma_ldrd(EDtrAddr(scratch, EDtrOffImm(0)), r0, r1);
+
+ as_add(r0, r0, Imm8(1), SetCC);
+ as_adc(r1, r1, Imm8(0), LeaveCC);
+
+ ma_strd(r0, r1, EDtrAddr(scratch, EDtrOffImm(0)));
+ ma_ldrd(EDtrAddr(sp, EDtrOffImm(8)), r0, r1, PostIndex);
+}
+
+void MacroAssembler::neg32(Register reg) { ma_neg(reg, reg, SetCC); }
+
+void MacroAssembler::neg64(Register64 reg) {
+ as_rsb(reg.low, reg.low, Imm8(0), SetCC);
+ as_rsc(reg.high, reg.high, Imm8(0));
+}
+
+void MacroAssembler::negPtr(Register reg) { neg32(reg); }
+
+void MacroAssembler::negateDouble(FloatRegister reg) { ma_vneg(reg, reg); }
+
+void MacroAssembler::negateFloat(FloatRegister reg) { ma_vneg_f32(reg, reg); }
+
+void MacroAssembler::abs32(Register src, Register dest) {
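+  // Branchless abs: compare against zero, then conditionally negate with
+  // rsb (LessThan) or copy (GreaterThanOrEqual).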
+ as_cmp(src, Imm8(0));
+ as_rsb(dest, src, Imm8(0), LeaveCC, LessThan);
+ if (dest != src) {
+ as_mov(dest, O2Reg(src), LeaveCC, GreaterThanOrEqual);
+ }
+}
+
+void MacroAssembler::absFloat32(FloatRegister src, FloatRegister dest) {
+ ma_vabs_f32(src, dest);
+}
+
+void MacroAssembler::absDouble(FloatRegister src, FloatRegister dest) {
+ ma_vabs(src, dest);
+}
+
+void MacroAssembler::sqrtFloat32(FloatRegister src, FloatRegister dest) {
+ ma_vsqrt_f32(src, dest);
+}
+
+void MacroAssembler::sqrtDouble(FloatRegister src, FloatRegister dest) {
+ ma_vsqrt(src, dest);
+}
+
+void MacroAssembler::minFloat32(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ minMaxFloat32(srcDest, other, handleNaN, false);
+}
+
+void MacroAssembler::minDouble(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ minMaxDouble(srcDest, other, handleNaN, false);
+}
+
+void MacroAssembler::maxFloat32(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ minMaxFloat32(srcDest, other, handleNaN, true);
+}
+
+void MacroAssembler::maxDouble(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ minMaxDouble(srcDest, other, handleNaN, true);
+}
+
+// ===============================================================
+// Shift functions
+
+void MacroAssembler::lshiftPtr(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ ma_lsl(imm, dest, dest);
+}
+
+void MacroAssembler::lshiftPtr(Register src, Register dest) {
+ ma_lsl(src, dest, dest);
+}
+
+void MacroAssembler::lshift64(Imm32 imm, Register64 dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ if (imm.value == 0) {
+ return;
+ }
+
+ if (imm.value < 32) {
+ as_mov(dest.high, lsl(dest.high, imm.value));
+ as_orr(dest.high, dest.high, lsr(dest.low, 32 - imm.value));
+ as_mov(dest.low, lsl(dest.low, imm.value));
+ } else {
+ as_mov(dest.high, lsl(dest.low, imm.value - 32));
+ ma_mov(Imm32(0), dest.low);
+ }
+}
+
+void MacroAssembler::lshift64(Register unmaskedShift, Register64 dest) {
+  // dest.high = dest.high << shift
+  //           | dest.low << (shift - 32)
+  //           | dest.low >> (32 - shift)
+  // Note: one of the two dest.low shifts will always yield zero due to a
+  // negative shift amount.
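+  // A worked sketch: for shift == 40, (shift - 32) == 8 feeds dest.low << 8
+  // into the high word, while (32 - shift) == -8 reads as a shift byte
+  // >= 32 in the register-shift encoding, so that lsr term contributes
+  // zero.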
+
+ ScratchRegisterScope shift(*this);
+ as_and(shift, unmaskedShift, Imm8(0x3f));
+ as_mov(dest.high, lsl(dest.high, shift));
+ as_sub(shift, shift, Imm8(32));
+ as_orr(dest.high, dest.high, lsl(dest.low, shift));
+ ma_neg(shift, shift);
+ as_orr(dest.high, dest.high, lsr(dest.low, shift));
+ as_and(shift, unmaskedShift, Imm8(0x3f));
+ as_mov(dest.low, lsl(dest.low, shift));
+}
+
+void MacroAssembler::lshift32(Register src, Register dest) {
+ ma_lsl(src, dest, dest);
+}
+
+void MacroAssembler::flexibleLshift32(Register src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ as_and(scratch, src, Imm8(0x1F));
+ lshift32(scratch, dest);
+}
+
+void MacroAssembler::lshift32(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ lshiftPtr(imm, dest);
+}
+
+void MacroAssembler::rshiftPtr(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ if (imm.value) {
+ ma_lsr(imm, dest, dest);
+ }
+}
+
+void MacroAssembler::rshiftPtr(Register src, Register dest) {
+ ma_lsr(src, dest, dest);
+}
+
+void MacroAssembler::rshift32(Register src, Register dest) {
+ ma_lsr(src, dest, dest);
+}
+
+void MacroAssembler::flexibleRshift32(Register src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ as_and(scratch, src, Imm8(0x1F));
+ rshift32(scratch, dest);
+}
+
+void MacroAssembler::rshift32(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ rshiftPtr(imm, dest);
+}
+
+void MacroAssembler::rshiftPtrArithmetic(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ if (imm.value) {
+ ma_asr(imm, dest, dest);
+ }
+}
+
+void MacroAssembler::rshift64Arithmetic(Imm32 imm, Register64 dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ if (!imm.value) {
+ return;
+ }
+
+ if (imm.value < 32) {
+ as_mov(dest.low, lsr(dest.low, imm.value));
+ as_orr(dest.low, dest.low, lsl(dest.high, 32 - imm.value));
+ as_mov(dest.high, asr(dest.high, imm.value));
+ } else if (imm.value == 32) {
+ as_mov(dest.low, O2Reg(dest.high));
+ as_mov(dest.high, asr(dest.high, 31));
+ } else {
+ as_mov(dest.low, asr(dest.high, imm.value - 32));
+ as_mov(dest.high, asr(dest.high, 31));
+ }
+}
+
+void MacroAssembler::rshift64Arithmetic(Register unmaskedShift,
+ Register64 dest) {
+ Label proceed;
+
+  // dest.low = dest.low >>> shift | dest.high << (32 - shift)
+  // if (shift - 32 >= 0)
+  //   dest.low |= dest.high >>> (shift - 32)
+  // Note: negative shift amounts yield zero as the result, except for the
+  // arithmetic (signed) right shift, so that contribution is only applied
+  // when (shift - 32) is non-negative.
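+  // Sketch of the subtlety: lsr/lsl by a register amount of 32 or more
+  // produce zero, but asr produces all sign bits, so the asr term is
+  // branched around when shift < 32 instead of relying on the
+  // zero-on-overshift trick.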
+ ScratchRegisterScope shift(*this);
+
+ as_and(shift, unmaskedShift, Imm8(0x3f));
+ as_mov(dest.low, lsr(dest.low, shift));
+ as_rsb(shift, shift, Imm8(32));
+ as_orr(dest.low, dest.low, lsl(dest.high, shift));
+ ma_neg(shift, shift, SetCC);
+ ma_b(&proceed, Signed);
+
+ as_orr(dest.low, dest.low, asr(dest.high, shift));
+
+ bind(&proceed);
+ as_and(shift, unmaskedShift, Imm8(0x3f));
+ as_mov(dest.high, asr(dest.high, shift));
+}
+
+void MacroAssembler::rshift32Arithmetic(Register src, Register dest) {
+ ma_asr(src, dest, dest);
+}
+
+void MacroAssembler::rshift32Arithmetic(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ rshiftPtrArithmetic(imm, dest);
+}
+
+void MacroAssembler::flexibleRshift32Arithmetic(Register src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ as_and(scratch, src, Imm8(0x1F));
+ rshift32Arithmetic(scratch, dest);
+}
+
+void MacroAssembler::rshift64(Imm32 imm, Register64 dest) {
+  MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ if (!imm.value) {
+ return;
+ }
+
+ if (imm.value < 32) {
+ as_mov(dest.low, lsr(dest.low, imm.value));
+ as_orr(dest.low, dest.low, lsl(dest.high, 32 - imm.value));
+ as_mov(dest.high, lsr(dest.high, imm.value));
+ } else if (imm.value == 32) {
+ ma_mov(dest.high, dest.low);
+ ma_mov(Imm32(0), dest.high);
+ } else {
+ ma_lsr(Imm32(imm.value - 32), dest.high, dest.low);
+ ma_mov(Imm32(0), dest.high);
+ }
+}
+
+void MacroAssembler::rshift64(Register unmaskedShift, Register64 dest) {
+  // dest.low = dest.low >> shift
+  //          | dest.high >> (shift - 32)
+  //          | dest.high << (32 - shift)
+  // Note: one of the two dest.high shifts will always yield zero due to a
+  // negative shift amount.
+
+ ScratchRegisterScope shift(*this);
+ as_and(shift, unmaskedShift, Imm8(0x3f));
+ as_mov(dest.low, lsr(dest.low, shift));
+ as_sub(shift, shift, Imm8(32));
+ as_orr(dest.low, dest.low, lsr(dest.high, shift));
+ ma_neg(shift, shift);
+ as_orr(dest.low, dest.low, lsl(dest.high, shift));
+ as_and(shift, unmaskedShift, Imm8(0x3f));
+ as_mov(dest.high, lsr(dest.high, shift));
+}
+
+// ===============================================================
+// Rotate functions
+void MacroAssembler::rotateLeft(Imm32 count, Register input, Register dest) {
+ if (count.value) {
+ ma_rol(count, input, dest);
+ } else {
+ ma_mov(input, dest);
+ }
+}
+
+void MacroAssembler::rotateLeft(Register count, Register input, Register dest) {
+ ScratchRegisterScope scratch(*this);
+ ma_rol(count, input, dest, scratch);
+}
+
+void MacroAssembler::rotateLeft64(Imm32 count, Register64 input,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(temp == InvalidReg);
+ MOZ_ASSERT(input.low != dest.high && input.high != dest.low);
+
+ int32_t amount = count.value & 0x3f;
+ if (amount > 32) {
+ rotateRight64(Imm32(64 - amount), input, dest, temp);
+ } else {
+ ScratchRegisterScope scratch(*this);
+ if (amount == 0) {
+ ma_mov(input.low, dest.low);
+ ma_mov(input.high, dest.high);
+ } else if (amount == 32) {
+ ma_mov(input.low, scratch);
+ ma_mov(input.high, dest.low);
+ ma_mov(scratch, dest.high);
+ } else {
+ MOZ_ASSERT(0 < amount && amount < 32);
+ ma_mov(dest.high, scratch);
+ as_mov(dest.high, lsl(dest.high, amount));
+ as_orr(dest.high, dest.high, lsr(dest.low, 32 - amount));
+ as_mov(dest.low, lsl(dest.low, amount));
+ as_orr(dest.low, dest.low, lsr(scratch, 32 - amount));
+ }
+ }
+}
+
+void MacroAssembler::rotateLeft64(Register shift, Register64 src,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(shift != temp);
+ MOZ_ASSERT(src == dest);
+ MOZ_ASSERT(temp != src.low && temp != src.high);
+ MOZ_ASSERT(shift != src.low && shift != src.high);
+ MOZ_ASSERT(temp != InvalidReg);
+
+ ScratchRegisterScope shift_value(*this);
+ Label high, done;
+
+ ma_mov(src.high, temp);
+ as_and(shift_value, shift, Imm8(0x3f));
+ as_cmp(shift_value, Imm8(32));
+ ma_b(&high, GreaterThanOrEqual);
+
+ // high = high << shift | low >> 32 - shift
+ // low = low << shift | high >> 32 - shift
+ as_mov(dest.high, lsl(src.high, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.high, dest.high, lsr(src.low, shift_value));
+
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_mov(dest.low, lsl(src.low, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.low, dest.low, lsr(temp, shift_value));
+
+ ma_b(&done);
+
+ // A 32 - 64 shift is a 0 - 32 shift in the other direction.
+ bind(&high);
+ as_rsb(shift_value, shift_value, Imm8(64));
+
+ as_mov(dest.high, lsr(src.high, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.high, dest.high, lsl(src.low, shift_value));
+
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_mov(dest.low, lsr(src.low, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.low, dest.low, lsl(temp, shift_value));
+
+ bind(&done);
+}
+
+void MacroAssembler::rotateRight(Imm32 count, Register input, Register dest) {
+ if (count.value) {
+ ma_ror(count, input, dest);
+ } else {
+ ma_mov(input, dest);
+ }
+}
+
+void MacroAssembler::rotateRight(Register count, Register input,
+ Register dest) {
+ ma_ror(count, input, dest);
+}
+
+void MacroAssembler::rotateRight64(Imm32 count, Register64 input,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(temp == InvalidReg);
+ MOZ_ASSERT(input.low != dest.high && input.high != dest.low);
+
+ int32_t amount = count.value & 0x3f;
+ if (amount > 32) {
+ rotateLeft64(Imm32(64 - amount), input, dest, temp);
+ } else {
+ ScratchRegisterScope scratch(*this);
+ if (amount == 0) {
+ ma_mov(input.low, dest.low);
+ ma_mov(input.high, dest.high);
+ } else if (amount == 32) {
+ ma_mov(input.low, scratch);
+ ma_mov(input.high, dest.low);
+ ma_mov(scratch, dest.high);
+ } else {
+ MOZ_ASSERT(0 < amount && amount < 32);
+ ma_mov(dest.high, scratch);
+ as_mov(dest.high, lsr(dest.high, amount));
+ as_orr(dest.high, dest.high, lsl(dest.low, 32 - amount));
+ as_mov(dest.low, lsr(dest.low, amount));
+ as_orr(dest.low, dest.low, lsl(scratch, 32 - amount));
+ }
+ }
+}
+
+void MacroAssembler::rotateRight64(Register shift, Register64 src,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(shift != temp);
+ MOZ_ASSERT(src == dest);
+ MOZ_ASSERT(temp != src.low && temp != src.high);
+ MOZ_ASSERT(shift != src.low && shift != src.high);
+ MOZ_ASSERT(temp != InvalidReg);
+
+ ScratchRegisterScope shift_value(*this);
+ Label high, done;
+
+ ma_mov(src.high, temp);
+ as_and(shift_value, shift, Imm8(0x3f));
+ as_cmp(shift_value, Imm8(32));
+ ma_b(&high, GreaterThanOrEqual);
+
+  // high = high >> shift | low << (32 - shift)
+  // low = low >> shift | high << (32 - shift)
+ as_mov(dest.high, lsr(src.high, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.high, dest.high, lsl(src.low, shift_value));
+
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_mov(dest.low, lsr(src.low, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.low, dest.low, lsl(temp, shift_value));
+
+ ma_b(&done);
+
+  // A shift in the range 32..64 is a shift of 0..32 in the other direction.
+ bind(&high);
+ as_rsb(shift_value, shift_value, Imm8(64));
+
+ as_mov(dest.high, lsl(src.high, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.high, dest.high, lsr(src.low, shift_value));
+
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_mov(dest.low, lsl(src.low, shift_value));
+ as_rsb(shift_value, shift_value, Imm8(32));
+ as_orr(dest.low, dest.low, lsr(temp, shift_value));
+
+ bind(&done);
+}
+
+// ===============================================================
+// Condition functions
+
+void MacroAssembler::cmp8Set(Condition cond, Address lhs, Imm32 rhs,
+ Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ // Inlined calls to load8{Zero,Sign}Extend() and cmp32Set() to acquire
+ // exclusive access to scratch registers.
+
+ bool isSigned;
+ Imm32 imm(0);
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ isSigned = false;
+ imm = Imm32(uint8_t(rhs.value));
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ isSigned = true;
+ imm = Imm32(int8_t(rhs.value));
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+
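+  // For example, with rhs = Imm32(0xff): the unsigned conditions compare the
+  // zero-extended byte against 255, while the signed conditions compare the
+  // sign-extended byte against -1.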
+ ma_dataTransferN(IsLoad, 8, isSigned, lhs.base, Imm32(lhs.offset), scratch,
+ scratch2);
+ ma_cmp(scratch, imm, scratch2);
+ emitSet(cond, dest);
+}
+
+void MacroAssembler::cmp16Set(Condition cond, Address lhs, Imm32 rhs,
+ Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ // Inlined calls to load16{Zero,Sign}Extend() and cmp32Set() to acquire
+ // exclusive access to scratch registers.
+
+ bool isSigned;
+ Imm32 imm(0);
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ isSigned = false;
+ imm = Imm32(uint16_t(rhs.value));
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ isSigned = true;
+ imm = Imm32(int16_t(rhs.value));
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+
+ ma_dataTransferN(IsLoad, 16, isSigned, lhs.base, Imm32(lhs.offset), scratch,
+ scratch2);
+ ma_cmp(scratch, imm, scratch2);
+ emitSet(cond, dest);
+}
+
+template <typename T1, typename T2>
+void MacroAssembler::cmp32Set(Condition cond, T1 lhs, T2 rhs, Register dest) {
+ cmp32(lhs, rhs);
+ emitSet(cond, dest);
+}
+
+void MacroAssembler::cmp64Set(Condition cond, Address lhs, Imm64 rhs,
+ Register dest) {
+ Label success, done;
+
+ branch64(cond, lhs, rhs, &success);
+ move32(Imm32(0), dest);
+ jump(&done);
+ bind(&success);
+ move32(Imm32(1), dest);
+ bind(&done);
+}
+
+template <typename T1, typename T2>
+void MacroAssembler::cmpPtrSet(Condition cond, T1 lhs, T2 rhs, Register dest) {
+ cmpPtr(lhs, rhs);
+ emitSet(cond, dest);
+}
+
+// ===============================================================
+// Bit counting functions
+
+void MacroAssembler::clz32(Register src, Register dest, bool knownNotZero) {
+ ma_clz(src, dest);
+}
+
+void MacroAssembler::clz64(Register64 src, Register dest) {
+ ScratchRegisterScope scratch(*this);
+
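+  // clz(src.high) is 32 exactly when the high word is zero; in that case the
+  // answer is 32 + clz(src.low). For example, clz64(1) = 32 + 31 = 63.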
+ ma_clz(src.high, scratch);
+ as_cmp(scratch, Imm8(32));
+ ma_mov(scratch, dest, LeaveCC, NotEqual);
+ ma_clz(src.low, dest, Equal);
+ as_add(dest, dest, Imm8(32), LeaveCC, Equal);
+}
+
+void MacroAssembler::ctz32(Register src, Register dest, bool knownNotZero) {
+ ScratchRegisterScope scratch(*this);
+ ma_ctz(src, dest, scratch);
+}
+
+void MacroAssembler::ctz64(Register64 src, Register dest) {
+ Label done, high;
+ as_cmp(src.low, Imm8(0));
+ ma_b(&high, Equal);
+
+ ctz32(src.low, dest, /* knownNotZero = */ true);
+ ma_b(&done);
+
+ bind(&high);
+ ctz32(src.high, dest, /* knownNotZero = */ false);
+ as_add(dest, dest, Imm8(32));
+
+ bind(&done);
+}
+
+void MacroAssembler::popcnt32(Register input, Register output, Register tmp) {
+ // Equivalent to GCC output of mozilla::CountPopulation32()
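+  //
+  // This is the standard SWAR popcount reduction:
+  //   x -= (x >> 1) & 0x55555555;                      // 2-bit counts
+  //   x = (x & 0x33333333) + ((x >> 2) & 0x33333333);  // 4-bit counts
+  //   x = (x + (x >> 4)) & 0x0f0f0f0f;                 // 8-bit counts
+  //   x = (x * 0x01010101) >> 24;                      // sum of all bytes
+  // with the final multiply expressed as two shifted adds.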
+
+ ScratchRegisterScope scratch(*this);
+
+ if (input != output) {
+ ma_mov(input, output);
+ }
+ as_mov(tmp, asr(output, 1));
+ ma_and(Imm32(0x55555555), tmp, scratch);
+ ma_sub(output, tmp, output);
+ as_mov(tmp, asr(output, 2));
+ ma_mov(Imm32(0x33333333), scratch);
+ ma_and(scratch, output);
+ ma_and(scratch, tmp);
+ ma_add(output, tmp, output);
+ as_add(output, output, lsr(output, 4));
+ ma_and(Imm32(0xF0F0F0F), output, scratch);
+ as_add(output, output, lsl(output, 8));
+ as_add(output, output, lsl(output, 16));
+ as_mov(output, asr(output, 24));
+}
+
+void MacroAssembler::popcnt64(Register64 src, Register64 dest, Register tmp) {
+ MOZ_ASSERT(dest.low != tmp);
+ MOZ_ASSERT(dest.high != tmp);
+ MOZ_ASSERT(dest.low != dest.high);
+ // The source and destination can overlap. Therefore make sure we don't
+ // clobber the source before we have the data.
+ if (dest.low != src.high) {
+ popcnt32(src.low, dest.low, tmp);
+ popcnt32(src.high, dest.high, tmp);
+ } else {
+ MOZ_ASSERT(dest.high != src.high);
+ popcnt32(src.low, dest.high, tmp);
+ popcnt32(src.high, dest.low, tmp);
+ }
+ ma_add(dest.high, dest.low);
+ ma_mov(Imm32(0), dest.high);
+}
+
+// ===============================================================
+// Branch functions
+
+void MacroAssembler::branch8(Condition cond, const Address& lhs, Imm32 rhs,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ // Inlined calls to load8{Zero,Sign}Extend() and branch32() to acquire
+ // exclusive access to scratch registers.
+
+ bool isSigned;
+ Imm32 imm(0);
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ isSigned = false;
+ imm = Imm32(uint8_t(rhs.value));
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ isSigned = true;
+ imm = Imm32(int8_t(rhs.value));
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+
+ ma_dataTransferN(IsLoad, 8, isSigned, lhs.base, Imm32(lhs.offset), scratch,
+ scratch2);
+ ma_cmp(scratch, imm, scratch2);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch8(Condition cond, const BaseIndex& lhs, Register rhs,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ // Inlined calls to load8{Zero,Sign}Extend() and branch32() to acquire
+ // exclusive access to scratch registers.
+
+ bool isSigned;
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ isSigned = false;
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ isSigned = true;
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+
+ if (isSigned) {
+ Register index = lhs.index;
+
+ // ARMv7 does not have LSL on an index register with an extended load.
+ if (lhs.scale != TimesOne) {
+ ma_lsl(Imm32::ShiftOf(lhs.scale), index, scratch);
+ index = scratch;
+ }
+
+ if (lhs.offset != 0) {
+ if (index != scratch) {
+ ma_mov(index, scratch);
+ index = scratch;
+ }
+ ma_add(Imm32(lhs.offset), index, scratch2);
+ }
+ ma_ldrsb(EDtrAddr(lhs.base, EDtrOffReg(index)), scratch);
+ } else {
+ Register base = lhs.base;
+ uint32_t scale = Imm32::ShiftOf(lhs.scale).value;
+
+ if (lhs.offset == 0) {
+ ma_ldrb(DTRAddr(base, DtrRegImmShift(lhs.index, LSL, scale)), scratch);
+ } else {
+ ma_add(base, Imm32(lhs.offset), scratch, scratch2);
+ ma_ldrb(DTRAddr(scratch, DtrRegImmShift(lhs.index, LSL, scale)), scratch);
+ }
+ }
+
+ ma_cmp(scratch, rhs);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch16(Condition cond, const Address& lhs, Imm32 rhs,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ // Inlined calls to load16{Zero,Sign}Extend() and branch32() to acquire
+ // exclusive access to scratch registers.
+
+ bool isSigned;
+ Imm32 imm(0);
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ isSigned = false;
+ imm = Imm32(uint16_t(rhs.value));
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ isSigned = true;
+ imm = Imm32(int16_t(rhs.value));
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+
+ ma_dataTransferN(IsLoad, 16, isSigned, lhs.base, Imm32(lhs.offset), scratch,
+ scratch2);
+ ma_cmp(scratch, imm, scratch2);
+ ma_b(label, cond);
+}
+
+template <class L>
+void MacroAssembler::branch32(Condition cond, Register lhs, Register rhs,
+ L label) {
+ ma_cmp(lhs, rhs);
+ ma_b(label, cond);
+}
+
+template <class L>
+void MacroAssembler::branch32(Condition cond, Register lhs, Imm32 rhs,
+ L label) {
+ ScratchRegisterScope scratch(*this);
+
+ ma_cmp(lhs, rhs, scratch);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch32(Condition cond, const Address& lhs, Register rhs,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, rhs);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch32(Condition cond, const Address& lhs, Imm32 rhs,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, rhs, scratch2);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs,
+ Register rhs, Label* label) {
+ ScratchRegisterScope scratch(*this);
+
+ // Load into scratch.
+ movePtr(ImmWord(uintptr_t(lhs.addr)), scratch);
+ ma_ldr(DTRAddr(scratch, DtrOffImm(0)), scratch);
+
+ ma_cmp(scratch, rhs);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs,
+ Imm32 rhs, Label* label) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ // Load into scratch.
+ movePtr(ImmWord(uintptr_t(lhs.addr)), scratch);
+ ma_ldr(DTRAddr(scratch, DtrOffImm(0)), scratch);
+
+ ma_cmp(scratch, rhs, scratch2);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch32(Condition cond, const BaseIndex& lhs, Imm32 rhs,
+ Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ {
+ ScratchRegisterScope scratch(*this);
+
+ Register base = lhs.base;
+ uint32_t scale = Imm32::ShiftOf(lhs.scale).value;
+
+ // Load lhs into scratch2.
+ if (lhs.offset != 0) {
+ ma_add(base, Imm32(lhs.offset), scratch, scratch2);
+ ma_ldr(DTRAddr(scratch, DtrRegImmShift(lhs.index, LSL, scale)), scratch2);
+ } else {
+ ma_ldr(DTRAddr(base, DtrRegImmShift(lhs.index, LSL, scale)), scratch2);
+ }
+ }
+ branch32(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branch32(Condition cond, const BaseIndex& lhs,
+ Register rhs, Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ {
+ ScratchRegisterScope scratch(*this);
+
+ Register base = lhs.base;
+ uint32_t scale = Imm32::ShiftOf(lhs.scale).value;
+
+ // Load lhs into scratch2.
+ if (lhs.offset != 0) {
+ ma_add(base, Imm32(lhs.offset), scratch, scratch2);
+ ma_ldr(DTRAddr(scratch, DtrRegImmShift(lhs.index, LSL, scale)), scratch2);
+ } else {
+ ma_ldr(DTRAddr(base, DtrRegImmShift(lhs.index, LSL, scale)), scratch2);
+ }
+ }
+ branch32(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branch32(Condition cond, wasm::SymbolicAddress lhs,
+ Imm32 rhs, Label* label) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+
+ movePtr(lhs, scratch);
+ ma_ldr(DTRAddr(scratch, DtrOffImm(0)), scratch);
+
+ ma_cmp(scratch, rhs, scratch2);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branch64(Condition cond, const Address& lhs, Imm64 val,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal,
+ "other condition codes not supported");
+
+ Label done;
+
+ if (cond == Assembler::Equal) {
+ branch32(Assembler::NotEqual, lhs, val.firstHalf(), &done);
+ } else {
+ branch32(Assembler::NotEqual, lhs, val.firstHalf(), label);
+ }
+ branch32(cond, Address(lhs.base, lhs.offset + sizeof(uint32_t)),
+ val.secondHalf(), label);
+
+ bind(&done);
+}
+
+void MacroAssembler::branch64(Condition cond, const Address& lhs,
+ Register64 rhs, Label* label) {
+ MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal,
+ "other condition codes not supported");
+
+ Label done;
+
+ if (cond == Assembler::Equal) {
+ branch32(Assembler::NotEqual, lhs, rhs.low, &done);
+ } else {
+ branch32(Assembler::NotEqual, lhs, rhs.low, label);
+ }
+ branch32(cond, Address(lhs.base, lhs.offset + sizeof(uint32_t)), rhs.high,
+ label);
+
+ bind(&done);
+}
+
+void MacroAssembler::branch64(Condition cond, const Address& lhs,
+ const Address& rhs, Register scratch,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal,
+ "other condition codes not supported");
+ MOZ_ASSERT(lhs.base != scratch);
+ MOZ_ASSERT(rhs.base != scratch);
+
+ Label done;
+
+ load32(rhs, scratch);
+ if (cond == Assembler::Equal) {
+ branch32(Assembler::NotEqual, lhs, scratch, &done);
+ } else {
+ branch32(Assembler::NotEqual, lhs, scratch, label);
+ }
+
+ load32(Address(rhs.base, rhs.offset + sizeof(uint32_t)), scratch);
+ branch32(cond, Address(lhs.base, lhs.offset + sizeof(uint32_t)), scratch,
+ label);
+
+ bind(&done);
+}
+
+void MacroAssembler::branch64(Condition cond, Register64 lhs, Imm64 val,
+ Label* success, Label* fail) {
+ bool fallthrough = false;
+ Label fallthroughLabel;
+
+ if (!fail) {
+ fail = &fallthroughLabel;
+ fallthrough = true;
+ }
+
+ switch (cond) {
+ case Assembler::Equal:
+ branch32(Assembler::NotEqual, lhs.low, val.low(), fail);
+ branch32(Assembler::Equal, lhs.high, val.hi(), success);
+ if (!fallthrough) {
+ jump(fail);
+ }
+ break;
+ case Assembler::NotEqual:
+ branch32(Assembler::NotEqual, lhs.low, val.low(), success);
+ branch32(Assembler::NotEqual, lhs.high, val.hi(), success);
+ if (!fallthrough) {
+ jump(fail);
+ }
+ break;
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual: {
+ Assembler::Condition cond1 = Assembler::ConditionWithoutEqual(cond);
+ Assembler::Condition cond2 =
+ Assembler::ConditionWithoutEqual(Assembler::InvertCondition(cond));
+ Assembler::Condition cond3 = Assembler::UnsignedCondition(cond);
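+
+      // For example, for LessThanOrEqual: if lhs.high < val.hi() (signed
+      // LessThan) we succeed, if lhs.high > val.hi() (signed GreaterThan) we
+      // fail, and only when the high words are equal does the unsigned
+      // comparison of the low words (BelowOrEqual) decide.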
+
+ cmp32(lhs.high, val.hi());
+ ma_b(success, cond1);
+ ma_b(fail, cond2);
+ cmp32(lhs.low, val.low());
+ ma_b(success, cond3);
+ if (!fallthrough) {
+ jump(fail);
+ }
+ break;
+ }
+ default:
+ MOZ_CRASH("Condition code not supported");
+ break;
+ }
+
+ if (fallthrough) {
+ bind(fail);
+ }
+}
+
+void MacroAssembler::branch64(Condition cond, Register64 lhs, Register64 rhs,
+ Label* success, Label* fail) {
+ bool fallthrough = false;
+ Label fallthroughLabel;
+
+ if (!fail) {
+ fail = &fallthroughLabel;
+ fallthrough = true;
+ }
+
+ switch (cond) {
+ case Assembler::Equal:
+ branch32(Assembler::NotEqual, lhs.low, rhs.low, fail);
+ branch32(Assembler::Equal, lhs.high, rhs.high, success);
+ if (!fallthrough) {
+ jump(fail);
+ }
+ break;
+ case Assembler::NotEqual:
+ branch32(Assembler::NotEqual, lhs.low, rhs.low, success);
+ branch32(Assembler::NotEqual, lhs.high, rhs.high, success);
+ if (!fallthrough) {
+ jump(fail);
+ }
+ break;
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual: {
+ Assembler::Condition cond1 = Assembler::ConditionWithoutEqual(cond);
+ Assembler::Condition cond2 =
+ Assembler::ConditionWithoutEqual(Assembler::InvertCondition(cond));
+ Assembler::Condition cond3 = Assembler::UnsignedCondition(cond);
+
+ cmp32(lhs.high, rhs.high);
+ ma_b(success, cond1);
+ ma_b(fail, cond2);
+ cmp32(lhs.low, rhs.low);
+ ma_b(success, cond3);
+ if (!fallthrough) {
+ jump(fail);
+ }
+ break;
+ }
+ default:
+ MOZ_CRASH("Condition code not supported");
+ break;
+ }
+
+ if (fallthrough) {
+ bind(fail);
+ }
+}
+
+template <class L>
+void MacroAssembler::branchPtr(Condition cond, Register lhs, Register rhs,
+ L label) {
+ branch32(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, Imm32 rhs,
+ Label* label) {
+ branch32(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmPtr rhs,
+ Label* label) {
+ branchPtr(cond, lhs, ImmWord(uintptr_t(rhs.value)), label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmGCPtr rhs,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ movePtr(rhs, scratch);
+ branchPtr(cond, lhs, scratch, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmWord rhs,
+ Label* label) {
+ branch32(cond, lhs, Imm32(rhs.value), label);
+}
+
+template <class L>
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, Register rhs,
+ L label) {
+ branch32(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmPtr rhs,
+ Label* label) {
+ branchPtr(cond, lhs, ImmWord(uintptr_t(rhs.value)), label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmGCPtr rhs,
+ Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ loadPtr(lhs, scratch2);
+ branchPtr(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmWord rhs,
+ Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ loadPtr(lhs, scratch2);
+ branchPtr(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs,
+ Register rhs, Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ loadPtr(lhs, scratch2);
+ branchPtr(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs,
+ ImmWord rhs, Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ loadPtr(lhs, scratch2);
+ branchPtr(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, wasm::SymbolicAddress lhs,
+ Register rhs, Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ loadPtr(lhs, scratch2);
+ branchPtr(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs,
+ ImmWord rhs, Label* label) {
+ branch32(cond, lhs, Imm32(rhs.value), label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs,
+ Register rhs, Label* label) {
+ branch32(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchPrivatePtr(Condition cond, const Address& lhs,
+ Register rhs, Label* label) {
+ branchPtr(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchFloat(DoubleCondition cond, FloatRegister lhs,
+ FloatRegister rhs, Label* label) {
+ compareFloat(lhs, rhs);
+
+ if (cond == DoubleNotEqual) {
+ // Force the unordered cases not to jump.
+ Label unordered;
+ ma_b(&unordered, VFP_Unordered);
+ ma_b(label, VFP_NotEqualOrUnordered);
+ bind(&unordered);
+ return;
+ }
+
+ if (cond == DoubleEqualOrUnordered) {
+ ma_b(label, VFP_Unordered);
+ ma_b(label, VFP_Equal);
+ return;
+ }
+
+ ma_b(label, ConditionFromDoubleCondition(cond));
+}
+
+void MacroAssembler::branchTruncateFloat32MaybeModUint32(FloatRegister src,
+ Register dest,
+ Label* fail) {
+ branchTruncateFloat32ToInt32(src, dest, fail);
+}
+
+void MacroAssembler::branchTruncateFloat32ToInt32(FloatRegister src,
+ Register dest, Label* fail) {
+ ScratchFloat32Scope scratchFloat32(*this);
+ ScratchRegisterScope scratch(*this);
+
+ ma_vcvt_F32_I32(src, scratchFloat32.sintOverlay());
+ ma_vxfer(scratchFloat32, dest);
+ ma_cmp(dest, Imm32(0x7fffffff), scratch);
+ ma_cmp(dest, Imm32(0x80000000), scratch, Assembler::NotEqual);
+ ma_b(fail, Assembler::Equal);
+}
+
+void MacroAssembler::branchDouble(DoubleCondition cond, FloatRegister lhs,
+ FloatRegister rhs, Label* label) {
+ compareDouble(lhs, rhs);
+
+ if (cond == DoubleNotEqual) {
+ // Force the unordered cases not to jump.
+ Label unordered;
+ ma_b(&unordered, VFP_Unordered);
+ ma_b(label, VFP_NotEqualOrUnordered);
+ bind(&unordered);
+ return;
+ }
+
+ if (cond == DoubleEqualOrUnordered) {
+ ma_b(label, VFP_Unordered);
+ ma_b(label, VFP_Equal);
+ return;
+ }
+
+ ma_b(label, ConditionFromDoubleCondition(cond));
+}
+
+void MacroAssembler::branchTruncateDoubleMaybeModUint32(FloatRegister src,
+ Register dest,
+ Label* fail) {
+ branchTruncateDoubleToInt32(src, dest, fail);
+}
+
+// There are two options for implementing branchTruncateDoubleToInt32:
+//
+// 1. Convert the floating point value to an integer; if it did not fit, then
+// it was clamped to INT_MIN/INT_MAX, and we can test for that. NOTE: if the
+// value really was supposed to be INT_MAX / INT_MIN then the result will be
+// wrong.
+//
+// 2. Convert the floating point value to an integer; if it did not fit, then
+// it set one or two bits in the FPSCR. Check those.
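+//
+// The implementation below uses option 1: a converted result of exactly
+// INT32_MAX or INT32_MIN is treated as a possible overflow and branches to
+// fail.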
+void MacroAssembler::branchTruncateDoubleToInt32(FloatRegister src,
+ Register dest, Label* fail) {
+ ScratchDoubleScope scratchDouble(*this);
+ FloatRegister scratchSIntReg = scratchDouble.sintOverlay();
+ ScratchRegisterScope scratch(*this);
+
+ ma_vcvt_F64_I32(src, scratchSIntReg);
+ ma_vxfer(scratchSIntReg, dest);
+ ma_cmp(dest, Imm32(0x7fffffff), scratch);
+ ma_cmp(dest, Imm32(0x80000000), scratch, Assembler::NotEqual);
+ ma_b(fail, Assembler::Equal);
+}
+
+template <typename T>
+void MacroAssembler::branchAdd32(Condition cond, T src, Register dest,
+ Label* label) {
+ add32(src, dest);
+ as_b(label, cond);
+}
+
+template <typename T>
+void MacroAssembler::branchSub32(Condition cond, T src, Register dest,
+ Label* label) {
+ sub32(src, dest);
+ j(cond, label);
+}
+
+template <typename T>
+void MacroAssembler::branchMul32(Condition cond, T src, Register dest,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::Overflow);
+ ScratchRegisterScope scratch(*this);
+ Assembler::Condition overflow_cond =
+ ma_check_mul(src, dest, dest, scratch, cond);
+ j(overflow_cond, label);
+}
+
+template <typename T>
+void MacroAssembler::branchRshift32(Condition cond, T src, Register dest,
+ Label* label) {
+ MOZ_ASSERT(cond == Zero || cond == NonZero);
+ rshift32(src, dest);
+ branch32(cond == Zero ? Equal : NotEqual, dest, Imm32(0), label);
+}
+
+void MacroAssembler::branchNeg32(Condition cond, Register reg, Label* label) {
+ MOZ_ASSERT(cond == Overflow);
+ neg32(reg);
+ j(cond, label);
+}
+
+void MacroAssembler::branchAdd64(Condition cond, Imm64 imm, Register64 dest,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ ma_add(imm.low(), dest.low, scratch, SetCC);
+ ma_adc(imm.hi(), dest.high, scratch, SetCC);
+ j(cond, label);
+}
+
+template <typename T>
+void MacroAssembler::branchAddPtr(Condition cond, T src, Register dest,
+ Label* label) {
+ branchAdd32(cond, src, dest, label);
+}
+
+template <typename T>
+void MacroAssembler::branchSubPtr(Condition cond, T src, Register dest,
+ Label* label) {
+ branchSub32(cond, src, dest, label);
+}
+
+void MacroAssembler::branchMulPtr(Condition cond, Register src, Register dest,
+ Label* label) {
+ branchMul32(cond, src, dest, label);
+}
+
+void MacroAssembler::decBranchPtr(Condition cond, Register lhs, Imm32 rhs,
+ Label* label) {
+ ScratchRegisterScope scratch(*this);
+ ma_sub(rhs, lhs, scratch, SetCC);
+ as_b(label, cond);
+}
+
+template <class L>
+void MacroAssembler::branchTest32(Condition cond, Register lhs, Register rhs,
+ L label) {
+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
+ cond == NotSigned);
+ // x86 likes test foo, foo rather than cmp foo, #0.
+ // Convert the former into the latter.
+ if (lhs == rhs && (cond == Zero || cond == NonZero)) {
+ as_cmp(lhs, Imm8(0));
+ } else {
+ ma_tst(lhs, rhs);
+ }
+ ma_b(label, cond);
+}
+
+template <class L>
+void MacroAssembler::branchTest32(Condition cond, Register lhs, Imm32 rhs,
+ L label) {
+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
+ cond == NotSigned);
+ ScratchRegisterScope scratch(*this);
+ ma_tst(lhs, rhs, scratch);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branchTest32(Condition cond, const Address& lhs, Imm32 rhs,
+ Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ load32(lhs, scratch2);
+ branchTest32(cond, scratch2, rhs, label);
+}
+
+void MacroAssembler::branchTest32(Condition cond, const AbsoluteAddress& lhs,
+ Imm32 rhs, Label* label) {
+ SecondScratchRegisterScope scratch2(*this);
+ load32(lhs, scratch2);
+ branchTest32(cond, scratch2, rhs, label);
+}
+
+template <class L>
+void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Register rhs,
+ L label) {
+ branchTest32(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Imm32 rhs,
+ Label* label) {
+ branchTest32(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchTestPtr(Condition cond, const Address& lhs,
+ Imm32 rhs, Label* label) {
+ branchTest32(cond, lhs, rhs, label);
+}
+
+template <class L>
+void MacroAssembler::branchTest64(Condition cond, Register64 lhs,
+ Register64 rhs, Register temp, L label) {
+ if (cond == Assembler::Zero || cond == Assembler::NonZero) {
+ ScratchRegisterScope scratch(*this);
+
+ MOZ_ASSERT(lhs.low == rhs.low);
+ MOZ_ASSERT(lhs.high == rhs.high);
+ ma_orr(lhs.low, lhs.high, scratch);
+ branchTestPtr(cond, scratch, scratch, label);
+ } else if (cond == Assembler::Signed || cond == Assembler::NotSigned) {
+ branchTest32(cond, lhs.high, rhs.high, label);
+ } else {
+ MOZ_CRASH("Unsupported condition");
+ }
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond, Register tag,
+ Label* label) {
+ branchTestUndefinedImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond, const Address& address,
+ Label* label) {
+ branchTestUndefinedImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond,
+ const BaseIndex& address,
+ Label* label) {
+ branchTestUndefinedImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestUndefinedImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestUndefinedImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testUndefined(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, Register tag,
+ Label* label) {
+ branchTestInt32Impl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, const Address& address,
+ Label* label) {
+ branchTestInt32Impl(cond, address, label);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestInt32Impl(cond, address, label);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestInt32Impl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestInt32Impl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testInt32(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestInt32Truthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testInt32Truthy(truthy, value);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, Register tag,
+ Label* label) {
+ branchTestDoubleImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, const Address& address,
+ Label* label) {
+ branchTestDoubleImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestDoubleImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestDoubleImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestDoubleImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testDouble(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestDoubleTruthy(bool truthy, FloatRegister reg,
+ Label* label) {
+ Condition c = testDoubleTruthy(truthy, reg);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestNumber(Condition cond, Register tag,
+ Label* label) {
+ branchTestNumberImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestNumber(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestNumberImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestNumberImpl(Condition cond, const T& t,
+ Label* label) {
+ cond = testNumber(cond, t);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond, Register tag,
+ Label* label) {
+ branchTestBooleanImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond, const Address& address,
+ Label* label) {
+ branchTestBooleanImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestBooleanImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestBooleanImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestBooleanImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testBoolean(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestBooleanTruthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testBooleanTruthy(truthy, value);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestString(Condition cond, Register tag,
+ Label* label) {
+ branchTestStringImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestString(Condition cond, const Address& address,
+ Label* label) {
+ branchTestStringImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestString(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestStringImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestString(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestStringImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestStringImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testString(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestStringTruthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testStringTruthy(truthy, value);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, Register tag,
+ Label* label) {
+ branchTestSymbolImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, const Address& address,
+ Label* label) {
+ branchTestSymbolImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestSymbolImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestSymbolImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestSymbolImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testSymbol(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, Register tag,
+ Label* label) {
+ branchTestBigIntImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, const Address& address,
+ Label* label) {
+ branchTestBigIntImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestBigIntImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestBigIntImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestBigIntImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testBigInt(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestBigIntTruthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testBigIntTruthy(truthy, value);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, Register tag,
+ Label* label) {
+ branchTestNullImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, const Address& address,
+ Label* label) {
+ branchTestNullImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestNullImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestNullImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestNullImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testNull(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, Register tag,
+ Label* label) {
+ branchTestObjectImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, const Address& address,
+ Label* label) {
+ branchTestObjectImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestObjectImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestObjectImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestObjectImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testObject(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestGCThing(Condition cond, const Address& address,
+ Label* label) {
+ branchTestGCThingImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestGCThing(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestGCThingImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestGCThing(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestGCThingImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestGCThingImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testGCThing(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestPrimitive(Condition cond, Register tag,
+ Label* label) {
+ branchTestPrimitiveImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestPrimitive(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestPrimitiveImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestPrimitiveImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testPrimitive(cond, t);
+ ma_b(label, c);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, Register tag,
+ Label* label) {
+ branchTestMagicImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, const Address& address,
+ Label* label) {
+ branchTestMagicImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestMagicImpl(cond, address, label);
+}
+
+template <class L>
+void MacroAssembler::branchTestMagic(Condition cond, const ValueOperand& value,
+ L label) {
+ branchTestMagicImpl(cond, value, label);
+}
+
+template <typename T, class L>
+void MacroAssembler::branchTestMagicImpl(Condition cond, const T& t, L label) {
+ cond = testMagic(cond, t);
+ ma_b(label, cond);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, const Address& valaddr,
+ JSWhyMagic why, Label* label) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+
+ Label notMagic;
+ if (cond == Assembler::Equal) {
+ branchTestMagic(Assembler::NotEqual, valaddr, &notMagic);
+ } else {
+ branchTestMagic(Assembler::NotEqual, valaddr, label);
+ }
+
+ branch32(cond, ToPayload(valaddr), Imm32(why), label);
+ bind(&notMagic);
+}
+
+void MacroAssembler::branchTestValue(Condition cond, const BaseIndex& lhs,
+ const ValueOperand& rhs, Label* label) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+
+ Label notSameValue;
+ if (cond == Assembler::Equal) {
+ branch32(Assembler::NotEqual, ToType(lhs), rhs.typeReg(), &notSameValue);
+ } else {
+ branch32(Assembler::NotEqual, ToType(lhs), rhs.typeReg(), label);
+ }
+
+ branch32(cond, ToPayload(lhs), rhs.payloadReg(), label);
+ bind(&notSameValue);
+}
+
+template <typename T>
+void MacroAssembler::testNumberSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testNumber(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testBooleanSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testBoolean(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testStringSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testString(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testSymbolSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testSymbol(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testBigIntSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testBigInt(cond, src);
+ emitSet(cond, dest);
+}
+
+void MacroAssembler::branchToComputedAddress(const BaseIndex& addr) {
+ MOZ_ASSERT(
+ addr.offset == 0,
+ "NYI: offsets from pc should be shifted by the number of instructions.");
+
+ Register base = addr.base;
+ uint32_t scale = Imm32::ShiftOf(addr.scale).value;
+
+ ma_ldr(DTRAddr(base, DtrRegImmShift(addr.index, LSL, scale)), pc);
+
+ if (base == pc) {
+    // When loading from pc, the pc is shifted to the next instruction; we
+    // add one extra instruction to accommodate this shifted offset.
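+    // (In ARM mode, reading pc yields the address of the current instruction
+    // plus 8, i.e. two instructions ahead.)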
+ breakpoint();
+ }
+}
+
+void MacroAssembler::cmp32Move32(Condition cond, Register lhs, Register rhs,
+ Register src, Register dest) {
+ cmp32(lhs, rhs);
+ ma_mov(src, dest, LeaveCC, cond);
+}
+
+void MacroAssembler::cmp32MovePtr(Condition cond, Register lhs, Imm32 rhs,
+ Register src, Register dest) {
+ cmp32(lhs, rhs);
+ ma_mov(src, dest, LeaveCC, cond);
+}
+
+void MacroAssembler::cmp32Move32(Condition cond, Register lhs,
+ const Address& rhs, Register src,
+ Register dest) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+ ma_ldr(rhs, scratch, scratch2);
+ cmp32Move32(cond, lhs, scratch, src, dest);
+}
+
+void MacroAssembler::cmpPtrMovePtr(Condition cond, Register lhs, Register rhs,
+ Register src, Register dest) {
+ cmp32Move32(cond, lhs, rhs, src, dest);
+}
+
+void MacroAssembler::cmpPtrMovePtr(Condition cond, Register lhs,
+ const Address& rhs, Register src,
+ Register dest) {
+ cmp32Move32(cond, lhs, rhs, src, dest);
+}
+
+void MacroAssembler::cmp32Load32(Condition cond, Register lhs,
+ const Address& rhs, const Address& src,
+ Register dest) {
+ // This is never used, but must be present to facilitate linking on arm.
+ MOZ_CRASH("No known use cases");
+}
+
+void MacroAssembler::cmp32Load32(Condition cond, Register lhs, Register rhs,
+ const Address& src, Register dest) {
+ // This is never used, but must be present to facilitate linking on arm.
+ MOZ_CRASH("No known use cases");
+}
+
+void MacroAssembler::cmp32LoadPtr(Condition cond, const Address& lhs, Imm32 rhs,
+ const Address& src, Register dest) {
+ cmp32(lhs, rhs);
+ ScratchRegisterScope scratch(*this);
+ ma_ldr(src, dest, scratch, Offset, cond);
+}
+
+void MacroAssembler::test32LoadPtr(Condition cond, const Address& addr,
+ Imm32 mask, const Address& src,
+ Register dest) {
+ MOZ_ASSERT(cond == Assembler::Zero || cond == Assembler::NonZero);
+ test32(addr, mask);
+ ScratchRegisterScope scratch(*this);
+ ma_ldr(src, dest, scratch, Offset, cond);
+}
+
+void MacroAssembler::test32MovePtr(Condition cond, const Address& addr,
+ Imm32 mask, Register src, Register dest) {
+ MOZ_ASSERT(cond == Assembler::Zero || cond == Assembler::NonZero);
+ test32(addr, mask);
+ ma_mov(src, dest, LeaveCC, cond);
+}
+
+void MacroAssembler::spectreMovePtr(Condition cond, Register src,
+ Register dest) {
+ ma_mov(src, dest, LeaveCC, cond);
+}
+
+void MacroAssembler::spectreZeroRegister(Condition cond, Register,
+ Register dest) {
+ ma_mov(Imm32(0), dest, cond);
+}
+
+void MacroAssembler::spectreBoundsCheck32(Register index, Register length,
+ Register maybeScratch,
+ Label* failure) {
+ MOZ_ASSERT(length != maybeScratch);
+ MOZ_ASSERT(index != maybeScratch);
+
+ branch32(Assembler::BelowOrEqual, length, index, failure);
+
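+  // The conditional move below depends on the comparison's flags rather than
+  // on the branch, so even if the bounds-check branch is mispredicted and
+  // executed speculatively, an out-of-bounds index is still replaced with 0.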
+ if (JitOptions.spectreIndexMasking) {
+ ma_mov(Imm32(0), index, Assembler::BelowOrEqual);
+ }
+}
+
+void MacroAssembler::spectreBoundsCheck32(Register index, const Address& length,
+ Register maybeScratch,
+ Label* failure) {
+ MOZ_ASSERT(index != length.base);
+ MOZ_ASSERT(length.base != maybeScratch);
+ MOZ_ASSERT(index != maybeScratch);
+
+ branch32(Assembler::BelowOrEqual, length, index, failure);
+
+ if (JitOptions.spectreIndexMasking) {
+ ma_mov(Imm32(0), index, Assembler::BelowOrEqual);
+ }
+}
+
+void MacroAssembler::spectreBoundsCheckPtr(Register index, Register length,
+ Register maybeScratch,
+ Label* failure) {
+ spectreBoundsCheck32(index, length, maybeScratch, failure);
+}
+
+void MacroAssembler::spectreBoundsCheckPtr(Register index,
+ const Address& length,
+ Register maybeScratch,
+ Label* failure) {
+ spectreBoundsCheck32(index, length, maybeScratch, failure);
+}
+
+// ========================================================================
+// Memory access primitives.
+void MacroAssembler::storeUncanonicalizedDouble(FloatRegister src,
+ const Address& addr) {
+ ScratchRegisterScope scratch(*this);
+ ma_vstr(src, addr, scratch);
+}
+void MacroAssembler::storeUncanonicalizedDouble(FloatRegister src,
+ const BaseIndex& addr) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+ uint32_t scale = Imm32::ShiftOf(addr.scale).value;
+ ma_vstr(src, addr.base, addr.index, scratch, scratch2, scale, addr.offset);
+}
+
+void MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src,
+ const Address& addr) {
+ ScratchRegisterScope scratch(*this);
+ ma_vstr(src.asSingle(), addr, scratch);
+}
+void MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src,
+ const BaseIndex& addr) {
+ ScratchRegisterScope scratch(*this);
+ SecondScratchRegisterScope scratch2(*this);
+ uint32_t scale = Imm32::ShiftOf(addr.scale).value;
+ ma_vstr(src.asSingle(), addr.base, addr.index, scratch, scratch2, scale,
+ addr.offset);
+}
+
+void MacroAssembler::memoryBarrier(MemoryBarrierBits barrier) {
+ // On ARMv6 the optional argument (BarrierST, etc) is ignored.
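+  // dmb orders memory accesses with respect to each other; dsb additionally
+  // waits for prior accesses to complete, which MembarSynchronizing requires.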
+ if (barrier == (MembarStoreStore | MembarSynchronizing)) {
+ ma_dsb(BarrierST);
+ } else if (barrier & MembarSynchronizing) {
+ ma_dsb();
+ } else if (barrier == MembarStoreStore) {
+ ma_dmb(BarrierST);
+ } else if (barrier) {
+ ma_dmb();
+ }
+}
+
+// ===============================================================
+// Clamping functions.
+
+void MacroAssembler::clampIntToUint8(Register reg) {
+  // Look at (reg >> 8): if it is 0, then reg doesn't need to be clamped; if
+  // it is < 0, then we want to clamp to 0; otherwise, we wish to clamp to
+  // 255.
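+  // For example: reg = 300 gives (reg >> 8) == 1, so reg becomes 0xff;
+  // reg = -5 gives (reg >> 8) == -1 with the N flag set, so reg becomes 0;
+  // any reg already in [0, 255] gives (reg >> 8) == 0 and is left unchanged.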
+ ScratchRegisterScope scratch(*this);
+ as_mov(scratch, asr(reg, 8), SetCC);
+ ma_mov(Imm32(0xff), reg, NotEqual);
+ ma_mov(Imm32(0), reg, Signed);
+}
+
+template <typename T>
+void MacroAssemblerARMCompat::fallibleUnboxPtrImpl(const T& src, Register dest,
+ JSValueType type,
+ Label* fail) {
+ switch (type) {
+ case JSVAL_TYPE_OBJECT:
+ asMasm().branchTestObject(Assembler::NotEqual, src, fail);
+ break;
+ case JSVAL_TYPE_STRING:
+ asMasm().branchTestString(Assembler::NotEqual, src, fail);
+ break;
+ case JSVAL_TYPE_SYMBOL:
+ asMasm().branchTestSymbol(Assembler::NotEqual, src, fail);
+ break;
+ case JSVAL_TYPE_BIGINT:
+ asMasm().branchTestBigInt(Assembler::NotEqual, src, fail);
+ break;
+ default:
+ MOZ_CRASH("Unexpected type");
+ }
+ unboxNonDouble(src, dest, type);
+}
+
+void MacroAssembler::fallibleUnboxPtr(const ValueOperand& src, Register dest,
+ JSValueType type, Label* fail) {
+ fallibleUnboxPtrImpl(src, dest, type, fail);
+}
+
+void MacroAssembler::fallibleUnboxPtr(const Address& src, Register dest,
+ JSValueType type, Label* fail) {
+ fallibleUnboxPtrImpl(src, dest, type, fail);
+}
+
+void MacroAssembler::fallibleUnboxPtr(const BaseIndex& src, Register dest,
+ JSValueType type, Label* fail) {
+ fallibleUnboxPtrImpl(src, dest, type, fail);
+}
+
+//}}} check_macroassembler_style
+// ===============================================================
+
+void MacroAssemblerARMCompat::incrementInt32Value(const Address& addr) {
+ asMasm().add32(Imm32(1), ToPayload(addr));
+}
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_MacroAssembler_arm_inl_h */
diff --git a/js/src/jit/arm/MacroAssembler-arm.cpp b/js/src/jit/arm/MacroAssembler-arm.cpp
new file mode 100644
index 0000000000..fe4f36ab26
--- /dev/null
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -0,0 +1,6382 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm/MacroAssembler-arm.h"
+
+#include "mozilla/Casting.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
+
+#include "jsmath.h"
+
+#include "jit/arm/Simulator-arm.h"
+#include "jit/AtomicOp.h"
+#include "jit/AtomicOperations.h"
+#include "jit/Bailouts.h"
+#include "jit/BaselineFrame.h"
+#include "jit/JitFrames.h"
+#include "jit/JitRuntime.h"
+#include "jit/MacroAssembler.h"
+#include "jit/MoveEmitter.h"
+#include "js/ScalarType.h" // js::Scalar::Type
+#include "util/Memory.h"
+#include "vm/BigIntType.h"
+#include "vm/JitActivation.h" // js::jit::JitActivation
+#include "vm/JSContext.h"
+#include "vm/StringType.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+using namespace js;
+using namespace jit;
+
+using mozilla::Abs;
+using mozilla::BitwiseCast;
+using mozilla::DebugOnly;
+using mozilla::IsPositiveZero;
+using mozilla::Maybe;
+
+bool isValueDTRDCandidate(ValueOperand& val) {
+  // In order to be used for a DTRD memory function, the two target registers
+  // need to be (a) adjacent, with the tag register numbered one above the
+  // payload register, and (b) even-odd aligned: the payload register must
+  // have an even register code.
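+  // For example, (payload = r2, tag = r3) qualifies, while (payload = r1,
+  // tag = r2) fails the even-alignment requirement of ldrd/strd.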
+ if ((val.typeReg().code() != (val.payloadReg().code() + 1))) {
+ return false;
+ }
+ if ((val.payloadReg().code() & 1) != 0) {
+ return false;
+ }
+ return true;
+}
+
+void MacroAssemblerARM::convertBoolToInt32(Register source, Register dest) {
+ // Note that C++ bool is only 1 byte, so zero extend it to clear the
+ // higher-order bits.
+ as_and(dest, source, Imm8(0xff));
+}
+
+void MacroAssemblerARM::convertInt32ToDouble(Register src,
+ FloatRegister dest_) {
+ // Direct conversions aren't possible.
+ VFPRegister dest = VFPRegister(dest_);
+ as_vxfer(src, InvalidReg, dest.sintOverlay(), CoreToFloat);
+ as_vcvt(dest, dest.sintOverlay());
+}
+
+void MacroAssemblerARM::convertInt32ToDouble(const Address& src,
+ FloatRegister dest) {
+ ScratchDoubleScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_vldr(src, scratch, scratch2);
+ as_vcvt(dest, VFPRegister(scratch).sintOverlay());
+}
+
+void MacroAssemblerARM::convertInt32ToDouble(const BaseIndex& src,
+ FloatRegister dest) {
+ Register base = src.base;
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (src.offset != 0) {
+ ma_add(base, Imm32(src.offset), scratch, scratch2);
+ base = scratch;
+ }
+ ma_ldr(DTRAddr(base, DtrRegImmShift(src.index, LSL, scale)), scratch);
+ convertInt32ToDouble(scratch, dest);
+}
+
+void MacroAssemblerARM::convertUInt32ToDouble(Register src,
+ FloatRegister dest_) {
+ // Direct conversions aren't possible.
+ VFPRegister dest = VFPRegister(dest_);
+ as_vxfer(src, InvalidReg, dest.uintOverlay(), CoreToFloat);
+ as_vcvt(dest, dest.uintOverlay());
+}
+
+static const double TO_DOUBLE_HIGH_SCALE = 0x100000000;
+
+void MacroAssemblerARM::convertUInt32ToFloat32(Register src,
+ FloatRegister dest_) {
+ // Direct conversions aren't possible.
+ VFPRegister dest = VFPRegister(dest_);
+ as_vxfer(src, InvalidReg, dest.uintOverlay(), CoreToFloat);
+ as_vcvt(VFPRegister(dest).singleOverlay(), dest.uintOverlay());
+}
+
+void MacroAssemblerARM::convertDoubleToFloat32(FloatRegister src,
+ FloatRegister dest,
+ Condition c) {
+ as_vcvt(VFPRegister(dest).singleOverlay(), VFPRegister(src), false, c);
+}
+
+// Checks whether a double is representable as a 32-bit integer. If so, the
+// integer is written to the output register. Otherwise, a bailout is taken to
+// the given snapshot. This function overwrites the scratch float register.
+void MacroAssemblerARM::convertDoubleToInt32(FloatRegister src, Register dest,
+ Label* fail,
+ bool negativeZeroCheck) {
+  // Convert the floating point value to an integer; if it did not fit, then
+  // when we convert it *back* to a float it will have a different value,
+  // which we can test.
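+  // For example, for src = 5.5 the truncating vcvt yields 5; converting 5
+  // back gives 5.0, and vcmp(5.5, 5.0) compares not-equal, so we branch to
+  // fail.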
+ ScratchDoubleScope scratchDouble(asMasm());
+ ScratchRegisterScope scratch(asMasm());
+
+ FloatRegister scratchSIntReg = scratchDouble.sintOverlay();
+
+ ma_vcvt_F64_I32(src, scratchSIntReg);
+ // Move the value into the dest register.
+ ma_vxfer(scratchSIntReg, dest);
+ ma_vcvt_I32_F64(scratchSIntReg, scratchDouble);
+ ma_vcmp(src, scratchDouble);
+ as_vmrs(pc);
+ ma_b(fail, Assembler::VFP_NotEqualOrUnordered);
+
+ if (negativeZeroCheck) {
+ as_cmp(dest, Imm8(0));
+ // Test and bail for -0.0, when integer result is 0. Move the top word
+ // of the double into the output reg, if it is non-zero, then the
+ // original value was -0.0.
+ as_vxfer(dest, InvalidReg, src, FloatToCore, Assembler::Equal, 1);
+ ma_cmp(dest, Imm32(0x80000000), scratch, Assembler::Equal);
+ ma_b(fail, Assembler::Equal);
+ }
+}
+
+// Checks whether a float32 is representable as a 32-bit integer. If so, the
+// integer is written to the output register. Otherwise, a bailout is taken to
+// the given snapshot. This function overwrites the scratch float register.
+void MacroAssemblerARM::convertFloat32ToInt32(FloatRegister src, Register dest,
+ Label* fail,
+ bool negativeZeroCheck) {
+ // Converting the floating point value to an integer and then converting it
+ // back to a float32 would not work, as float to int32 conversions are
+ // clamping (e.g. float(INT32_MAX + 1) would get converted into INT32_MAX
+ // and then back to float(INT32_MAX + 1)). If this ever happens, we just
+ // bail out.
+ ScratchFloat32Scope scratchFloat(asMasm());
+ ScratchRegisterScope scratch(asMasm());
+
+ FloatRegister ScratchSIntReg = scratchFloat.sintOverlay();
+ ma_vcvt_F32_I32(src, ScratchSIntReg);
+
+ // Store the result
+ ma_vxfer(ScratchSIntReg, dest);
+
+ ma_vcvt_I32_F32(ScratchSIntReg, scratchFloat);
+ ma_vcmp(src, scratchFloat);
+ as_vmrs(pc);
+ ma_b(fail, Assembler::VFP_NotEqualOrUnordered);
+
+ // Bail out in the clamped cases.
+ ma_cmp(dest, Imm32(0x7fffffff), scratch);
+ ma_cmp(dest, Imm32(0x80000000), scratch, Assembler::NotEqual);
+ ma_b(fail, Assembler::Equal);
+
+ if (negativeZeroCheck) {
+ as_cmp(dest, Imm8(0));
+ // Test and bail for -0.0, when integer result is 0. Move the float into
+ // the output reg, and if it is non-zero then the original value was
+ // -0.0
+ as_vxfer(dest, InvalidReg, VFPRegister(src).singleOverlay(), FloatToCore,
+ Assembler::Equal, 0);
+ ma_cmp(dest, Imm32(0x80000000), scratch, Assembler::Equal);
+ ma_b(fail, Assembler::Equal);
+ }
+}
+
+void MacroAssemblerARM::convertFloat32ToDouble(FloatRegister src,
+ FloatRegister dest) {
+ MOZ_ASSERT(dest.isDouble());
+ MOZ_ASSERT(src.isSingle());
+ as_vcvt(VFPRegister(dest), VFPRegister(src).singleOverlay());
+}
+
+void MacroAssemblerARM::convertInt32ToFloat32(Register src,
+ FloatRegister dest) {
+ // Direct conversions aren't possible.
+ as_vxfer(src, InvalidReg, dest.sintOverlay(), CoreToFloat);
+ as_vcvt(dest.singleOverlay(), dest.sintOverlay());
+}
+
+void MacroAssemblerARM::convertInt32ToFloat32(const Address& src,
+ FloatRegister dest) {
+ ScratchFloat32Scope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_vldr(src, scratch, scratch2);
+ as_vcvt(dest, VFPRegister(scratch).sintOverlay());
+}
+
+bool MacroAssemblerARM::alu_dbl(Register src1, Imm32 imm, Register dest,
+ ALUOp op, SBit s, Condition c) {
+ if ((s == SetCC && !condsAreSafe(op)) || !can_dbl(op)) {
+ return false;
+ }
+
+ ALUOp interop = getDestVariant(op);
+ Imm8::TwoImm8mData both = Imm8::EncodeTwoImms(imm.value);
+ if (both.fst().invalid()) {
+ return false;
+ }
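+
+  // For example, 0x00ff00ff has no single imm8m encoding, but it splits into
+  // the two imm8m values 0x00ff0000 and 0x000000ff, which can be applied by
+  // two chained ALU instructions.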
+
+  // For the most part, there is no good reason to set the condition codes for
+  // the first instruction. We could do better things if the second
+  // instruction doesn't have a dest, such as checking for overflow by
+  // performing the first operation and skipping the second operation if the
+  // first one overflowed; this preserves the overflow condition code.
+  // Unfortunately, it is horribly brittle.
+ as_alu(dest, src1, Operand2(both.fst()), interop, LeaveCC, c);
+ as_alu(dest, dest, Operand2(both.snd()), op, s, c);
+ return true;
+}
+
+void MacroAssemblerARM::ma_alu(Register src1, Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, ALUOp op, SBit s,
+ Condition c) {
+ // ma_mov should be used for moves.
+ MOZ_ASSERT(op != OpMov);
+ MOZ_ASSERT(op != OpMvn);
+ MOZ_ASSERT(src1 != scratch);
+
+ // As it turns out, if you ask for a compare-like instruction you *probably*
+ // want it to set condition codes.
+ MOZ_ASSERT_IF(dest == InvalidReg, s == SetCC);
+
+ // The operator gives us the ability to determine how this can be used.
+ Imm8 imm8 = Imm8(imm.value);
+ // One instruction: If we can encode it using an imm8m, then do so.
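+  // (An imm8m is an 8-bit value rotated right by an even amount; e.g.
+  // 0x0000ff00 is 0xff rotated right by 24.)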
+ if (!imm8.invalid()) {
+ as_alu(dest, src1, imm8, op, s, c);
+ return;
+ }
+
+ // One instruction, negated:
+ Imm32 negImm = imm;
+ Register negDest;
+ ALUOp negOp = ALUNeg(op, dest, scratch, &negImm, &negDest);
+ Imm8 negImm8 = Imm8(negImm.value);
+ // 'add r1, r2, -15' can be replaced with 'sub r1, r2, 15'.
+ // The dest can be replaced (InvalidReg => scratch).
+ // This is useful if we wish to negate tst. tst has an invalid (aka not
+ // used) dest, but its negation bic requires a dest.
+ if (negOp != OpInvalid && !negImm8.invalid()) {
+ as_alu(negDest, src1, negImm8, negOp, s, c);
+ return;
+ }
+
+  // Start by attempting to generate a two-instruction form. Some things
+  // cannot be made into two-inst forms correctly, e.g. "adds dest, src,
+  // 0xffff". Since we want the condition codes (and don't know which ones
+  // will be checked), we need to assume that the overflow flag will be
+  // checked, and "add{,s} dest, src, 0xff00; add{,s} dest, dest, 0xff" is
+  // not guaranteed to set the overflow flag the same as the (theoretical)
+  // one-instruction variant.
+ if (alu_dbl(src1, imm, dest, op, s, c)) {
+ return;
+ }
+
+ // And try with its negative.
+ if (negOp != OpInvalid && alu_dbl(src1, negImm, negDest, negOp, s, c)) {
+ return;
+ }
+
+ ma_mov(imm, scratch, c);
+ as_alu(dest, src1, O2Reg(scratch), op, s, c);
+}
+
+void MacroAssemblerARM::ma_alu(Register src1, Operand op2, Register dest,
+ ALUOp op, SBit s, Assembler::Condition c) {
+ MOZ_ASSERT(op2.tag() == Operand::Tag::OP2);
+ as_alu(dest, src1, op2.toOp2(), op, s, c);
+}
+
+void MacroAssemblerARM::ma_alu(Register src1, Operand2 op2, Register dest,
+ ALUOp op, SBit s, Condition c) {
+ as_alu(dest, src1, op2, op, s, c);
+}
+
+void MacroAssemblerARM::ma_nop() { as_nop(); }
+
+BufferOffset MacroAssemblerARM::ma_movPatchable(Imm32 imm_, Register dest,
+ Assembler::Condition c) {
+ int32_t imm = imm_.value;
+ if (HasMOVWT()) {
+ BufferOffset offset = as_movw(dest, Imm16(imm & 0xffff), c);
+ as_movt(dest, Imm16(imm >> 16 & 0xffff), c);
+ return offset;
+ } else {
+ return as_Imm32Pool(dest, imm, c);
+ }
+}
+
+BufferOffset MacroAssemblerARM::ma_movPatchable(ImmPtr imm, Register dest,
+ Assembler::Condition c) {
+ return ma_movPatchable(Imm32(int32_t(imm.value)), dest, c);
+}
+
+/* static */
+template <class Iter>
+void MacroAssemblerARM::ma_mov_patch(Imm32 imm32, Register dest,
+ Assembler::Condition c, RelocStyle rs,
+ Iter iter) {
+ // The current instruction must be an actual instruction,
+ // not automatically-inserted boilerplate.
+ MOZ_ASSERT(iter.cur());
+ MOZ_ASSERT(iter.cur() == iter.maybeSkipAutomaticInstructions());
+
+ int32_t imm = imm32.value;
+ switch (rs) {
+ case L_MOVWT:
+ Assembler::as_movw_patch(dest, Imm16(imm & 0xffff), c, iter.cur());
+ Assembler::as_movt_patch(dest, Imm16(imm >> 16 & 0xffff), c, iter.next());
+ break;
+ case L_LDR:
+ Assembler::WritePoolEntry(iter.cur(), c, imm);
+ break;
+ }
+}
+
+template void MacroAssemblerARM::ma_mov_patch(Imm32 imm32, Register dest,
+ Assembler::Condition c,
+ RelocStyle rs,
+ InstructionIterator iter);
+template void MacroAssemblerARM::ma_mov_patch(Imm32 imm32, Register dest,
+ Assembler::Condition c,
+ RelocStyle rs,
+ BufferInstructionIterator iter);
+
+void MacroAssemblerARM::ma_mov(Register src, Register dest, SBit s,
+ Assembler::Condition c) {
+ if (s == SetCC || dest != src) {
+ as_mov(dest, O2Reg(src), s, c);
+ }
+}
+
+void MacroAssemblerARM::ma_mov(Imm32 imm, Register dest,
+ Assembler::Condition c) {
+ // Try mov with Imm8 operand.
+ Imm8 imm8 = Imm8(imm.value);
+ if (!imm8.invalid()) {
+ as_alu(dest, InvalidReg, imm8, OpMov, LeaveCC, c);
+ return;
+ }
+
+ // Try mvn with Imm8 operand.
+ Imm8 negImm8 = Imm8(~imm.value);
+ if (!negImm8.invalid()) {
+ as_alu(dest, InvalidReg, negImm8, OpMvn, LeaveCC, c);
+ return;
+ }
+
+ // Try movw/movt.
+ if (HasMOVWT()) {
+    // ARMv7 supports movw/movt. movw zero-extends its 16-bit argument,
+    // so we can set the register this way. movt leaves the bottom 16
+    // bits intact, so we always need a movw.
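+    //
+    // For example, 0x12345678 materializes as "movw dest, #0x5678;
+    // movt dest, #0x1234", while 0x0000ffff needs only "movw dest, #0xffff".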
+ as_movw(dest, Imm16(imm.value & 0xffff), c);
+ if (uint32_t(imm.value) >> 16) {
+ as_movt(dest, Imm16(uint32_t(imm.value) >> 16), c);
+ }
+ return;
+ }
+
+ // If we don't have movw/movt, we need a load.
+ as_Imm32Pool(dest, imm.value, c);
+}
+
+void MacroAssemblerARM::ma_mov(ImmWord imm, Register dest,
+ Assembler::Condition c) {
+ ma_mov(Imm32(imm.value), dest, c);
+}
+
+void MacroAssemblerARM::ma_mov(ImmGCPtr ptr, Register dest) {
+ BufferOffset offset =
+ ma_movPatchable(Imm32(uintptr_t(ptr.value)), dest, Always);
+ writeDataRelocation(offset, ptr);
+}
+
+// Shifts (just a move with a shifting op2)
+void MacroAssemblerARM::ma_lsl(Imm32 shift, Register src, Register dst) {
+ as_mov(dst, lsl(src, shift.value));
+}
+
+void MacroAssemblerARM::ma_lsr(Imm32 shift, Register src, Register dst) {
+ as_mov(dst, lsr(src, shift.value));
+}
+
+void MacroAssemblerARM::ma_asr(Imm32 shift, Register src, Register dst) {
+ as_mov(dst, asr(src, shift.value));
+}
+
+void MacroAssemblerARM::ma_ror(Imm32 shift, Register src, Register dst) {
+ as_mov(dst, ror(src, shift.value));
+}
+
+void MacroAssemblerARM::ma_rol(Imm32 shift, Register src, Register dst) {
+ as_mov(dst, rol(src, shift.value));
+}
+
+// Shifts (just a move with a shifting op2)
+void MacroAssemblerARM::ma_lsl(Register shift, Register src, Register dst) {
+ as_mov(dst, lsl(src, shift));
+}
+
+void MacroAssemblerARM::ma_lsr(Register shift, Register src, Register dst) {
+ as_mov(dst, lsr(src, shift));
+}
+
+void MacroAssemblerARM::ma_asr(Register shift, Register src, Register dst) {
+ as_mov(dst, asr(src, shift));
+}
+
+void MacroAssemblerARM::ma_ror(Register shift, Register src, Register dst) {
+ as_mov(dst, ror(src, shift));
+}
+
+void MacroAssemblerARM::ma_rol(Register shift, Register src, Register dst,
+ AutoRegisterScope& scratch) {
+ as_rsb(scratch, shift, Imm8(32));
+ as_mov(dst, ror(src, scratch));
+}
+
+// Move not (dest <- ~src)
+void MacroAssemblerARM::ma_mvn(Register src1, Register dest, SBit s,
+ Assembler::Condition c) {
+ as_alu(dest, InvalidReg, O2Reg(src1), OpMvn, s, c);
+}
+
+// Negate (dest <- -src), src is a register, rather than a general op2.
+void MacroAssemblerARM::ma_neg(Register src1, Register dest, SBit s,
+ Assembler::Condition c) {
+ as_rsb(dest, src1, Imm8(0), s, c);
+}
+
+void MacroAssemblerARM::ma_neg(Register64 src, Register64 dest) {
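+  // 64-bit negation is -x == 0 - x computed with borrow: the rsbs on the
+  // low word sets the carry flag, which the rsc on the high word consumes.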
+ as_rsb(dest.low, src.low, Imm8(0), SetCC);
+ as_rsc(dest.high, src.high, Imm8(0));
+}
+
+// And.
+void MacroAssemblerARM::ma_and(Register src, Register dest, SBit s,
+ Assembler::Condition c) {
+  ma_and(dest, src, dest, s, c);
+}
+
+void MacroAssemblerARM::ma_and(Register src1, Register src2, Register dest,
+ SBit s, Assembler::Condition c) {
+ as_and(dest, src1, O2Reg(src2), s, c);
+}
+
+void MacroAssemblerARM::ma_and(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Assembler::Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpAnd, s, c);
+}
+
+void MacroAssemblerARM::ma_and(Imm32 imm, Register src1, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Assembler::Condition c) {
+ ma_alu(src1, imm, dest, scratch, OpAnd, s, c);
+}
+
+// Bit clear (dest <- dest & ~imm) or (dest <- src1 & ~src2).
+void MacroAssemblerARM::ma_bic(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Assembler::Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpBic, s, c);
+}
+
+// Exclusive or.
+void MacroAssemblerARM::ma_eor(Register src, Register dest, SBit s,
+ Assembler::Condition c) {
+ ma_eor(dest, src, dest, s, c);
+}
+
+void MacroAssemblerARM::ma_eor(Register src1, Register src2, Register dest,
+ SBit s, Assembler::Condition c) {
+ as_eor(dest, src1, O2Reg(src2), s, c);
+}
+
+void MacroAssemblerARM::ma_eor(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Assembler::Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpEor, s, c);
+}
+
+void MacroAssemblerARM::ma_eor(Imm32 imm, Register src1, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Assembler::Condition c) {
+ ma_alu(src1, imm, dest, scratch, OpEor, s, c);
+}
+
+// Or.
+void MacroAssemblerARM::ma_orr(Register src, Register dest, SBit s,
+ Assembler::Condition c) {
+ ma_orr(dest, src, dest, s, c);
+}
+
+void MacroAssemblerARM::ma_orr(Register src1, Register src2, Register dest,
+ SBit s, Assembler::Condition c) {
+ as_orr(dest, src1, O2Reg(src2), s, c);
+}
+
+void MacroAssemblerARM::ma_orr(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Assembler::Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpOrr, s, c);
+}
+
+void MacroAssemblerARM::ma_orr(Imm32 imm, Register src1, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Assembler::Condition c) {
+ ma_alu(src1, imm, dest, scratch, OpOrr, s, c);
+}
+
+// Arithmetic-based ops.
+// Add with carry.
+void MacroAssemblerARM::ma_adc(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpAdc, s, c);
+}
+
+void MacroAssemblerARM::ma_adc(Register src, Register dest, SBit s,
+ Condition c) {
+ as_alu(dest, dest, O2Reg(src), OpAdc, s, c);
+}
+
+void MacroAssemblerARM::ma_adc(Register src1, Register src2, Register dest,
+ SBit s, Condition c) {
+ as_alu(dest, src1, O2Reg(src2), OpAdc, s, c);
+}
+
+void MacroAssemblerARM::ma_adc(Register src1, Imm32 op, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(src1, op, dest, scratch, OpAdc, s, c);
+}
+
+// Add.
+void MacroAssemblerARM::ma_add(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpAdd, s, c);
+}
+
+void MacroAssemblerARM::ma_add(Register src1, Register dest, SBit s,
+ Condition c) {
+ ma_alu(dest, O2Reg(src1), dest, OpAdd, s, c);
+}
+
+void MacroAssemblerARM::ma_add(Register src1, Register src2, Register dest,
+ SBit s, Condition c) {
+ as_alu(dest, src1, O2Reg(src2), OpAdd, s, c);
+}
+
+void MacroAssemblerARM::ma_add(Register src1, Operand op, Register dest, SBit s,
+ Condition c) {
+ ma_alu(src1, op, dest, OpAdd, s, c);
+}
+
+void MacroAssemblerARM::ma_add(Register src1, Imm32 op, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(src1, op, dest, scratch, OpAdd, s, c);
+}
+
+// Subtract with carry.
+void MacroAssemblerARM::ma_sbc(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpSbc, s, c);
+}
+
+void MacroAssemblerARM::ma_sbc(Register src1, Register dest, SBit s,
+ Condition c) {
+ as_alu(dest, dest, O2Reg(src1), OpSbc, s, c);
+}
+
+void MacroAssemblerARM::ma_sbc(Register src1, Register src2, Register dest,
+ SBit s, Condition c) {
+ as_alu(dest, src1, O2Reg(src2), OpSbc, s, c);
+}
+
+// Subtract.
+void MacroAssemblerARM::ma_sub(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpSub, s, c);
+}
+
+void MacroAssemblerARM::ma_sub(Register src1, Register dest, SBit s,
+ Condition c) {
+ ma_alu(dest, Operand(src1), dest, OpSub, s, c);
+}
+
+void MacroAssemblerARM::ma_sub(Register src1, Register src2, Register dest,
+ SBit s, Condition c) {
+ ma_alu(src1, Operand(src2), dest, OpSub, s, c);
+}
+
+void MacroAssemblerARM::ma_sub(Register src1, Operand op, Register dest, SBit s,
+ Condition c) {
+ ma_alu(src1, op, dest, OpSub, s, c);
+}
+
+void MacroAssemblerARM::ma_sub(Register src1, Imm32 op, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(src1, op, dest, scratch, OpSub, s, c);
+}
+
+// Reverse subtract.
+void MacroAssemblerARM::ma_rsb(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpRsb, s, c);
+}
+
+void MacroAssemblerARM::ma_rsb(Register src1, Register dest, SBit s,
+ Condition c) {
+ as_alu(dest, src1, O2Reg(dest), OpRsb, s, c);
+}
+
+void MacroAssemblerARM::ma_rsb(Register src1, Register src2, Register dest,
+ SBit s, Condition c) {
+ as_alu(dest, src1, O2Reg(src2), OpRsb, s, c);
+}
+
+void MacroAssemblerARM::ma_rsb(Register src1, Imm32 op2, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(src1, op2, dest, scratch, OpRsb, s, c);
+}
+
+// Reverse subtract with carry.
+void MacroAssemblerARM::ma_rsc(Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, SBit s,
+ Condition c) {
+ ma_alu(dest, imm, dest, scratch, OpRsc, s, c);
+}
+
+void MacroAssemblerARM::ma_rsc(Register src1, Register dest, SBit s,
+ Condition c) {
+ as_alu(dest, dest, O2Reg(src1), OpRsc, s, c);
+}
+
+void MacroAssemblerARM::ma_rsc(Register src1, Register src2, Register dest,
+ SBit s, Condition c) {
+ as_alu(dest, src1, O2Reg(src2), OpRsc, s, c);
+}
+
+// Compares/tests.
+// Compare negative (sets condition codes as src1 + src2 would).
+void MacroAssemblerARM::ma_cmn(Register src1, Imm32 imm,
+ AutoRegisterScope& scratch, Condition c) {
+ ma_alu(src1, imm, InvalidReg, scratch, OpCmn, SetCC, c);
+}
+
+void MacroAssemblerARM::ma_cmn(Register src1, Register src2, Condition c) {
+ as_alu(InvalidReg, src2, O2Reg(src1), OpCmn, SetCC, c);
+}
+
+void MacroAssemblerARM::ma_cmn(Register src1, Operand op, Condition c) {
+ MOZ_CRASH("Feature NYI");
+}
+
+// Compare (src1 - src2).
+void MacroAssemblerARM::ma_cmp(Register src1, Imm32 imm,
+ AutoRegisterScope& scratch, Condition c) {
+ ma_alu(src1, imm, InvalidReg, scratch, OpCmp, SetCC, c);
+}
+
+void MacroAssemblerARM::ma_cmp(Register src1, ImmTag tag, Condition c) {
+ // ImmTag comparisons can always be done without use of a scratch register.
+ Imm8 negtag = Imm8(-tag.value);
+ MOZ_ASSERT(!negtag.invalid());
+ as_cmn(src1, negtag, c);
+}
+
+void MacroAssemblerARM::ma_cmp(Register src1, ImmWord ptr,
+ AutoRegisterScope& scratch, Condition c) {
+ ma_cmp(src1, Imm32(ptr.value), scratch, c);
+}
+
+void MacroAssemblerARM::ma_cmp(Register src1, ImmGCPtr ptr,
+ AutoRegisterScope& scratch, Condition c) {
+ ma_mov(ptr, scratch);
+ ma_cmp(src1, scratch, c);
+}
+
+void MacroAssemblerARM::ma_cmp(Register src1, Operand op,
+ AutoRegisterScope& scratch,
+ AutoRegisterScope& scratch2, Condition c) {
+ switch (op.tag()) {
+ case Operand::Tag::OP2:
+ as_cmp(src1, op.toOp2(), c);
+ break;
+ case Operand::Tag::MEM:
+ ma_ldr(op.toAddress(), scratch, scratch2);
+ as_cmp(src1, O2Reg(scratch), c);
+ break;
+ default:
+ MOZ_CRASH("trying to compare FP and integer registers");
+ }
+}
+
+void MacroAssemblerARM::ma_cmp(Register src1, Register src2, Condition c) {
+ as_cmp(src1, O2Reg(src2), c);
+}
+
+// Test for equality, (src1 ^ src2).
+void MacroAssemblerARM::ma_teq(Register src1, Imm32 imm,
+ AutoRegisterScope& scratch, Condition c) {
+ ma_alu(src1, imm, InvalidReg, scratch, OpTeq, SetCC, c);
+}
+
+void MacroAssemblerARM::ma_teq(Register src1, Register src2, Condition c) {
+  as_teq(src1, O2Reg(src2), c);
+}
+
+void MacroAssemblerARM::ma_teq(Register src1, Operand op, Condition c) {
+ as_teq(src1, op.toOp2(), c);
+}
+
+// Test (src1 & src2).
+void MacroAssemblerARM::ma_tst(Register src1, Imm32 imm,
+ AutoRegisterScope& scratch, Condition c) {
+ ma_alu(src1, imm, InvalidReg, scratch, OpTst, SetCC, c);
+}
+
+void MacroAssemblerARM::ma_tst(Register src1, Register src2, Condition c) {
+ as_tst(src1, O2Reg(src2), c);
+}
+
+void MacroAssemblerARM::ma_tst(Register src1, Operand op, Condition c) {
+ as_tst(src1, op.toOp2(), c);
+}
+
+void MacroAssemblerARM::ma_mul(Register src1, Register src2, Register dest) {
+ as_mul(dest, src1, src2);
+}
+
+void MacroAssemblerARM::ma_mul(Register src1, Imm32 imm, Register dest,
+ AutoRegisterScope& scratch) {
+ ma_mov(imm, scratch);
+ as_mul(dest, src1, scratch);
+}
+
+Assembler::Condition MacroAssemblerARM::ma_check_mul(Register src1,
+ Register src2,
+ Register dest,
+ AutoRegisterScope& scratch,
+ Condition cond) {
+ // TODO: this operation is illegal on armv6 and earlier
+ // if src2 == scratch or src2 == dest.
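+  //
+  // For the Overflow case below: a signed 32x32->64 multiply fits in 32
+  // bits exactly when the high result word equals the sign extension of the
+  // low word, hence the comparison against asr(dest, 31).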
+ if (cond == Equal || cond == NotEqual) {
+ as_smull(scratch, dest, src1, src2, SetCC);
+ return cond;
+ }
+
+ if (cond == Overflow) {
+ as_smull(scratch, dest, src1, src2);
+ as_cmp(scratch, asr(dest, 31));
+ return NotEqual;
+ }
+
+ MOZ_CRASH("Condition NYI");
+}
+
+Assembler::Condition MacroAssemblerARM::ma_check_mul(Register src1, Imm32 imm,
+ Register dest,
+ AutoRegisterScope& scratch,
+ Condition cond) {
+ ma_mov(imm, scratch);
+
+ if (cond == Equal || cond == NotEqual) {
+ as_smull(scratch, dest, scratch, src1, SetCC);
+ return cond;
+ }
+
+ if (cond == Overflow) {
+ as_smull(scratch, dest, scratch, src1);
+ as_cmp(scratch, asr(dest, 31));
+ return NotEqual;
+ }
+
+ MOZ_CRASH("Condition NYI");
+}
+
+void MacroAssemblerARM::ma_umull(Register src1, Imm32 imm, Register destHigh,
+ Register destLow, AutoRegisterScope& scratch) {
+ ma_mov(imm, scratch);
+ as_umull(destHigh, destLow, src1, scratch);
+}
+
+void MacroAssemblerARM::ma_umull(Register src1, Register src2,
+ Register destHigh, Register destLow) {
+ as_umull(destHigh, destLow, src1, src2);
+}
+
+void MacroAssemblerARM::ma_mod_mask(Register src, Register dest, Register hold,
+ Register tmp, AutoRegisterScope& scratch,
+ AutoRegisterScope& scratch2,
+ int32_t shift) {
+  // We wish to compute x % ((1<<y) - 1) for a known constant, y.
+ //
+ // 1. Let b = (1<<y) and C = (1<<y)-1, then think of the 32 bit dividend as
+ // a number in base b, namely c_0*1 + c_1*b + c_2*b^2 ... c_n*b^n
+ //
+ // 2. Since both addition and multiplication commute with modulus:
+ // x % C == (c_0 + c_1*b + ... + c_n*b^n) % C ==
+ // (c_0 % C) + (c_1%C) * (b % C) + (c_2 % C) * (b^2 % C)...
+ //
+  // 3. Since b == C + 1, b % C == 1, and b^n % C == 1, the whole thing
+  // simplifies to: (c_0 + c_1 + c_2 ... c_n) % C
+ //
+ // Each c_n can easily be computed by a shift/bitextract, and the modulus
+ // can be maintained by simply subtracting by C whenever the number gets
+ // over C.
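+  //
+  // Worked example (illustrative): shift = 2 gives C = 3 and b = 4. For
+  // x = 29 = 131 (base 4), the digit sum is 1 + 3 + 1 = 5, and one trial
+  // subtraction of C leaves 2, which is indeed 29 % 3.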
+ int32_t mask = (1 << shift) - 1;
+ Label head;
+
+  // Register 'hold' holds -1 if the value was negative, 1 otherwise. 'tmp'
+  // holds the remaining bits that have not been processed. 'scratch' serves
+  // as a temporary location to store extracted bits into, as well as
+  // holding the trial subtraction. 'dest' is the accumulator (and holds the
+  // final result).
+  //
+  // Move the whole value into tmp, setting the condition codes so we can
+  // muck with them later.
+ as_mov(tmp, O2Reg(src), SetCC);
+ // Zero out the dest.
+ ma_mov(Imm32(0), dest);
+ // Set the hold appropriately.
+ ma_mov(Imm32(1), hold);
+ ma_mov(Imm32(-1), hold, Signed);
+ as_rsb(tmp, tmp, Imm8(0), SetCC, Signed);
+
+ // Begin the main loop.
+ bind(&head);
+ {
+ // Extract the bottom bits.
+ ma_and(Imm32(mask), tmp, scratch, scratch2);
+ // Add those bits to the accumulator.
+ ma_add(scratch, dest, dest);
+ // Do a trial subtraction, this is the same operation as cmp, but we store
+ // the dest.
+ ma_sub(dest, Imm32(mask), scratch, scratch2, SetCC);
+    // If (sum - C) >= 0, store sum - C back into sum, thus performing a modulus.
+ ma_mov(scratch, dest, LeaveCC, NotSigned);
+ // Get rid of the bits that we extracted before, and set the condition
+ // codes.
+ as_mov(tmp, lsr(tmp, shift), SetCC);
+ // If the shift produced zero, finish, otherwise, continue in the loop.
+ ma_b(&head, NonZero);
+ }
+
+ // Check the hold to see if we need to negate the result. Hold can only be
+ // 1 or -1, so this will never set the 0 flag.
+ as_cmp(hold, Imm8(0));
+  // If the hold was non-zero, negate the result to be in line with what JS
+  // wants; this will set the condition codes if we try to negate.
+ as_rsb(dest, dest, Imm8(0), SetCC, Signed);
+ // Since the Zero flag is not set by the compare, we can *only* set the Zero
+ // flag in the rsb, so Zero is set iff we negated zero (e.g. the result of
+ // the computation was -0.0).
+}
+
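+// Remainder via divide then multiply-subtract: dest = num - (num / div) * div.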
+void MacroAssemblerARM::ma_smod(Register num, Register div, Register dest,
+ AutoRegisterScope& scratch) {
+ as_sdiv(scratch, num, div);
+ as_mls(dest, num, scratch, div);
+}
+
+void MacroAssemblerARM::ma_umod(Register num, Register div, Register dest,
+ AutoRegisterScope& scratch) {
+ as_udiv(scratch, num, div);
+ as_mls(dest, num, scratch, div);
+}
+
+// Division
+void MacroAssemblerARM::ma_sdiv(Register num, Register div, Register dest,
+ Condition cond) {
+ as_sdiv(dest, num, div, cond);
+}
+
+void MacroAssemblerARM::ma_udiv(Register num, Register div, Register dest,
+ Condition cond) {
+ as_udiv(dest, num, div, cond);
+}
+
+// Miscellaneous instructions.
+void MacroAssemblerARM::ma_clz(Register src, Register dest, Condition cond) {
+ as_clz(dest, src, cond);
+}
+
+void MacroAssemblerARM::ma_ctz(Register src, Register dest,
+ AutoRegisterScope& scratch) {
+ // int c = __clz(a & -a);
+ // return a ? 31 - c : c;
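+  //
+  // For example, for src = 4 (0b100), src & -src isolates the lowest set
+  // bit (4), clz(4) == 29, and 31 - 29 == 2 trailing zeroes. For src = 0,
+  // the NotEqual guard below is skipped and clz(0) == 32 is returned as-is.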
+ as_rsb(scratch, src, Imm8(0), SetCC);
+ as_and(dest, src, O2Reg(scratch), LeaveCC);
+ as_clz(dest, dest);
+ as_rsb(dest, dest, Imm8(0x1F), LeaveCC, Assembler::NotEqual);
+}
+
+// Memory.
+// Shortcut for when we know we're transferring 32 bits of data.
+void MacroAssemblerARM::ma_dtr(LoadStore ls, Register rn, Imm32 offset,
+ Register rt, AutoRegisterScope& scratch,
+ Index mode, Assembler::Condition cc) {
+ ma_dataTransferN(ls, 32, true, rn, offset, rt, scratch, mode, cc);
+}
+
+void MacroAssemblerARM::ma_dtr(LoadStore ls, Register rt, const Address& addr,
+ AutoRegisterScope& scratch, Index mode,
+ Condition cc) {
+ ma_dataTransferN(ls, 32, true, addr.base, Imm32(addr.offset), rt, scratch,
+ mode, cc);
+}
+
+void MacroAssemblerARM::ma_str(Register rt, DTRAddr addr, Index mode,
+ Condition cc) {
+ as_dtr(IsStore, 32, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_str(Register rt, const Address& addr,
+ AutoRegisterScope& scratch, Index mode,
+ Condition cc) {
+ ma_dtr(IsStore, rt, addr, scratch, mode, cc);
+}
+
+void MacroAssemblerARM::ma_strd(Register rt, DebugOnly<Register> rt2,
+ EDtrAddr addr, Index mode, Condition cc) {
+ MOZ_ASSERT((rt.code() & 1) == 0);
+ MOZ_ASSERT(rt2.value.code() == rt.code() + 1);
+ as_extdtr(IsStore, 64, true, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_ldr(DTRAddr addr, Register rt, Index mode,
+ Condition cc) {
+ as_dtr(IsLoad, 32, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_ldr(const Address& addr, Register rt,
+ AutoRegisterScope& scratch, Index mode,
+ Condition cc) {
+ ma_dtr(IsLoad, rt, addr, scratch, mode, cc);
+}
+
+void MacroAssemblerARM::ma_ldrb(DTRAddr addr, Register rt, Index mode,
+ Condition cc) {
+ as_dtr(IsLoad, 8, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_ldrsh(EDtrAddr addr, Register rt, Index mode,
+ Condition cc) {
+ as_extdtr(IsLoad, 16, true, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_ldrh(EDtrAddr addr, Register rt, Index mode,
+ Condition cc) {
+ as_extdtr(IsLoad, 16, false, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_ldrsb(EDtrAddr addr, Register rt, Index mode,
+ Condition cc) {
+ as_extdtr(IsLoad, 8, true, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_ldrd(EDtrAddr addr, Register rt,
+ DebugOnly<Register> rt2, Index mode,
+ Condition cc) {
+ MOZ_ASSERT((rt.code() & 1) == 0);
+ MOZ_ASSERT(rt2.value.code() == rt.code() + 1);
+ MOZ_ASSERT(addr.maybeOffsetRegister() !=
+ rt); // Undefined behavior if rm == rt/rt2.
+ MOZ_ASSERT(addr.maybeOffsetRegister() != rt2);
+ as_extdtr(IsLoad, 64, true, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_strh(Register rt, EDtrAddr addr, Index mode,
+ Condition cc) {
+ as_extdtr(IsStore, 16, false, mode, rt, addr, cc);
+}
+
+void MacroAssemblerARM::ma_strb(Register rt, DTRAddr addr, Index mode,
+ Condition cc) {
+ as_dtr(IsStore, 8, mode, rt, addr, cc);
+}
+
+// Specialty for moving N bits of data, where n == 8,16,32,64.
+BufferOffset MacroAssemblerARM::ma_dataTransferN(
+ LoadStore ls, int size, bool IsSigned, Register rn, Register rm,
+ Register rt, AutoRegisterScope& scratch, Index mode,
+ Assembler::Condition cc, Scale scale) {
+ MOZ_ASSERT(size == 8 || size == 16 || size == 32 || size == 64);
+
+ if (size == 32 || (size == 8 && !IsSigned)) {
+ return as_dtr(ls, size, mode, rt,
+ DTRAddr(rn, DtrRegImmShift(rm, LSL, scale)), cc);
+ }
+
+ if (scale != TimesOne) {
+ ma_lsl(Imm32(scale), rm, scratch);
+ rm = scratch;
+ }
+
+ return as_extdtr(ls, size, IsSigned, mode, rt, EDtrAddr(rn, EDtrOffReg(rm)),
+ cc);
+}
+
+// No scratch register is required if scale is TimesOne.
+BufferOffset MacroAssemblerARM::ma_dataTransferN(LoadStore ls, int size,
+ bool IsSigned, Register rn,
+ Register rm, Register rt,
+ Index mode,
+ Assembler::Condition cc) {
+ MOZ_ASSERT(size == 8 || size == 16 || size == 32 || size == 64);
+ if (size == 32 || (size == 8 && !IsSigned)) {
+ return as_dtr(ls, size, mode, rt,
+ DTRAddr(rn, DtrRegImmShift(rm, LSL, TimesOne)), cc);
+ }
+ return as_extdtr(ls, size, IsSigned, mode, rt, EDtrAddr(rn, EDtrOffReg(rm)),
+ cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_dataTransferN(LoadStore ls, int size,
+ bool IsSigned, Register rn,
+ Imm32 offset, Register rt,
+ AutoRegisterScope& scratch,
+ Index mode,
+ Assembler::Condition cc) {
+ MOZ_ASSERT(!(ls == IsLoad && mode == PostIndex && rt == pc),
+ "Large-offset PostIndex loading into PC requires special logic: "
+ "see ma_popn_pc().");
+
+ int off = offset.value;
+
+ // We can encode this as a standard ldr.
+ if (size == 32 || (size == 8 && !IsSigned)) {
+ if (off < 4096 && off > -4096) {
+      // This encodes as a single instruction; emulating mode's behavior
+      // in a multi-instruction sequence is not necessary.
+ return as_dtr(ls, size, mode, rt, DTRAddr(rn, DtrOffImm(off)), cc);
+ }
+
+    // We cannot encode this offset in a single ldr. For mode == Offset, try
+    // to encode it as |add scratch, base, imm; ldr dest, [scratch,
+    // +offset]|. This does not work for mode == PreIndex or mode ==
+    // PostIndex. PreIndex is simple: just do the add into the base register
+    // first, then do a PreIndex'ed load. PostIndexed loads can be tricky.
+    // Normally, doing the load with an index of 0 and then doing an add
+    // would work, but if the destination is the PC, you don't get to
+    // execute the instruction after the branch, so the base register would
+    // not be updated correctly. Explicitly handle this case, without doing
+    // anything fancy, then handle all of the other cases.
+
+ // mode == Offset
+ // add scratch, base, offset_hi
+ // ldr dest, [scratch, +offset_lo]
+ //
+ // mode == PreIndex
+ // add base, base, offset_hi
+ // ldr dest, [base, +offset_lo]!
+
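+    // Illustrative example: off = 0x1234 does not fit the 12-bit immediate,
+    // but bottom = 0x234 and off - bottom = 0x1000 is imm8m-encodable,
+    // giving:
+    //   add scratch, base, #0x1000
+    //   ldr dest, [scratch, #+0x234]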
+ int bottom = off & 0xfff;
+ int neg_bottom = 0x1000 - bottom;
+
+ MOZ_ASSERT(rn != scratch);
+ MOZ_ASSERT(mode != PostIndex);
+
+ // At this point, both off - bottom and off + neg_bottom will be
+ // reasonable-ish quantities.
+ //
+    // Note a neg_bottom of 0x1000 cannot be encoded as an immediate
+    // negative offset in the instruction and this occurs when bottom is
+    // zero, so this case is guarded against below.
+ if (off < 0) {
+ Operand2 sub_off = Imm8(-(off - bottom)); // sub_off = bottom - off
+ if (!sub_off.invalid()) {
+ // - sub_off = off - bottom
+ as_sub(scratch, rn, sub_off, LeaveCC, cc);
+ return as_dtr(ls, size, Offset, rt, DTRAddr(scratch, DtrOffImm(bottom)),
+ cc);
+ }
+
+ // sub_off = -neg_bottom - off
+ sub_off = Imm8(-(off + neg_bottom));
+ if (!sub_off.invalid() && bottom != 0) {
+ // Guarded against by: bottom != 0
+ MOZ_ASSERT(neg_bottom < 0x1000);
+ // - sub_off = neg_bottom + off
+ as_sub(scratch, rn, sub_off, LeaveCC, cc);
+ return as_dtr(ls, size, Offset, rt,
+ DTRAddr(scratch, DtrOffImm(-neg_bottom)), cc);
+ }
+ } else {
+ // sub_off = off - bottom
+ Operand2 sub_off = Imm8(off - bottom);
+ if (!sub_off.invalid()) {
+ // sub_off = off - bottom
+ as_add(scratch, rn, sub_off, LeaveCC, cc);
+ return as_dtr(ls, size, Offset, rt, DTRAddr(scratch, DtrOffImm(bottom)),
+ cc);
+ }
+
+ // sub_off = neg_bottom + off
+ sub_off = Imm8(off + neg_bottom);
+ if (!sub_off.invalid() && bottom != 0) {
+ // Guarded against by: bottom != 0
+ MOZ_ASSERT(neg_bottom < 0x1000);
+ // sub_off = neg_bottom + off
+ as_add(scratch, rn, sub_off, LeaveCC, cc);
+ return as_dtr(ls, size, Offset, rt,
+ DTRAddr(scratch, DtrOffImm(-neg_bottom)), cc);
+ }
+ }
+
+ ma_mov(offset, scratch);
+ return as_dtr(ls, size, mode, rt,
+ DTRAddr(rn, DtrRegImmShift(scratch, LSL, 0)));
+ } else {
+ // Should attempt to use the extended load/store instructions.
+ if (off < 256 && off > -256) {
+ return as_extdtr(ls, size, IsSigned, mode, rt,
+ EDtrAddr(rn, EDtrOffImm(off)), cc);
+ }
+
+ // We cannot encode this offset in a single extldr. Try to encode it as
+ // an add scratch, base, imm; extldr dest, [scratch, +offset].
+ int bottom = off & 0xff;
+ int neg_bottom = 0x100 - bottom;
+ // At this point, both off - bottom and off + neg_bottom will be
+ // reasonable-ish quantities.
+ //
+    // Note a neg_bottom of 0x100 cannot be encoded as an immediate
+    // negative offset in the instruction and this occurs when bottom is
+    // zero, so this case is guarded against below.
+ if (off < 0) {
+ // sub_off = bottom - off
+ Operand2 sub_off = Imm8(-(off - bottom));
+ if (!sub_off.invalid()) {
+ // - sub_off = off - bottom
+ as_sub(scratch, rn, sub_off, LeaveCC, cc);
+ return as_extdtr(ls, size, IsSigned, Offset, rt,
+ EDtrAddr(scratch, EDtrOffImm(bottom)), cc);
+ }
+ // sub_off = -neg_bottom - off
+ sub_off = Imm8(-(off + neg_bottom));
+ if (!sub_off.invalid() && bottom != 0) {
+ // Guarded against by: bottom != 0
+ MOZ_ASSERT(neg_bottom < 0x100);
+ // - sub_off = neg_bottom + off
+ as_sub(scratch, rn, sub_off, LeaveCC, cc);
+ return as_extdtr(ls, size, IsSigned, Offset, rt,
+ EDtrAddr(scratch, EDtrOffImm(-neg_bottom)), cc);
+ }
+ } else {
+ // sub_off = off - bottom
+ Operand2 sub_off = Imm8(off - bottom);
+ if (!sub_off.invalid()) {
+ // sub_off = off - bottom
+ as_add(scratch, rn, sub_off, LeaveCC, cc);
+ return as_extdtr(ls, size, IsSigned, Offset, rt,
+ EDtrAddr(scratch, EDtrOffImm(bottom)), cc);
+ }
+ // sub_off = neg_bottom + off
+ sub_off = Imm8(off + neg_bottom);
+ if (!sub_off.invalid() && bottom != 0) {
+ // Guarded against by: bottom != 0
+ MOZ_ASSERT(neg_bottom < 0x100);
+ // sub_off = neg_bottom + off
+ as_add(scratch, rn, sub_off, LeaveCC, cc);
+ return as_extdtr(ls, size, IsSigned, Offset, rt,
+ EDtrAddr(scratch, EDtrOffImm(-neg_bottom)), cc);
+ }
+ }
+ ma_mov(offset, scratch);
+ return as_extdtr(ls, size, IsSigned, mode, rt,
+ EDtrAddr(rn, EDtrOffReg(scratch)), cc);
+ }
+}
+
+void MacroAssemblerARM::ma_pop(Register r) {
+ as_dtr(IsLoad, 32, PostIndex, r, DTRAddr(sp, DtrOffImm(4)));
+}
+
+void MacroAssemblerARM::ma_popn_pc(Imm32 n, AutoRegisterScope& scratch,
+ AutoRegisterScope& scratch2) {
+ // pc <- [sp]; sp += n
+ int32_t nv = n.value;
+
+  if (nv < 4096 && nv > -4096) {
+ as_dtr(IsLoad, 32, PostIndex, pc, DTRAddr(sp, DtrOffImm(nv)));
+ } else {
+ ma_mov(sp, scratch);
+ ma_add(Imm32(n), sp, scratch2);
+ as_dtr(IsLoad, 32, Offset, pc, DTRAddr(scratch, DtrOffImm(0)));
+ }
+}
+
+void MacroAssemblerARM::ma_push(Register r) {
+ MOZ_ASSERT(r != sp, "Use ma_push_sp().");
+ as_dtr(IsStore, 32, PreIndex, r, DTRAddr(sp, DtrOffImm(-4)));
+}
+
+void MacroAssemblerARM::ma_push_sp(Register r, AutoRegisterScope& scratch) {
+ // Pushing sp is not well-defined: use two instructions.
+ MOZ_ASSERT(r == sp);
+ ma_mov(sp, scratch);
+ as_dtr(IsStore, 32, PreIndex, scratch, DTRAddr(sp, DtrOffImm(-4)));
+}
+
+void MacroAssemblerARM::ma_vpop(VFPRegister r) {
+ startFloatTransferM(IsLoad, sp, IA, WriteBack);
+ transferFloatReg(r);
+ finishFloatTransfer();
+}
+
+void MacroAssemblerARM::ma_vpush(VFPRegister r) {
+ startFloatTransferM(IsStore, sp, DB, WriteBack);
+ transferFloatReg(r);
+ finishFloatTransfer();
+}
+
+// Barriers
+void MacroAssemblerARM::ma_dmb(BarrierOption option) {
+ if (HasDMBDSBISB()) {
+ as_dmb(option);
+ } else {
+ as_dmb_trap();
+ }
+}
+
+void MacroAssemblerARM::ma_dsb(BarrierOption option) {
+ if (HasDMBDSBISB()) {
+ as_dsb(option);
+ } else {
+ as_dsb_trap();
+ }
+}
+
+// Branches when done from within arm-specific code.
+BufferOffset MacroAssemblerARM::ma_b(Label* dest, Assembler::Condition c) {
+ return as_b(dest, c);
+}
+
+void MacroAssemblerARM::ma_bx(Register dest, Assembler::Condition c) {
+ as_bx(dest, c);
+}
+
+void MacroAssemblerARM::ma_b(void* target, Assembler::Condition c) {
+ // An immediate pool is used for easier patching.
+ as_Imm32Pool(pc, uint32_t(target), c);
+}
+
+// This is almost NEVER necessary: we'll basically never be calling a label,
+// except possibly in the crazy bailout-table case.
+void MacroAssemblerARM::ma_bl(Label* dest, Assembler::Condition c) {
+ as_bl(dest, c);
+}
+
+void MacroAssemblerARM::ma_blx(Register reg, Assembler::Condition c) {
+ as_blx(reg, c);
+}
+
+// VFP/ALU
+void MacroAssemblerARM::ma_vadd(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vadd(VFPRegister(dst), VFPRegister(src1), VFPRegister(src2));
+}
+
+void MacroAssemblerARM::ma_vadd_f32(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vadd(VFPRegister(dst).singleOverlay(), VFPRegister(src1).singleOverlay(),
+ VFPRegister(src2).singleOverlay());
+}
+
+void MacroAssemblerARM::ma_vsub(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vsub(VFPRegister(dst), VFPRegister(src1), VFPRegister(src2));
+}
+
+void MacroAssemblerARM::ma_vsub_f32(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vsub(VFPRegister(dst).singleOverlay(), VFPRegister(src1).singleOverlay(),
+ VFPRegister(src2).singleOverlay());
+}
+
+void MacroAssemblerARM::ma_vmul(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vmul(VFPRegister(dst), VFPRegister(src1), VFPRegister(src2));
+}
+
+void MacroAssemblerARM::ma_vmul_f32(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vmul(VFPRegister(dst).singleOverlay(), VFPRegister(src1).singleOverlay(),
+ VFPRegister(src2).singleOverlay());
+}
+
+void MacroAssemblerARM::ma_vdiv(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vdiv(VFPRegister(dst), VFPRegister(src1), VFPRegister(src2));
+}
+
+void MacroAssemblerARM::ma_vdiv_f32(FloatRegister src1, FloatRegister src2,
+ FloatRegister dst) {
+ as_vdiv(VFPRegister(dst).singleOverlay(), VFPRegister(src1).singleOverlay(),
+ VFPRegister(src2).singleOverlay());
+}
+
+void MacroAssemblerARM::ma_vmov(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vmov(dest, src, cc);
+}
+
+void MacroAssemblerARM::ma_vmov_f32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vmov(VFPRegister(dest).singleOverlay(), VFPRegister(src).singleOverlay(),
+ cc);
+}
+
+void MacroAssemblerARM::ma_vneg(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vneg(dest, src, cc);
+}
+
+void MacroAssemblerARM::ma_vneg_f32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vneg(VFPRegister(dest).singleOverlay(), VFPRegister(src).singleOverlay(),
+ cc);
+}
+
+void MacroAssemblerARM::ma_vabs(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vabs(dest, src, cc);
+}
+
+void MacroAssemblerARM::ma_vabs_f32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vabs(VFPRegister(dest).singleOverlay(), VFPRegister(src).singleOverlay(),
+ cc);
+}
+
+void MacroAssemblerARM::ma_vsqrt(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vsqrt(dest, src, cc);
+}
+
+void MacroAssemblerARM::ma_vsqrt_f32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ as_vsqrt(VFPRegister(dest).singleOverlay(), VFPRegister(src).singleOverlay(),
+ cc);
+}
+
+static inline uint32_t DoubleHighWord(double d) {
+ return static_cast<uint32_t>(BitwiseCast<uint64_t>(d) >> 32);
+}
+
+static inline uint32_t DoubleLowWord(double d) {
+ return static_cast<uint32_t>(BitwiseCast<uint64_t>(d)) & uint32_t(0xffffffff);
+}
+
+void MacroAssemblerARM::ma_vimm(double value, FloatRegister dest,
+ Condition cc) {
+ if (HasVFPv3()) {
+ if (DoubleLowWord(value) == 0) {
+ if (DoubleHighWord(value) == 0) {
+        // To zero a register, load 1.0, then execute dN <- dN - dN.
+ as_vimm(dest, VFPImm::One, cc);
+ as_vsub(dest, dest, dest, cc);
+ return;
+ }
+
+ VFPImm enc(DoubleHighWord(value));
+ if (enc.isValid()) {
+ as_vimm(dest, enc, cc);
+ return;
+ }
+ }
+ }
+ // Fall back to putting the value in a pool.
+ as_FImm64Pool(dest, value, cc);
+}
+
+void MacroAssemblerARM::ma_vimm_f32(float value, FloatRegister dest,
+ Condition cc) {
+ VFPRegister vd = VFPRegister(dest).singleOverlay();
+ if (HasVFPv3()) {
+ if (IsPositiveZero(value)) {
+ // To zero a register, load 1.0, then execute sN <- sN - sN.
+ as_vimm(vd, VFPImm::One, cc);
+ as_vsub(vd, vd, vd, cc);
+ return;
+ }
+
+    // Note that the vimm immediate float32 instruction encoding differs
+    // from the vimm immediate double encoding, but this difference matches
+    // the difference in the floating point formats, so it is possible to
+    // convert the float32 to a double and then use the double encoding
+    // paths. It is still necessary to first check that the double's low
+    // word is zero, because some float32 values set these bits and this
+    // cannot be ignored.
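+    //
+    // For example, 1.0f widens to the double 1.0, whose high word
+    // 0x3ff00000 is VFPImm-encodable and whose low word is zero, so an
+    // immediate vmov can be used; 0.1f widens to a double with a non-zero
+    // low word and must come from the constant pool instead.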
+ double doubleValue(value);
+ if (DoubleLowWord(doubleValue) == 0) {
+ VFPImm enc(DoubleHighWord(doubleValue));
+ if (enc.isValid()) {
+ as_vimm(vd, enc, cc);
+ return;
+ }
+ }
+ }
+
+ // Fall back to putting the value in a pool.
+ as_FImm32Pool(vd, value, cc);
+}
+
+void MacroAssemblerARM::ma_vcmp(FloatRegister src1, FloatRegister src2,
+ Condition cc) {
+ as_vcmp(VFPRegister(src1), VFPRegister(src2), cc);
+}
+
+void MacroAssemblerARM::ma_vcmp_f32(FloatRegister src1, FloatRegister src2,
+ Condition cc) {
+ as_vcmp(VFPRegister(src1).singleOverlay(), VFPRegister(src2).singleOverlay(),
+ cc);
+}
+
+void MacroAssemblerARM::ma_vcmpz(FloatRegister src1, Condition cc) {
+ as_vcmpz(VFPRegister(src1), cc);
+}
+
+void MacroAssemblerARM::ma_vcmpz_f32(FloatRegister src1, Condition cc) {
+ as_vcmpz(VFPRegister(src1).singleOverlay(), cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_F64_I32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isDouble());
+ MOZ_ASSERT(dest.isSInt());
+ as_vcvt(dest, src, false, cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_F64_U32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isDouble());
+ MOZ_ASSERT(dest.isUInt());
+ as_vcvt(dest, src, false, cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_I32_F64(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isSInt());
+ MOZ_ASSERT(dest.isDouble());
+ as_vcvt(dest, src, false, cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_U32_F64(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isUInt());
+ MOZ_ASSERT(dest.isDouble());
+ as_vcvt(dest, src, false, cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_F32_I32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isSingle());
+ MOZ_ASSERT(dest.isSInt());
+ as_vcvt(VFPRegister(dest).sintOverlay(), VFPRegister(src).singleOverlay(),
+ false, cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_F32_U32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isSingle());
+ MOZ_ASSERT(dest.isUInt());
+ as_vcvt(VFPRegister(dest).uintOverlay(), VFPRegister(src).singleOverlay(),
+ false, cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_I32_F32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isSInt());
+ MOZ_ASSERT(dest.isSingle());
+ as_vcvt(VFPRegister(dest).singleOverlay(), VFPRegister(src).sintOverlay(),
+ false, cc);
+}
+
+void MacroAssemblerARM::ma_vcvt_U32_F32(FloatRegister src, FloatRegister dest,
+ Condition cc) {
+ MOZ_ASSERT(src.isUInt());
+ MOZ_ASSERT(dest.isSingle());
+ as_vcvt(VFPRegister(dest).singleOverlay(), VFPRegister(src).uintOverlay(),
+ false, cc);
+}
+
+void MacroAssemblerARM::ma_vxfer(FloatRegister src, Register dest,
+ Condition cc) {
+ as_vxfer(dest, InvalidReg, VFPRegister(src).singleOverlay(), FloatToCore, cc);
+}
+
+void MacroAssemblerARM::ma_vxfer(FloatRegister src, Register dest1,
+ Register dest2, Condition cc) {
+ as_vxfer(dest1, dest2, VFPRegister(src), FloatToCore, cc);
+}
+
+void MacroAssemblerARM::ma_vxfer(Register src, FloatRegister dest,
+ Condition cc) {
+ as_vxfer(src, InvalidReg, VFPRegister(dest).singleOverlay(), CoreToFloat, cc);
+}
+
+void MacroAssemblerARM::ma_vxfer(Register src1, Register src2,
+ FloatRegister dest, Condition cc) {
+ as_vxfer(src1, src2, VFPRegister(dest), CoreToFloat, cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_vdtr(LoadStore ls, const Address& addr,
+ VFPRegister rt,
+ AutoRegisterScope& scratch,
+ Condition cc) {
+ int off = addr.offset;
+ MOZ_ASSERT((off & 3) == 0);
+ Register base = addr.base;
+ if (off > -1024 && off < 1024) {
+ return as_vdtr(ls, rt, Operand(addr).toVFPAddr(), cc);
+ }
+
+  // We cannot encode this offset in a single vldr/vstr. Try to encode it as
+  // an add scratch, base, imm; vldr dest, [scratch, +offset].
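+  // Illustrative example: off = 0x1404 is outside the +/-1020 VFP offset
+  // range, but bottom = 0x4 and off - bottom = 0x1400 is imm8m-encodable,
+  // giving "add scratch, base, #0x1400; vldr rt, [scratch, #+4]".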
+ int bottom = off & (0xff << 2);
+ int neg_bottom = (0x100 << 2) - bottom;
+ // At this point, both off - bottom and off + neg_bottom will be
+ // reasonable-ish quantities.
+ //
+  // Note a neg_bottom of 0x400 cannot be encoded as an immediate negative
+  // offset in the instruction and this occurs when bottom is zero, so this
+  // case is guarded against below.
+ if (off < 0) {
+ // sub_off = bottom - off
+ Operand2 sub_off = Imm8(-(off - bottom));
+ if (!sub_off.invalid()) {
+ // - sub_off = off - bottom
+ as_sub(scratch, base, sub_off, LeaveCC, cc);
+ return as_vdtr(ls, rt, VFPAddr(scratch, VFPOffImm(bottom)), cc);
+ }
+ // sub_off = -neg_bottom - off
+ sub_off = Imm8(-(off + neg_bottom));
+ if (!sub_off.invalid() && bottom != 0) {
+ // Guarded against by: bottom != 0
+ MOZ_ASSERT(neg_bottom < 0x400);
+ // - sub_off = neg_bottom + off
+ as_sub(scratch, base, sub_off, LeaveCC, cc);
+ return as_vdtr(ls, rt, VFPAddr(scratch, VFPOffImm(-neg_bottom)), cc);
+ }
+ } else {
+ // sub_off = off - bottom
+ Operand2 sub_off = Imm8(off - bottom);
+ if (!sub_off.invalid()) {
+ // sub_off = off - bottom
+ as_add(scratch, base, sub_off, LeaveCC, cc);
+ return as_vdtr(ls, rt, VFPAddr(scratch, VFPOffImm(bottom)), cc);
+ }
+ // sub_off = neg_bottom + off
+ sub_off = Imm8(off + neg_bottom);
+ if (!sub_off.invalid() && bottom != 0) {
+ // Guarded against by: bottom != 0
+ MOZ_ASSERT(neg_bottom < 0x400);
+ // sub_off = neg_bottom + off
+ as_add(scratch, base, sub_off, LeaveCC, cc);
+ return as_vdtr(ls, rt, VFPAddr(scratch, VFPOffImm(-neg_bottom)), cc);
+ }
+ }
+
+  // Safe to use scratch as dest, since ma_add() overwrites dest only at the
+  // end and cannot use dest as an internal scratch anyway, since it may
+  // also == base.
+ ma_add(base, Imm32(off), scratch, scratch, LeaveCC, cc);
+ return as_vdtr(ls, rt, VFPAddr(scratch, VFPOffImm(0)), cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_vldr(VFPAddr addr, VFPRegister dest,
+ Condition cc) {
+ return as_vdtr(IsLoad, dest, addr, cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_vldr(const Address& addr, VFPRegister dest,
+ AutoRegisterScope& scratch,
+ Condition cc) {
+ return ma_vdtr(IsLoad, addr, dest, scratch, cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_vldr(VFPRegister src, Register base,
+ Register index,
+ AutoRegisterScope& scratch,
+ int32_t shift, Condition cc) {
+ as_add(scratch, base, lsl(index, shift), LeaveCC, cc);
+ return as_vdtr(IsLoad, src, Operand(Address(scratch, 0)).toVFPAddr(), cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_vstr(VFPRegister src, VFPAddr addr,
+ Condition cc) {
+ return as_vdtr(IsStore, src, addr, cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_vstr(VFPRegister src, const Address& addr,
+ AutoRegisterScope& scratch,
+ Condition cc) {
+ return ma_vdtr(IsStore, addr, src, scratch, cc);
+}
+
+BufferOffset MacroAssemblerARM::ma_vstr(
+ VFPRegister src, Register base, Register index, AutoRegisterScope& scratch,
+ AutoRegisterScope& scratch2, int32_t shift, int32_t offset, Condition cc) {
+ as_add(scratch, base, lsl(index, shift), LeaveCC, cc);
+ return ma_vstr(src, Address(scratch, offset), scratch2, cc);
+}
+
+// Without an offset, no second scratch register is necessary.
+BufferOffset MacroAssemblerARM::ma_vstr(VFPRegister src, Register base,
+ Register index,
+ AutoRegisterScope& scratch,
+ int32_t shift, Condition cc) {
+ as_add(scratch, base, lsl(index, shift), LeaveCC, cc);
+ return as_vdtr(IsStore, src, Operand(Address(scratch, 0)).toVFPAddr(), cc);
+}
+
+bool MacroAssemblerARMCompat::buildOOLFakeExitFrame(void* fakeReturnAddr) {
+ asMasm().PushFrameDescriptor(FrameType::IonJS); // descriptor_
+ asMasm().Push(ImmPtr(fakeReturnAddr));
+ asMasm().Push(FramePointer);
+ return true;
+}
+
+void MacroAssemblerARMCompat::move32(Imm32 imm, Register dest) {
+ ma_mov(imm, dest);
+}
+
+void MacroAssemblerARMCompat::move32(Register src, Register dest) {
+ ma_mov(src, dest);
+}
+
+void MacroAssemblerARMCompat::movePtr(Register src, Register dest) {
+ ma_mov(src, dest);
+}
+
+void MacroAssemblerARMCompat::movePtr(ImmWord imm, Register dest) {
+ ma_mov(Imm32(imm.value), dest);
+}
+
+void MacroAssemblerARMCompat::movePtr(ImmGCPtr imm, Register dest) {
+ ma_mov(imm, dest);
+}
+
+void MacroAssemblerARMCompat::movePtr(ImmPtr imm, Register dest) {
+ movePtr(ImmWord(uintptr_t(imm.value)), dest);
+}
+
+void MacroAssemblerARMCompat::movePtr(wasm::SymbolicAddress imm,
+ Register dest) {
+ append(wasm::SymbolicAccess(CodeOffset(currentOffset()), imm));
+ ma_movPatchable(Imm32(-1), dest, Always);
+}
+
+void MacroAssemblerARMCompat::load8ZeroExtend(const Address& address,
+ Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_dataTransferN(IsLoad, 8, false, address.base, Imm32(address.offset), dest,
+ scratch);
+}
+
+void MacroAssemblerARMCompat::load8ZeroExtend(const BaseIndex& src,
+ Register dest) {
+ Register base = src.base;
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (src.offset == 0) {
+ ma_ldrb(DTRAddr(base, DtrRegImmShift(src.index, LSL, scale)), dest);
+ } else {
+ ma_add(base, Imm32(src.offset), scratch, scratch2);
+ ma_ldrb(DTRAddr(scratch, DtrRegImmShift(src.index, LSL, scale)), dest);
+ }
+}
+
+void MacroAssemblerARMCompat::load8SignExtend(const Address& address,
+ Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_dataTransferN(IsLoad, 8, true, address.base, Imm32(address.offset), dest,
+ scratch);
+}
+
+void MacroAssemblerARMCompat::load8SignExtend(const BaseIndex& src,
+ Register dest) {
+ Register index = src.index;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ // ARMv7 does not have LSL on an index register with an extended load.
+ if (src.scale != TimesOne) {
+ ma_lsl(Imm32::ShiftOf(src.scale), index, scratch);
+ index = scratch;
+ }
+
+ if (src.offset != 0) {
+ if (index != scratch) {
+ ma_mov(index, scratch);
+ index = scratch;
+ }
+ ma_add(Imm32(src.offset), index, scratch2);
+ }
+ ma_ldrsb(EDtrAddr(src.base, EDtrOffReg(index)), dest);
+}
+
+void MacroAssemblerARMCompat::load16ZeroExtend(const Address& address,
+ Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_dataTransferN(IsLoad, 16, false, address.base, Imm32(address.offset), dest,
+ scratch);
+}
+
+void MacroAssemblerARMCompat::load16ZeroExtend(const BaseIndex& src,
+ Register dest) {
+ Register index = src.index;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ // ARMv7 does not have LSL on an index register with an extended load.
+ if (src.scale != TimesOne) {
+ ma_lsl(Imm32::ShiftOf(src.scale), index, scratch);
+ index = scratch;
+ }
+
+ if (src.offset != 0) {
+ if (index != scratch) {
+ ma_mov(index, scratch);
+ index = scratch;
+ }
+ ma_add(Imm32(src.offset), index, scratch2);
+ }
+ ma_ldrh(EDtrAddr(src.base, EDtrOffReg(index)), dest);
+}
+
+void MacroAssemblerARMCompat::load16SignExtend(const Address& address,
+ Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_dataTransferN(IsLoad, 16, true, address.base, Imm32(address.offset), dest,
+ scratch);
+}
+
+void MacroAssemblerARMCompat::load16SignExtend(const BaseIndex& src,
+ Register dest) {
+ Register index = src.index;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+  // ARMv7 does not have LSL on an index register with an extended load.
+ if (src.scale != TimesOne) {
+ ma_lsl(Imm32::ShiftOf(src.scale), index, scratch);
+ index = scratch;
+ }
+
+ if (src.offset != 0) {
+ if (index != scratch) {
+ ma_mov(index, scratch);
+ index = scratch;
+ }
+ ma_add(Imm32(src.offset), index, scratch2);
+ }
+ ma_ldrsh(EDtrAddr(src.base, EDtrOffReg(index)), dest);
+}
+
+void MacroAssemblerARMCompat::load32(const Address& address, Register dest) {
+ loadPtr(address, dest);
+}
+
+void MacroAssemblerARMCompat::load32(const BaseIndex& address, Register dest) {
+ loadPtr(address, dest);
+}
+
+void MacroAssemblerARMCompat::load32(AbsoluteAddress address, Register dest) {
+ loadPtr(address, dest);
+}
+
+void MacroAssemblerARMCompat::loadPtr(const Address& address, Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(address, dest, scratch);
+}
+
+void MacroAssemblerARMCompat::loadPtr(const BaseIndex& src, Register dest) {
+ Register base = src.base;
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (src.offset != 0) {
+ ma_add(base, Imm32(src.offset), scratch, scratch2);
+ ma_ldr(DTRAddr(scratch, DtrRegImmShift(src.index, LSL, scale)), dest);
+ } else {
+ ma_ldr(DTRAddr(base, DtrRegImmShift(src.index, LSL, scale)), dest);
+ }
+}
+
+void MacroAssemblerARMCompat::loadPtr(AbsoluteAddress address, Register dest) {
+ MOZ_ASSERT(dest != pc); // Use dest as a scratch register.
+ movePtr(ImmWord(uintptr_t(address.addr)), dest);
+ loadPtr(Address(dest, 0), dest);
+}
+
+void MacroAssemblerARMCompat::loadPtr(wasm::SymbolicAddress address,
+ Register dest) {
+ MOZ_ASSERT(dest != pc); // Use dest as a scratch register.
+ movePtr(address, dest);
+ loadPtr(Address(dest, 0), dest);
+}
+
+void MacroAssemblerARMCompat::loadPrivate(const Address& address,
+ Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(ToPayload(address), dest, scratch);
+}
+
+void MacroAssemblerARMCompat::loadDouble(const Address& address,
+ FloatRegister dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_vldr(address, dest, scratch);
+}
+
+void MacroAssemblerARMCompat::loadDouble(const BaseIndex& src,
+ FloatRegister dest) {
+ // VFP instructions don't even support register Base + register Index modes,
+ // so just add the index, then handle the offset like normal.
+ Register base = src.base;
+ Register index = src.index;
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+ int32_t offset = src.offset;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ as_add(scratch, base, lsl(index, scale));
+ ma_vldr(Address(scratch, offset), dest, scratch2);
+}
+
+void MacroAssemblerARMCompat::loadFloatAsDouble(const Address& address,
+ FloatRegister dest) {
+ ScratchRegisterScope scratch(asMasm());
+
+ VFPRegister rt = dest;
+ ma_vldr(address, rt.singleOverlay(), scratch);
+ as_vcvt(rt, rt.singleOverlay());
+}
+
+void MacroAssemblerARMCompat::loadFloatAsDouble(const BaseIndex& src,
+ FloatRegister dest) {
+ // VFP instructions don't even support register Base + register Index modes,
+ // so just add the index, then handle the offset like normal.
+ Register base = src.base;
+ Register index = src.index;
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+ int32_t offset = src.offset;
+ VFPRegister rt = dest;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ as_add(scratch, base, lsl(index, scale));
+ ma_vldr(Address(scratch, offset), rt.singleOverlay(), scratch2);
+ as_vcvt(rt, rt.singleOverlay());
+}
+
+void MacroAssemblerARMCompat::loadFloat32(const Address& address,
+ FloatRegister dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_vldr(address, VFPRegister(dest).singleOverlay(), scratch);
+}
+
+void MacroAssemblerARMCompat::loadFloat32(const BaseIndex& src,
+ FloatRegister dest) {
+ // VFP instructions don't even support register Base + register Index modes,
+ // so just add the index, then handle the offset like normal.
+ Register base = src.base;
+ Register index = src.index;
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+ int32_t offset = src.offset;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ as_add(scratch, base, lsl(index, scale));
+ ma_vldr(Address(scratch, offset), VFPRegister(dest).singleOverlay(),
+ scratch2);
+}
+
+void MacroAssemblerARMCompat::store8(Imm32 imm, const Address& address) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_mov(imm, scratch2);
+ store8(scratch2, address);
+}
+
+void MacroAssemblerARMCompat::store8(Register src, const Address& address) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_dataTransferN(IsStore, 8, false, address.base, Imm32(address.offset), src,
+ scratch);
+}
+
+void MacroAssemblerARMCompat::store8(Imm32 imm, const BaseIndex& dest) {
+ Register base = dest.base;
+ uint32_t scale = Imm32::ShiftOf(dest.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (dest.offset != 0) {
+ ma_add(base, Imm32(dest.offset), scratch, scratch2);
+ ma_mov(imm, scratch2);
+ ma_strb(scratch2, DTRAddr(scratch, DtrRegImmShift(dest.index, LSL, scale)));
+ } else {
+ ma_mov(imm, scratch2);
+ ma_strb(scratch2, DTRAddr(base, DtrRegImmShift(dest.index, LSL, scale)));
+ }
+}
+
+void MacroAssemblerARMCompat::store8(Register src, const BaseIndex& dest) {
+ Register base = dest.base;
+ uint32_t scale = Imm32::ShiftOf(dest.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (dest.offset != 0) {
+ ma_add(base, Imm32(dest.offset), scratch, scratch2);
+ ma_strb(src, DTRAddr(scratch, DtrRegImmShift(dest.index, LSL, scale)));
+ } else {
+ ma_strb(src, DTRAddr(base, DtrRegImmShift(dest.index, LSL, scale)));
+ }
+}
+
+void MacroAssemblerARMCompat::store16(Imm32 imm, const Address& address) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_mov(imm, scratch2);
+ store16(scratch2, address);
+}
+
+void MacroAssemblerARMCompat::store16(Register src, const Address& address) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_dataTransferN(IsStore, 16, false, address.base, Imm32(address.offset), src,
+ scratch);
+}
+
+void MacroAssemblerARMCompat::store16(Imm32 imm, const BaseIndex& dest) {
+ Register index = dest.index;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+  // ARMv7 does not have LSL on an index register with an extended store.
+ if (dest.scale != TimesOne) {
+ ma_lsl(Imm32::ShiftOf(dest.scale), index, scratch);
+ index = scratch;
+ }
+
+ if (dest.offset != 0) {
+ ma_add(index, Imm32(dest.offset), scratch, scratch2);
+ index = scratch;
+ }
+
+ ma_mov(imm, scratch2);
+ ma_strh(scratch2, EDtrAddr(dest.base, EDtrOffReg(index)));
+}
+
+void MacroAssemblerARMCompat::store16(Register src, const BaseIndex& address) {
+ Register index = address.index;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+  // ARMv7 does not have LSL on an index register with an extended store.
+ if (address.scale != TimesOne) {
+ ma_lsl(Imm32::ShiftOf(address.scale), index, scratch);
+ index = scratch;
+ }
+
+ if (address.offset != 0) {
+ ma_add(index, Imm32(address.offset), scratch, scratch2);
+ index = scratch;
+ }
+ ma_strh(src, EDtrAddr(address.base, EDtrOffReg(index)));
+}
+
+void MacroAssemblerARMCompat::store32(Register src, AbsoluteAddress address) {
+ storePtr(src, address);
+}
+
+void MacroAssemblerARMCompat::store32(Register src, const Address& address) {
+ storePtr(src, address);
+}
+
+void MacroAssemblerARMCompat::store32(Imm32 src, const Address& address) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ move32(src, scratch);
+ ma_str(scratch, address, scratch2);
+}
+
+void MacroAssemblerARMCompat::store32(Imm32 imm, const BaseIndex& dest) {
+ Register base = dest.base;
+ uint32_t scale = Imm32::ShiftOf(dest.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (dest.offset != 0) {
+ ma_add(base, Imm32(dest.offset), scratch, scratch2);
+ ma_mov(imm, scratch2);
+ ma_str(scratch2, DTRAddr(scratch, DtrRegImmShift(dest.index, LSL, scale)));
+ } else {
+ ma_mov(imm, scratch);
+ ma_str(scratch, DTRAddr(base, DtrRegImmShift(dest.index, LSL, scale)));
+ }
+}
+
+void MacroAssemblerARMCompat::store32(Register src, const BaseIndex& dest) {
+ Register base = dest.base;
+ uint32_t scale = Imm32::ShiftOf(dest.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (dest.offset != 0) {
+ ma_add(base, Imm32(dest.offset), scratch, scratch2);
+ ma_str(src, DTRAddr(scratch, DtrRegImmShift(dest.index, LSL, scale)));
+ } else {
+ ma_str(src, DTRAddr(base, DtrRegImmShift(dest.index, LSL, scale)));
+ }
+}
+
+void MacroAssemblerARMCompat::storePtr(ImmWord imm, const Address& address) {
+ store32(Imm32(imm.value), address);
+}
+
+void MacroAssemblerARMCompat::storePtr(ImmWord imm, const BaseIndex& address) {
+ store32(Imm32(imm.value), address);
+}
+
+void MacroAssemblerARMCompat::storePtr(ImmPtr imm, const Address& address) {
+ store32(Imm32(uintptr_t(imm.value)), address);
+}
+
+void MacroAssemblerARMCompat::storePtr(ImmPtr imm, const BaseIndex& address) {
+ store32(Imm32(uintptr_t(imm.value)), address);
+}
+
+void MacroAssemblerARMCompat::storePtr(ImmGCPtr imm, const Address& address) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_mov(imm, scratch);
+ ma_str(scratch, address, scratch2);
+}
+
+void MacroAssemblerARMCompat::storePtr(ImmGCPtr imm, const BaseIndex& address) {
+ Register base = address.base;
+ uint32_t scale = Imm32::ShiftOf(address.scale).value;
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (address.offset != 0) {
+ ma_add(base, Imm32(address.offset), scratch, scratch2);
+ ma_mov(imm, scratch2);
+ ma_str(scratch2,
+ DTRAddr(scratch, DtrRegImmShift(address.index, LSL, scale)));
+ } else {
+ ma_mov(imm, scratch);
+ ma_str(scratch, DTRAddr(base, DtrRegImmShift(address.index, LSL, scale)));
+ }
+}
+
+void MacroAssemblerARMCompat::storePtr(Register src, const Address& address) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_str(src, address, scratch2);
+}
+
+void MacroAssemblerARMCompat::storePtr(Register src, const BaseIndex& address) {
+ store32(src, address);
+}
+
+void MacroAssemblerARMCompat::storePtr(Register src, AbsoluteAddress dest) {
+ ScratchRegisterScope scratch(asMasm());
+ movePtr(ImmWord(uintptr_t(dest.addr)), scratch);
+ ma_str(src, DTRAddr(scratch, DtrOffImm(0)));
+}
+
+// Note: this function clobbers the input register.
+void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) {
+ if (HasVFPv3()) {
+ Label notSplit;
+ {
+ ScratchDoubleScope scratchDouble(*this);
+ MOZ_ASSERT(input != scratchDouble);
+ loadConstantDouble(0.5, scratchDouble);
+
+ ma_vadd(input, scratchDouble, scratchDouble);
+ // Convert the double into an unsigned fixed point value with 24 bits of
+ // precision. The resulting number will look like 0xII.DDDDDD
+ as_vcvtFixed(scratchDouble, false, 24, true);
+ }
+
+ // Move the fixed point value into an integer register.
+ {
+ ScratchFloat32Scope scratchFloat(*this);
+ as_vxfer(output, InvalidReg, scratchFloat.uintOverlay(), FloatToCore);
+ }
+
+ ScratchRegisterScope scratch(*this);
+
+ // See if this value *might* have been an exact integer after adding
+ // 0.5. This tests the 1/2 through 1/16,777,216th places, but 0.5 needs
+ // to be tested out to the 1/140,737,488,355,328th place.
+ ma_tst(output, Imm32(0x00ffffff), scratch);
+ // Convert to a uint8 by shifting out all of the fraction bits.
+ ma_lsr(Imm32(24), output, output);
+ // If any of the bottom 24 bits were non-zero, then we're good, since
+ // this number can't be exactly XX.0
+ ma_b(&notSplit, NonZero);
+ as_vxfer(scratch, InvalidReg, input, FloatToCore);
+ as_cmp(scratch, Imm8(0));
+ // If the lower 32 bits of the double were 0, then this was an exact number,
+ // and it should be even.
+ as_bic(output, output, Imm8(1), LeaveCC, Zero);
+ bind(&notSplit);
+ } else {
+ ScratchDoubleScope scratchDouble(*this);
+ MOZ_ASSERT(input != scratchDouble);
+ loadConstantDouble(0.5, scratchDouble);
+
+ Label outOfRange;
+ ma_vcmpz(input);
+ // Do the add, in place so we can reference it later.
+ ma_vadd(input, scratchDouble, input);
+ // Do the conversion to an integer.
+ as_vcvt(VFPRegister(scratchDouble).uintOverlay(), VFPRegister(input));
+ // Copy the converted value out.
+ as_vxfer(output, InvalidReg, scratchDouble, FloatToCore);
+ as_vmrs(pc);
+ ma_mov(Imm32(0), output, Overflow); // NaN => 0
+ ma_b(&outOfRange, Overflow); // NaN
+ as_cmp(output, Imm8(0xff));
+ ma_mov(Imm32(0xff), output, Above);
+ ma_b(&outOfRange, Above);
+ // Convert it back to see if we got the same value back.
+ as_vcvt(scratchDouble, VFPRegister(scratchDouble).uintOverlay());
+ // Do the check.
+ as_vcmp(scratchDouble, input);
+ as_vmrs(pc);
+ as_bic(output, output, Imm8(1), LeaveCC, Zero);
+ bind(&outOfRange);
+ }
+}
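+
+// A worked trace of the VFPv3 path above (illustrative only, not emitted
+// code). The fixed-point conversion gives round-to-nearest, with the bic
+// providing a round-half-to-even fixup:
+//
+//   input = 2.25: 2.25 + 0.5 = 2.75 -> fixed(24) = 0x02.C00000
+//                 fraction bits non-zero, so output = 0x02.C00000 >> 24 = 2.
+//   input = 2.5:  2.5 + 0.5 = 3.0   -> fixed(24) = 0x03.000000
+//                 fraction bits zero and the low word of 2.5 is zero, so
+//                 this was an exact halfway case: 3 & ~1 = 2 (round to even).
+//   input = 3.5:  3.5 + 0.5 = 4.0   -> 4 & ~1 = 4, already even.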
+
+void MacroAssemblerARMCompat::cmp32(Register lhs, Imm32 rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_cmp(lhs, rhs, scratch);
+}
+
+void MacroAssemblerARMCompat::cmp32(Register lhs, Register rhs) {
+ ma_cmp(lhs, rhs);
+}
+
+void MacroAssemblerARMCompat::cmp32(const Address& lhs, Imm32 rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, rhs, scratch2);
+}
+
+void MacroAssemblerARMCompat::cmp32(const Address& lhs, Register rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, rhs);
+}
+
+void MacroAssemblerARMCompat::cmpPtr(Register lhs, ImmWord rhs) {
+ cmp32(lhs, Imm32(rhs.value));
+}
+
+void MacroAssemblerARMCompat::cmpPtr(Register lhs, ImmPtr rhs) {
+ cmpPtr(lhs, ImmWord(uintptr_t(rhs.value)));
+}
+
+void MacroAssemblerARMCompat::cmpPtr(Register lhs, Register rhs) {
+ ma_cmp(lhs, rhs);
+}
+
+void MacroAssemblerARMCompat::cmpPtr(Register lhs, ImmGCPtr rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_cmp(lhs, rhs, scratch);
+}
+
+void MacroAssemblerARMCompat::cmpPtr(Register lhs, Imm32 rhs) {
+ cmp32(lhs, rhs);
+}
+
+void MacroAssemblerARMCompat::cmpPtr(const Address& lhs, Register rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, rhs);
+}
+
+void MacroAssemblerARMCompat::cmpPtr(const Address& lhs, ImmWord rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, Imm32(rhs.value), scratch2);
+}
+
+void MacroAssemblerARMCompat::cmpPtr(const Address& lhs, ImmPtr rhs) {
+ cmpPtr(lhs, ImmWord(uintptr_t(rhs.value)));
+}
+
+void MacroAssemblerARMCompat::cmpPtr(const Address& lhs, ImmGCPtr rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, rhs, scratch2);
+}
+
+void MacroAssemblerARMCompat::cmpPtr(const Address& lhs, Imm32 rhs) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(lhs, scratch, scratch2);
+ ma_cmp(scratch, rhs, scratch2);
+}
+
+void MacroAssemblerARMCompat::setStackArg(Register reg, uint32_t arg) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_dataTransferN(IsStore, 32, true, sp, Imm32(arg * sizeof(intptr_t)), reg,
+ scratch);
+}
+
+void MacroAssemblerARMCompat::minMaxDouble(FloatRegister srcDest,
+ FloatRegister second, bool canBeNaN,
+ bool isMax) {
+ FloatRegister first = srcDest;
+
+ Label nan, equal, returnSecond, done;
+
+ Assembler::Condition cond = isMax ? Assembler::VFP_LessThanOrEqual
+ : Assembler::VFP_GreaterThanOrEqual;
+
+ compareDouble(first, second);
+ // First or second is NaN, result is NaN.
+ ma_b(&nan, Assembler::VFP_Unordered);
+ // Make sure we handle -0 and 0 right.
+ ma_b(&equal, Assembler::VFP_Equal);
+ ma_b(&returnSecond, cond);
+ ma_b(&done);
+
+ // Check for zero.
+ bind(&equal);
+ compareDouble(first, NoVFPRegister);
+ // First wasn't 0 or -0, so just return it.
+ ma_b(&done, Assembler::VFP_NotEqualOrUnordered);
+ // So now both operands are either -0 or 0.
+ if (isMax) {
+ // -0 + -0 = -0 and -0 + 0 = 0.
+ ma_vadd(second, first, first);
+ } else {
+ ma_vneg(first, first);
+ ma_vsub(first, second, first);
+ ma_vneg(first, first);
+ }
+ ma_b(&done);
+
+ bind(&nan);
+ // If the first argument is the NaN, return it; otherwise return the second
+ // operand.
+ compareDouble(first, first);
+ ma_vmov(first, srcDest, Assembler::VFP_Unordered);
+ ma_b(&done, Assembler::VFP_Unordered);
+
+ bind(&returnSecond);
+ ma_vmov(second, srcDest);
+
+ bind(&done);
+}
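+
+// The ±0 handling above leans on IEEE-754 signed-zero addition, where
+// -0 + -0 = -0 and -0 + 0 = +0. A sketch of both cases (illustrative only):
+//
+//   max: first = first + second   picks +0 unless both operands are -0.
+//   min: first = -((-first) - second) = -((-first) + (-second))
+//        which yields -0 unless both operands are +0, as required.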
+
+void MacroAssemblerARMCompat::minMaxFloat32(FloatRegister srcDest,
+ FloatRegister second, bool canBeNaN,
+ bool isMax) {
+ FloatRegister first = srcDest;
+
+ Label nan, equal, returnSecond, done;
+
+ Assembler::Condition cond = isMax ? Assembler::VFP_LessThanOrEqual
+ : Assembler::VFP_GreaterThanOrEqual;
+
+ compareFloat(first, second);
+ // First or second is NaN, result is NaN.
+ ma_b(&nan, Assembler::VFP_Unordered);
+ // Make sure we handle -0 and 0 right.
+ ma_b(&equal, Assembler::VFP_Equal);
+ ma_b(&returnSecond, cond);
+ ma_b(&done);
+
+ // Check for zero.
+ bind(&equal);
+ compareFloat(first, NoVFPRegister);
+ // First wasn't 0 or -0, so just return it.
+ ma_b(&done, Assembler::VFP_NotEqualOrUnordered);
+ // So now both operands are either -0 or 0.
+ if (isMax) {
+ // -0 + -0 = -0 and -0 + 0 = 0.
+ ma_vadd_f32(second, first, first);
+ } else {
+ ma_vneg_f32(first, first);
+ ma_vsub_f32(first, second, first);
+ ma_vneg_f32(first, first);
+ }
+ ma_b(&done);
+
+ bind(&nan);
+ // See comment in minMaxDouble.
+ compareFloat(first, first);
+ ma_vmov_f32(first, srcDest, Assembler::VFP_Unordered);
+ ma_b(&done, Assembler::VFP_Unordered);
+
+ bind(&returnSecond);
+ ma_vmov_f32(second, srcDest);
+
+ bind(&done);
+}
+
+void MacroAssemblerARMCompat::compareDouble(FloatRegister lhs,
+ FloatRegister rhs) {
+ // Compare the doubles, setting vector status flags.
+ if (rhs.isMissing()) {
+ ma_vcmpz(lhs);
+ } else {
+ ma_vcmp(lhs, rhs);
+ }
+
+ // Move vector status bits to normal status flags.
+ as_vmrs(pc);
+}
+
+void MacroAssemblerARMCompat::compareFloat(FloatRegister lhs,
+ FloatRegister rhs) {
+  // Compare the floats, setting vector status flags.
+ if (rhs.isMissing()) {
+ as_vcmpz(VFPRegister(lhs).singleOverlay());
+ } else {
+ as_vcmp(VFPRegister(lhs).singleOverlay(), VFPRegister(rhs).singleOverlay());
+ }
+
+ // Move vector status bits to normal status flags.
+ as_vmrs(pc);
+}
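+
+// In both compare helpers, as_vmrs(pc) is the "VMRS APSR_nzcv, FPSCR" form:
+// it copies the condition flags produced by the VFP compare from FPSCR into
+// the integer APSR so ordinary conditional branches can test them. Roughly:
+//
+//   vcmp.f64 d0, d1          ; sets FPSCR.{N,Z,C,V}
+//   vmrs     APSR_nzcv, fpscr
+//   ; now e.g. V=1 means the compare was unordered (a NaN operand).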
+
+Assembler::Condition MacroAssemblerARMCompat::testInt32(
+ Assembler::Condition cond, const ValueOperand& value) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ ma_cmp(value.typeReg(), ImmType(JSVAL_TYPE_INT32));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBoolean(
+ Assembler::Condition cond, const ValueOperand& value) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ ma_cmp(value.typeReg(), ImmType(JSVAL_TYPE_BOOLEAN));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testDouble(
+ Assembler::Condition cond, const ValueOperand& value) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ Assembler::Condition actual = (cond == Equal) ? Below : AboveOrEqual;
+ ScratchRegisterScope scratch(asMasm());
+ ma_cmp(value.typeReg(), ImmTag(JSVAL_TAG_CLEAR), scratch);
+ return actual;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testNull(
+ Assembler::Condition cond, const ValueOperand& value) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ ma_cmp(value.typeReg(), ImmType(JSVAL_TYPE_NULL));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testUndefined(
+ Assembler::Condition cond, const ValueOperand& value) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ ma_cmp(value.typeReg(), ImmType(JSVAL_TYPE_UNDEFINED));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testString(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testString(cond, value.typeReg());
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testSymbol(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testSymbol(cond, value.typeReg());
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBigInt(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testBigInt(cond, value.typeReg());
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testObject(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testObject(cond, value.typeReg());
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testNumber(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testNumber(cond, value.typeReg());
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testMagic(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testMagic(cond, value.typeReg());
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testPrimitive(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testPrimitive(cond, value.typeReg());
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testGCThing(
+ Assembler::Condition cond, const ValueOperand& value) {
+ return testGCThing(cond, value.typeReg());
+}
+
+// Register-based tests.
+Assembler::Condition MacroAssemblerARMCompat::testInt32(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_INT32));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBoolean(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_BOOLEAN));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testNull(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_NULL));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testUndefined(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_UNDEFINED));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testString(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_STRING));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testSymbol(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_SYMBOL));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBigInt(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_BIGINT));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testObject(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_OBJECT));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testMagic(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_MAGIC));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testPrimitive(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JS::detail::ValueUpperExclPrimitiveTag));
+ return cond == Equal ? Below : AboveOrEqual;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testGCThing(
+ Assembler::Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JS::detail::ValueLowerInclGCThingTag));
+ return cond == Equal ? AboveOrEqual : Below;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testGCThing(
+ Assembler::Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ ma_cmp(tag, ImmTag(JS::detail::ValueLowerInclGCThingTag));
+ return cond == Equal ? AboveOrEqual : Below;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testMagic(
+ Assembler::Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_MAGIC));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testInt32(
+ Assembler::Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_INT32));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testDouble(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testDouble(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBoolean(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testBoolean(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testNull(Condition cond,
+ const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testNull(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testUndefined(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testUndefined(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testString(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testString(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testSymbol(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testSymbol(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBigInt(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testBigInt(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testObject(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testObject(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testNumber(
+ Condition cond, const Address& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ return testNumber(cond, tag);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testDouble(Condition cond,
+ Register tag) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ Condition actual = (cond == Equal) ? Below : AboveOrEqual;
+ ma_cmp(tag, ImmTag(JSVAL_TAG_CLEAR));
+ return actual;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testNumber(Condition cond,
+ Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ma_cmp(tag, ImmTag(JS::detail::ValueUpperInclNumberTag));
+ return cond == Equal ? BelowOrEqual : Above;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testUndefined(
+ Condition cond, const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_UNDEFINED));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testNull(Condition cond,
+ const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_NULL));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBoolean(
+ Condition cond, const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_BOOLEAN));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testString(Condition cond,
+ const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_STRING));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testSymbol(Condition cond,
+ const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_SYMBOL));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBigInt(Condition cond,
+ const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_BIGINT));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testInt32(Condition cond,
+ const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_INT32));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testObject(Condition cond,
+ const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_OBJECT));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testDouble(Condition cond,
+ const BaseIndex& src) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ Assembler::Condition actual = (cond == Equal) ? Below : AboveOrEqual;
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(src, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_CLEAR));
+ return actual;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testMagic(
+ Condition cond, const BaseIndex& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ ma_cmp(tag, ImmTag(JSVAL_TAG_MAGIC));
+ return cond;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testGCThing(
+ Condition cond, const BaseIndex& address) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ ScratchRegisterScope scratch(asMasm());
+ Register tag = extractTag(address, scratch);
+ ma_cmp(tag, ImmTag(JS::detail::ValueLowerInclGCThingTag));
+ return cond == Equal ? AboveOrEqual : Below;
+}
+
+// Unboxing code.
+void MacroAssemblerARMCompat::unboxNonDouble(const ValueOperand& operand,
+ Register dest, JSValueType type) {
+ auto movPayloadToDest = [&]() {
+ if (operand.payloadReg() != dest) {
+ ma_mov(operand.payloadReg(), dest, LeaveCC);
+ }
+ };
+ if (!JitOptions.spectreValueMasking) {
+ movPayloadToDest();
+ return;
+ }
+
+ // Spectre mitigation: We zero the payload if the tag does not match the
+ // expected type and if this is a pointer type.
+ if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) {
+ movPayloadToDest();
+ return;
+ }
+
+ // We zero the destination register and move the payload into it if
+ // the tag corresponds to the given type.
+ ma_cmp(operand.typeReg(), ImmType(type));
+ movPayloadToDest();
+ ma_mov(Imm32(0), dest, NotEqual);
+}
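+
+// A sketch of the Spectre masking sequence emitted above for a pointer-type
+// unbox (illustrative only):
+//
+//   cmp   typeReg, #tag(type)   ; does the tag match?
+//   mov   dest, payloadReg      ; unconditional payload move
+//   movne dest, #0              ; tag mismatch => zero the pointer
+//
+// so a mis-speculated type guard can only ever dereference a null-like
+// value, never an attacker-controlled payload.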
+
+void MacroAssemblerARMCompat::unboxNonDouble(const Address& src, Register dest,
+ JSValueType type) {
+ ScratchRegisterScope scratch(asMasm());
+ if (!JitOptions.spectreValueMasking) {
+ ma_ldr(ToPayload(src), dest, scratch);
+ return;
+ }
+
+ // Spectre mitigation: We zero the payload if the tag does not match the
+ // expected type and if this is a pointer type.
+ if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) {
+ ma_ldr(ToPayload(src), dest, scratch);
+ return;
+ }
+
+ // We zero the destination register and move the payload into it if
+ // the tag corresponds to the given type.
+ ma_ldr(ToType(src), scratch, scratch);
+ ma_cmp(scratch, ImmType(type));
+ ma_ldr(ToPayload(src), dest, scratch, Offset, Equal);
+ ma_mov(Imm32(0), dest, NotEqual);
+}
+
+void MacroAssemblerARMCompat::unboxNonDouble(const BaseIndex& src,
+ Register dest, JSValueType type) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_alu(src.base, lsl(src.index, src.scale), scratch2, OpAdd);
+ Address value(scratch2, src.offset);
+ unboxNonDouble(value, dest, type);
+}
+
+void MacroAssemblerARMCompat::unboxDouble(const ValueOperand& operand,
+ FloatRegister dest) {
+ MOZ_ASSERT(dest.isDouble());
+ as_vxfer(operand.payloadReg(), operand.typeReg(), VFPRegister(dest),
+ CoreToFloat);
+}
+
+void MacroAssemblerARMCompat::unboxDouble(const Address& src,
+ FloatRegister dest) {
+ MOZ_ASSERT(dest.isDouble());
+ loadDouble(src, dest);
+}
+
+void MacroAssemblerARMCompat::unboxDouble(const BaseIndex& src,
+ FloatRegister dest) {
+ MOZ_ASSERT(dest.isDouble());
+ loadDouble(src, dest);
+}
+
+void MacroAssemblerARMCompat::unboxValue(const ValueOperand& src,
+ AnyRegister dest, JSValueType type) {
+ if (dest.isFloat()) {
+ Label notInt32, end;
+ asMasm().branchTestInt32(Assembler::NotEqual, src, &notInt32);
+ convertInt32ToDouble(src.payloadReg(), dest.fpu());
+ ma_b(&end);
+ bind(&notInt32);
+ unboxDouble(src, dest.fpu());
+ bind(&end);
+ } else {
+ unboxNonDouble(src, dest.gpr(), type);
+ }
+}
+
+void MacroAssemblerARMCompat::boxDouble(FloatRegister src,
+ const ValueOperand& dest,
+ FloatRegister) {
+ as_vxfer(dest.payloadReg(), dest.typeReg(), VFPRegister(src), FloatToCore);
+}
+
+void MacroAssemblerARMCompat::boxNonDouble(JSValueType type, Register src,
+ const ValueOperand& dest) {
+ if (src != dest.payloadReg()) {
+ ma_mov(src, dest.payloadReg());
+ }
+ ma_mov(ImmType(type), dest.typeReg());
+}
+
+void MacroAssemblerARMCompat::boolValueToDouble(const ValueOperand& operand,
+ FloatRegister dest) {
+ VFPRegister d = VFPRegister(dest);
+ loadConstantDouble(1.0, dest);
+ as_cmp(operand.payloadReg(), Imm8(0));
+ // If the source is 0, then subtract the dest from itself, producing 0.
+ as_vsub(d, d, d, Equal);
+}
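+
+// The conditional vsub here is a small branch-free select: dest starts at
+// 1.0, and the vsub runs only when the payload compared equal to 0, in
+// which case d - d = +0.0. The net effect (illustrative only):
+//
+//   dest = (payload == 0) ? 0.0 : 1.0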
+
+void MacroAssemblerARMCompat::int32ValueToDouble(const ValueOperand& operand,
+ FloatRegister dest) {
+ // Transfer the integral value to a floating point register.
+ VFPRegister vfpdest = VFPRegister(dest);
+ as_vxfer(operand.payloadReg(), InvalidReg, vfpdest.sintOverlay(),
+ CoreToFloat);
+ // Convert the value to a double.
+ as_vcvt(vfpdest, vfpdest.sintOverlay());
+}
+
+void MacroAssemblerARMCompat::boolValueToFloat32(const ValueOperand& operand,
+ FloatRegister dest) {
+ VFPRegister d = VFPRegister(dest).singleOverlay();
+ loadConstantFloat32(1.0, dest);
+ as_cmp(operand.payloadReg(), Imm8(0));
+ // If the source is 0, then subtract the dest from itself, producing 0.
+ as_vsub(d, d, d, Equal);
+}
+
+void MacroAssemblerARMCompat::int32ValueToFloat32(const ValueOperand& operand,
+ FloatRegister dest) {
+ // Transfer the integral value to a floating point register.
+ VFPRegister vfpdest = VFPRegister(dest).singleOverlay();
+ as_vxfer(operand.payloadReg(), InvalidReg, vfpdest.sintOverlay(),
+ CoreToFloat);
+ // Convert the value to a float.
+ as_vcvt(vfpdest, vfpdest.sintOverlay());
+}
+
+void MacroAssemblerARMCompat::loadConstantFloat32(float f, FloatRegister dest) {
+ ma_vimm_f32(f, dest);
+}
+
+void MacroAssemblerARMCompat::loadInt32OrDouble(const Address& src,
+ FloatRegister dest) {
+ Label notInt32, end;
+
+ // If it's an int, convert to a double.
+ {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ ma_ldr(ToType(src), scratch, scratch2);
+ asMasm().branchTestInt32(Assembler::NotEqual, scratch, &notInt32);
+ ma_ldr(ToPayload(src), scratch, scratch2);
+ convertInt32ToDouble(scratch, dest);
+ ma_b(&end);
+ }
+
+ // Not an int, just load as double.
+ bind(&notInt32);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_vldr(src, dest, scratch);
+ }
+ bind(&end);
+}
+
+void MacroAssemblerARMCompat::loadInt32OrDouble(Register base, Register index,
+ FloatRegister dest,
+ int32_t shift) {
+ Label notInt32, end;
+
+ static_assert(NUNBOX32_PAYLOAD_OFFSET == 0);
+
+ ScratchRegisterScope scratch(asMasm());
+
+ // If it's an int, convert it to double.
+ ma_alu(base, lsl(index, shift), scratch, OpAdd);
+
+ // Since we only have one scratch register, we need to stomp over it with
+ // the tag.
+ ma_ldr(DTRAddr(scratch, DtrOffImm(NUNBOX32_TYPE_OFFSET)), scratch);
+ asMasm().branchTestInt32(Assembler::NotEqual, scratch, &notInt32);
+
+  // Implicitly requires NUNBOX32_PAYLOAD_OFFSET == 0: no offset is provided.
+ ma_ldr(DTRAddr(base, DtrRegImmShift(index, LSL, shift)), scratch);
+ convertInt32ToDouble(scratch, dest);
+ ma_b(&end);
+
+ // Not an int, just load as double.
+ bind(&notInt32);
+ // First, recompute the offset that had been stored in the scratch register
+ // since the scratch register was overwritten loading in the type.
+ ma_alu(base, lsl(index, shift), scratch, OpAdd);
+ ma_vldr(VFPAddr(scratch, VFPOffImm(0)), dest);
+ bind(&end);
+}
+
+void MacroAssemblerARMCompat::loadConstantDouble(double dp,
+ FloatRegister dest) {
+ ma_vimm(dp, dest);
+}
+
+// Treat the value as a boolean, and set condition codes accordingly.
+Assembler::Condition MacroAssemblerARMCompat::testInt32Truthy(
+ bool truthy, const ValueOperand& operand) {
+ ma_tst(operand.payloadReg(), operand.payloadReg());
+ return truthy ? NonZero : Zero;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBooleanTruthy(
+ bool truthy, const ValueOperand& operand) {
+ ma_tst(operand.payloadReg(), operand.payloadReg());
+ return truthy ? NonZero : Zero;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testDoubleTruthy(
+ bool truthy, FloatRegister reg) {
+ as_vcmpz(VFPRegister(reg));
+ as_vmrs(pc);
+ as_cmp(r0, O2Reg(r0), Overflow);
+ return truthy ? NonZero : Zero;
+}
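+
+// The conditional "cmp r0, r0" above looks odd but is deliberate: after
+// vcmpz/vmrs, the V flag is set iff the compare was unordered (the input
+// was NaN). Executing cmp r0, r0 only in that case forces Z=1, so NaN falls
+// on the Zero (falsy) side together with ±0.0. A rough truth table:
+//
+//   input 0.0/-0.0 -> Z=1                 -> falsy
+//   input NaN      -> V=1, cmp forces Z=1 -> falsy
+//   anything else  -> Z=0                 -> truthy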
+
+Register MacroAssemblerARMCompat::extractObject(const Address& address,
+ Register scratch) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(ToPayload(address), scratch, scratch2);
+ return scratch;
+}
+
+Register MacroAssemblerARMCompat::extractTag(const Address& address,
+ Register scratch) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(ToType(address), scratch, scratch2);
+ return scratch;
+}
+
+Register MacroAssemblerARMCompat::extractTag(const BaseIndex& address,
+ Register scratch) {
+ ma_alu(address.base, lsl(address.index, address.scale), scratch, OpAdd,
+ LeaveCC);
+ return extractTag(Address(scratch, address.offset), scratch);
+}
+
+/////////////////////////////////////////////////////////////////
+// X86/X64-common (ARM too now) interface.
+/////////////////////////////////////////////////////////////////
+void MacroAssemblerARMCompat::storeValue(ValueOperand val, const Address& dst) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_str(val.payloadReg(), ToPayload(dst), scratch2);
+ ma_str(val.typeReg(), ToType(dst), scratch2);
+}
+
+void MacroAssemblerARMCompat::storeValue(ValueOperand val,
+ const BaseIndex& dest) {
+ ScratchRegisterScope scratch(asMasm());
+
+ if (isValueDTRDCandidate(val) && Abs(dest.offset) <= 255) {
+ Register tmpIdx;
+ if (dest.offset == 0) {
+ if (dest.scale == TimesOne) {
+ tmpIdx = dest.index;
+ } else {
+ ma_lsl(Imm32(dest.scale), dest.index, scratch);
+ tmpIdx = scratch;
+ }
+ ma_strd(val.payloadReg(), val.typeReg(),
+ EDtrAddr(dest.base, EDtrOffReg(tmpIdx)));
+ } else {
+ ma_alu(dest.base, lsl(dest.index, dest.scale), scratch, OpAdd);
+ ma_strd(val.payloadReg(), val.typeReg(),
+ EDtrAddr(scratch, EDtrOffImm(dest.offset)));
+ }
+ } else {
+ ma_alu(dest.base, lsl(dest.index, dest.scale), scratch, OpAdd);
+ storeValue(val, Address(scratch, dest.offset));
+ }
+}
+
+void MacroAssemblerARMCompat::loadValue(const BaseIndex& addr,
+ ValueOperand val) {
+ ScratchRegisterScope scratch(asMasm());
+
+ if (isValueDTRDCandidate(val) && Abs(addr.offset) <= 255) {
+ Register tmpIdx;
+ if (addr.offset == 0) {
+ if (addr.scale == TimesOne) {
+ // If the offset register is the same as one of the destination
+ // registers, LDRD's behavior is undefined. Use the scratch
+ // register to avoid this.
+ if (val.aliases(addr.index)) {
+ ma_mov(addr.index, scratch);
+ tmpIdx = scratch;
+ } else {
+ tmpIdx = addr.index;
+ }
+ } else {
+ ma_lsl(Imm32(addr.scale), addr.index, scratch);
+ tmpIdx = scratch;
+ }
+ ma_ldrd(EDtrAddr(addr.base, EDtrOffReg(tmpIdx)), val.payloadReg(),
+ val.typeReg());
+ } else {
+ ma_alu(addr.base, lsl(addr.index, addr.scale), scratch, OpAdd);
+ ma_ldrd(EDtrAddr(scratch, EDtrOffImm(addr.offset)), val.payloadReg(),
+ val.typeReg());
+ }
+ } else {
+ ma_alu(addr.base, lsl(addr.index, addr.scale), scratch, OpAdd);
+ loadValue(Address(scratch, addr.offset), val);
+ }
+}
+
+void MacroAssemblerARMCompat::loadValue(Address src, ValueOperand val) {
+  // TODO: copy this code into a generic function that acts on all sequences
+  // of memory accesses.
+ if (isValueDTRDCandidate(val)) {
+ // If the value we want is in two consecutive registers starting with an
+ // even register, they can be combined as a single ldrd.
+ int offset = src.offset;
+ if (offset < 256 && offset > -256) {
+ ma_ldrd(EDtrAddr(src.base, EDtrOffImm(src.offset)), val.payloadReg(),
+ val.typeReg());
+ return;
+ }
+ }
+  // If the payload register's number is lower than the type register's, we
+  // may be able to use an ldm instruction.
+ if (val.payloadReg().code() < val.typeReg().code()) {
+ if (src.offset <= 4 && src.offset >= -8 && (src.offset & 3) == 0) {
+      // Each of the four values -8, -4, 0, 4 corresponds exactly to one of
+      // LDM{DB, DA, IA, IB}.
+ DTMMode mode;
+ switch (src.offset) {
+ case -8:
+ mode = DB;
+ break;
+ case -4:
+ mode = DA;
+ break;
+ case 0:
+ mode = IA;
+ break;
+ case 4:
+ mode = IB;
+ break;
+ default:
+ MOZ_CRASH("Bogus Offset for LoadValue as DTM");
+ }
+ startDataTransferM(IsLoad, src.base, mode);
+ transferReg(val.payloadReg());
+ transferReg(val.typeReg());
+ finishDataTransfer();
+ return;
+ }
+ }
+
+ loadUnalignedValue(src, val);
+}
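+
+// The offset-to-addressing-mode mapping used above, spelled out for a
+// two-register LDM of {payload, type} (illustrative only):
+//
+//   offset -8: LDMDB base, {lo, hi}  ; loads [base-8], [base-4]
+//   offset -4: LDMDA base, {lo, hi}  ; loads [base-4], [base]
+//   offset  0: LDMIA base, {lo, hi}  ; loads [base],   [base+4]
+//   offset  4: LDMIB base, {lo, hi}  ; loads [base+4], [base+8]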
+
+void MacroAssemblerARMCompat::loadUnalignedValue(const Address& src,
+ ValueOperand dest) {
+ Address payload = ToPayload(src);
+ Address type = ToType(src);
+
+ // Ensure that loading the payload does not erase the pointer to the Value
+ // in memory.
+ if (type.base != dest.payloadReg()) {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(payload, dest.payloadReg(), scratch2);
+ ma_ldr(type, dest.typeReg(), scratch2);
+ } else {
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(type, dest.typeReg(), scratch2);
+ ma_ldr(payload, dest.payloadReg(), scratch2);
+ }
+}
+
+void MacroAssemblerARMCompat::tagValue(JSValueType type, Register payload,
+ ValueOperand dest) {
+ MOZ_ASSERT(dest.typeReg() != dest.payloadReg());
+ if (payload != dest.payloadReg()) {
+ ma_mov(payload, dest.payloadReg());
+ }
+ ma_mov(ImmType(type), dest.typeReg());
+}
+
+void MacroAssemblerARMCompat::pushValue(ValueOperand val) {
+ ma_push(val.typeReg());
+ ma_push(val.payloadReg());
+}
+
+void MacroAssemblerARMCompat::pushValue(const Address& addr) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ ma_ldr(ToType(addr), scratch, scratch2);
+ ma_push(scratch);
+ ma_ldr(ToPayloadAfterStackPush(addr), scratch, scratch2);
+ ma_push(scratch);
+}
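+
+// Note the asymmetry above: the type word is pushed first, which moves sp
+// down by 4, so when addr is sp-relative the payload load must compensate.
+// ToPayloadAfterStackPush is, as its name suggests, the payload address
+// adjusted for that intervening push. Roughly:
+//
+//   push [addr + 4]   ; type word; this moves sp down by 4
+//   push [addr']      ; payload, where addr' = addr + 4 if addr.base == sp,
+//                     ; and addr' = addr otherwise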
+
+void MacroAssemblerARMCompat::pushValue(const BaseIndex& addr,
+ Register scratch) {
+ computeEffectiveAddress(addr, scratch);
+ pushValue(Address(scratch, 0));
+}
+
+void MacroAssemblerARMCompat::popValue(ValueOperand val) {
+ ma_pop(val.payloadReg());
+ ma_pop(val.typeReg());
+}
+
+void MacroAssemblerARMCompat::storePayload(const Value& val,
+ const Address& dest) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (val.isGCThing()) {
+ ma_mov(ImmGCPtr(val.toGCThing()), scratch);
+ } else {
+ ma_mov(Imm32(val.toNunboxPayload()), scratch);
+ }
+ ma_str(scratch, ToPayload(dest), scratch2);
+}
+
+void MacroAssemblerARMCompat::storePayload(Register src, const Address& dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_str(src, ToPayload(dest), scratch);
+}
+
+void MacroAssemblerARMCompat::storePayload(const Value& val,
+ const BaseIndex& dest) {
+ unsigned shift = ScaleToShift(dest.scale);
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ if (val.isGCThing()) {
+ ma_mov(ImmGCPtr(val.toGCThing()), scratch);
+ } else {
+ ma_mov(Imm32(val.toNunboxPayload()), scratch);
+ }
+
+ // If NUNBOX32_PAYLOAD_OFFSET is not zero, the memory operand [base + index
+ // << shift + imm] cannot be encoded into a single instruction, and cannot
+ // be integrated into the as_dtr call.
+ static_assert(NUNBOX32_PAYLOAD_OFFSET == 0);
+
+ // If an offset is used, modify the base so that a [base + index << shift]
+ // instruction format can be used.
+ if (dest.offset != 0) {
+ ma_add(dest.base, Imm32(dest.offset), dest.base, scratch2);
+ }
+
+ as_dtr(IsStore, 32, Offset, scratch,
+ DTRAddr(dest.base, DtrRegImmShift(dest.index, LSL, shift)));
+
+ // Restore the original value of the base, if necessary.
+ if (dest.offset != 0) {
+ ma_sub(dest.base, Imm32(dest.offset), dest.base, scratch);
+ }
+}
+
+void MacroAssemblerARMCompat::storePayload(Register src,
+ const BaseIndex& dest) {
+ unsigned shift = ScaleToShift(dest.scale);
+ MOZ_ASSERT(shift < 32);
+
+ ScratchRegisterScope scratch(asMasm());
+
+ // If NUNBOX32_PAYLOAD_OFFSET is not zero, the memory operand [base + index
+ // << shift + imm] cannot be encoded into a single instruction, and cannot
+ // be integrated into the as_dtr call.
+ static_assert(NUNBOX32_PAYLOAD_OFFSET == 0);
+
+ // Save/restore the base if the BaseIndex has an offset, as above.
+ if (dest.offset != 0) {
+ ma_add(dest.base, Imm32(dest.offset), dest.base, scratch);
+ }
+
+  // Technically, shift > -32 can be handled by changing LSL to ASR, but it
+  // should never come up, and this is one less code path to get wrong.
+ as_dtr(IsStore, 32, Offset, src,
+ DTRAddr(dest.base, DtrRegImmShift(dest.index, LSL, shift)));
+
+ if (dest.offset != 0) {
+ ma_sub(dest.base, Imm32(dest.offset), dest.base, scratch);
+ }
+}
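+
+// Both storePayload(BaseIndex) variants above use the same trick: since
+// [base + index << shift + imm] is not encodable as a single ARM store, the
+// offset is folded into the base register, the store is emitted as
+// [base + index << shift], and the base is then restored. A sketch:
+//
+//   add  base, base, #offset
+//   str  src, [base, index, LSL #shift]
+//   sub  base, base, #offset      ; leave dest.base unchanged
+//
+// This transiently mutates dest.base, which is safe here because the
+// emitted sequence runs straight through.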
+
+void MacroAssemblerARMCompat::storeTypeTag(ImmTag tag, const Address& dest) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ ma_mov(tag, scratch);
+ ma_str(scratch, ToType(dest), scratch2);
+}
+
+void MacroAssemblerARMCompat::storeTypeTag(ImmTag tag, const BaseIndex& dest) {
+ Register base = dest.base;
+ Register index = dest.index;
+ unsigned shift = ScaleToShift(dest.scale);
+
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ MOZ_ASSERT(base != scratch && base != scratch2);
+ MOZ_ASSERT(index != scratch && index != scratch2);
+
+ ma_add(base, Imm32(dest.offset + NUNBOX32_TYPE_OFFSET), scratch2, scratch);
+ ma_mov(tag, scratch);
+ ma_str(scratch, DTRAddr(scratch2, DtrRegImmShift(index, LSL, shift)));
+}
+
+void MacroAssemblerARM::ma_call(ImmPtr dest) {
+ ma_movPatchable(dest, CallReg, Always);
+ as_blx(CallReg);
+}
+
+void MacroAssemblerARMCompat::breakpoint() { as_bkpt(); }
+
+void MacroAssemblerARMCompat::simulatorStop(const char* msg) {
+#ifdef JS_SIMULATOR_ARM
+ MOZ_ASSERT(sizeof(char*) == 4);
+ writeInst(0xefffffff);
+ writeInst((int)msg);
+#endif
+}
+
+void MacroAssemblerARMCompat::ensureDouble(const ValueOperand& source,
+ FloatRegister dest, Label* failure) {
+ Label isDouble, done;
+ asMasm().branchTestDouble(Assembler::Equal, source.typeReg(), &isDouble);
+ asMasm().branchTestInt32(Assembler::NotEqual, source.typeReg(), failure);
+
+ convertInt32ToDouble(source.payloadReg(), dest);
+ jump(&done);
+
+ bind(&isDouble);
+ unboxDouble(source, dest);
+
+ bind(&done);
+}
+
+void MacroAssemblerARMCompat::breakpoint(Condition cc) {
+ ma_ldr(DTRAddr(r12, DtrRegImmShift(r12, LSL, 0, IsDown)), r12, Offset, cc);
+}
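+
+// There is no conditional BKPT encoding, so the conditional breakpoint above
+// is, in effect, a load that faults when the condition holds:
+// ldr r12, [r12, -r12] computes the address r12 - r12 = 0 and dereferences
+// it, trapping on address zero only when cc is true.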
+
+void MacroAssemblerARMCompat::checkStackAlignment() {
+ asMasm().assertStackAlignment(ABIStackAlignment);
+}
+
+void MacroAssemblerARMCompat::handleFailureWithHandlerTail(
+ Label* profilerExitTail, Label* bailoutTail) {
+ // Reserve space for exception information.
+ int size = (sizeof(ResumeFromException) + 7) & ~7;
+
+ Imm8 size8(size);
+ as_sub(sp, sp, size8);
+ ma_mov(sp, r0);
+
+ // Call the handler.
+ using Fn = void (*)(ResumeFromException * rfe);
+ asMasm().setupUnalignedABICall(r1);
+ asMasm().passABIArg(r0);
+ asMasm().callWithABI<Fn, HandleException>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckHasExitFrame);
+
+ Label entryFrame;
+ Label catch_;
+ Label finally;
+ Label returnBaseline;
+ Label returnIon;
+ Label bailout;
+ Label wasm;
+ Label wasmCatch;
+
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfKind()), r0, scratch);
+ }
+
+ asMasm().branch32(Assembler::Equal, r0,
+ Imm32(ExceptionResumeKind::EntryFrame), &entryFrame);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Catch),
+ &catch_);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Finally),
+ &finally);
+ asMasm().branch32(Assembler::Equal, r0,
+ Imm32(ExceptionResumeKind::ForcedReturnBaseline),
+ &returnBaseline);
+ asMasm().branch32(Assembler::Equal, r0,
+ Imm32(ExceptionResumeKind::ForcedReturnIon), &returnIon);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Bailout),
+ &bailout);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Wasm),
+ &wasm);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::WasmCatch),
+ &wasmCatch);
+
+ breakpoint(); // Invalid kind.
+
+ // No exception handler. Load the error value, restore state and return from
+ // the entry frame.
+ bind(&entryFrame);
+ asMasm().moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfFramePointer()), r11,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ }
+
+  // We're going to be returning by the Ion calling convention, which pops
+  // the return address directly into pc (ldr pc, [sp], #4).
+ as_dtr(IsLoad, 32, PostIndex, pc, DTRAddr(sp, DtrOffImm(4)));
+
+ // If we found a catch handler, this must be a baseline frame. Restore state
+ // and jump to the catch block.
+ bind(&catch_);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfTarget()), r0, scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfFramePointer()), r11,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ }
+ jump(r0);
+
+ // If we found a finally block, this must be a baseline frame. Push two
+ // values expected by the finally block: the exception and BooleanValue(true).
+ bind(&finally);
+ ValueOperand exception = ValueOperand(r1, r2);
+ loadValue(Operand(sp, ResumeFromException::offsetOfException()), exception);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfTarget()), r0, scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfFramePointer()), r11,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ }
+
+ pushValue(exception);
+ pushValue(BooleanValue(true));
+ jump(r0);
+
+ // Return BaselineFrame->returnValue() to the caller.
+ // Used in debug mode and for GeneratorReturn.
+ Label profilingInstrumentation;
+ bind(&returnBaseline);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfFramePointer()), r11,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ }
+ loadValue(Address(r11, BaselineFrame::reverseOffsetOfReturnValue()),
+ JSReturnOperand);
+ jump(&profilingInstrumentation);
+
+ // Return the given value to the caller.
+ bind(&returnIon);
+ loadValue(Address(sp, ResumeFromException::offsetOfException()),
+ JSReturnOperand);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfFramePointer()), r11,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ }
+
+ // If profiling is enabled, then update the lastProfilingFrame to refer to
+ // caller frame before returning. This code is shared by ForcedReturnIon
+ // and ForcedReturnBaseline.
+ bind(&profilingInstrumentation);
+ {
+ Label skipProfilingInstrumentation;
+ // Test if profiler enabled.
+ AbsoluteAddress addressOfEnabled(
+ asMasm().runtime()->geckoProfiler().addressOfEnabled());
+ asMasm().branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
+ &skipProfilingInstrumentation);
+ jump(profilerExitTail);
+ bind(&skipProfilingInstrumentation);
+ }
+
+ ma_mov(r11, sp);
+ pop(r11);
+ ret();
+
+ // If we are bailing out to baseline to handle an exception, jump to the
+ // bailout tail stub. Load 1 (true) in ReturnReg to indicate success.
+ bind(&bailout);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfBailoutInfo()), r2,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ ma_mov(Imm32(1), ReturnReg);
+ }
+ jump(bailoutTail);
+
+ // If we are throwing and the innermost frame was a wasm frame, reset SP and
+ // FP; SP is pointing to the unwound return address to the wasm entry, so
+ // we can just ret().
+ bind(&wasm);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfFramePointer()), r11,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ ma_mov(Imm32(int32_t(wasm::FailInstanceReg)), InstanceReg);
+ }
+ as_dtr(IsLoad, 32, PostIndex, pc, DTRAddr(sp, DtrOffImm(4)));
+
+ // Found a wasm catch handler, restore state and jump to it.
+ bind(&wasmCatch);
+ {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(Address(sp, ResumeFromException::offsetOfTarget()), r1, scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfFramePointer()), r11,
+ scratch);
+ ma_ldr(Address(sp, ResumeFromException::offsetOfStackPointer()), sp,
+ scratch);
+ }
+ jump(r1);
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testStringTruthy(
+ bool truthy, const ValueOperand& value) {
+ Register string = value.payloadReg();
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ ma_dtr(IsLoad, string, Imm32(JSString::offsetOfLength()), scratch, scratch2);
+ as_cmp(scratch, Imm8(0));
+ return truthy ? Assembler::NotEqual : Assembler::Equal;
+}
+
+Assembler::Condition MacroAssemblerARMCompat::testBigIntTruthy(
+ bool truthy, const ValueOperand& value) {
+ Register bi = value.payloadReg();
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ ma_dtr(IsLoad, bi, Imm32(BigInt::offsetOfDigitLength()), scratch, scratch2);
+ as_cmp(scratch, Imm8(0));
+ return truthy ? Assembler::NotEqual : Assembler::Equal;
+}
+
+void MacroAssemblerARMCompat::floor(FloatRegister input, Register output,
+ Label* bail) {
+ Label handleZero;
+ Label handleNeg;
+ Label fin;
+
+ ScratchDoubleScope scratchDouble(asMasm());
+
+ compareDouble(input, NoVFPRegister);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handleNeg, Assembler::Signed);
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+
+  // The argument is a positive number; truncation is the path to glory. Since
+  // it is known to be > 0.0, explicitly convert to the larger unsigned range,
+  // so that a value that rounds to INT_MAX is explicitly different from an
+  // argument that clamps to INT_MAX.
+ ma_vcvt_F64_U32(input, scratchDouble.uintOverlay());
+ ma_vxfer(scratchDouble.uintOverlay(), output);
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+ ma_b(&fin);
+
+ bind(&handleZero);
+  // Move the top word of the double into the output reg; if it is non-zero,
+  // then the original value was -0.0.
+ as_vxfer(output, InvalidReg, input, FloatToCore, Always, 1);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+ bind(&handleNeg);
+ // Negative case, negate, then start dancing.
+ ma_vneg(input, input);
+ ma_vcvt_F64_U32(input, scratchDouble.uintOverlay());
+ ma_vxfer(scratchDouble.uintOverlay(), output);
+ ma_vcvt_U32_F64(scratchDouble.uintOverlay(), scratchDouble);
+ compareDouble(scratchDouble, input);
+ as_add(output, output, Imm8(1), LeaveCC, NotEqual);
+ // Negate the output. Since INT_MIN < -INT_MAX, even after adding 1, the
+ // result will still be a negative number.
+ as_rsb(output, output, Imm8(0), SetCC);
+ // Flip the negated input back to its original value.
+ ma_vneg(input, input);
+ // If the result looks non-negative, then this value didn't actually fit
+ // into the int range, and special handling is required. Zero is also caught
+ // by this case, but floor of a negative number should never be zero.
+ ma_b(bail, NotSigned);
+
+ bind(&fin);
+}
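+
+// A worked trace of the negative path above (illustrative only):
+//
+//   floor(-2.5): negate -> 2.5; truncate -> 2; reconverted 2.0 != 2.5, so
+//                output += 1 -> 3; rsb -> -3.       floor(-2.5) == -3  ok
+//   floor(-2.0): negate -> 2.0; truncate -> 2; 2.0 == 2.0, no adjustment;
+//                rsb -> -2.                         floor(-2.0) == -2  ok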
+
+void MacroAssemblerARMCompat::floorf(FloatRegister input, Register output,
+ Label* bail) {
+ Label handleZero;
+ Label handleNeg;
+ Label fin;
+ compareFloat(input, NoVFPRegister);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handleNeg, Assembler::Signed);
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+
+  // The argument is a positive number; truncation is the path to glory. Since
+  // it is known to be > 0.0, explicitly convert to the larger unsigned range,
+  // so that a value that rounds to INT_MAX is explicitly different from an
+  // argument that clamps to INT_MAX.
+ {
+ ScratchFloat32Scope scratch(asMasm());
+ ma_vcvt_F32_U32(input, scratch.uintOverlay());
+ ma_vxfer(VFPRegister(scratch).uintOverlay(), output);
+ }
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+ ma_b(&fin);
+
+ bind(&handleZero);
+  // Move the float32 bits into the output reg; if they are non-zero, then
+  // the original value was -0.0.
+ as_vxfer(output, InvalidReg, VFPRegister(input).singleOverlay(), FloatToCore,
+ Always, 0);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+ bind(&handleNeg);
+ // Negative case, negate, then start dancing.
+ {
+ ScratchFloat32Scope scratch(asMasm());
+ ma_vneg_f32(input, input);
+ ma_vcvt_F32_U32(input, scratch.uintOverlay());
+ ma_vxfer(VFPRegister(scratch).uintOverlay(), output);
+ ma_vcvt_U32_F32(scratch.uintOverlay(), scratch);
+ compareFloat(scratch, input);
+ as_add(output, output, Imm8(1), LeaveCC, NotEqual);
+ }
+ // Negate the output. Since INT_MIN < -INT_MAX, even after adding 1, the
+ // result will still be a negative number.
+ as_rsb(output, output, Imm8(0), SetCC);
+ // Flip the negated input back to its original value.
+ ma_vneg_f32(input, input);
+ // If the result looks non-negative, then this value didn't actually fit
+ // into the int range, and special handling is required. Zero is also caught
+ // by this case, but floor of a negative number should never be zero.
+ ma_b(bail, NotSigned);
+
+ bind(&fin);
+}
+
+void MacroAssemblerARMCompat::ceil(FloatRegister input, Register output,
+ Label* bail) {
+ Label handleZero;
+ Label handlePos;
+ Label fin;
+
+ compareDouble(input, NoVFPRegister);
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handlePos, Assembler::NotSigned);
+
+ ScratchDoubleScope scratchDouble(asMasm());
+
+ // We are in the ]-Inf; 0[ range
+ // If we are in the ]-1; 0[ range => bailout
+ loadConstantDouble(-1.0, scratchDouble);
+ compareDouble(input, scratchDouble);
+ ma_b(bail, Assembler::GreaterThan);
+
+ // We are in the ]-Inf; -1] range: ceil(x) == -floor(-x) and floor can be
+ // computed with direct truncation here (x > 0).
+ ma_vneg(input, scratchDouble);
+ FloatRegister ScratchUIntReg = scratchDouble.uintOverlay();
+ ma_vcvt_F64_U32(scratchDouble, ScratchUIntReg);
+ ma_vxfer(ScratchUIntReg, output);
+ ma_neg(output, output, SetCC);
+ ma_b(bail, NotSigned);
+ ma_b(&fin);
+
+ // Test for 0.0 / -0.0: if the top word of the input double is not zero,
+ // then it was -0 and we need to bail out.
+ bind(&handleZero);
+ as_vxfer(output, InvalidReg, input, FloatToCore, Always, 1);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+  // We are in the ]0; +inf] range: truncate integer values, maybe add 1 for
+  // non-integer values, and maybe bail on overflow.
+ bind(&handlePos);
+ ma_vcvt_F64_U32(input, ScratchUIntReg);
+ ma_vxfer(ScratchUIntReg, output);
+ ma_vcvt_U32_F64(ScratchUIntReg, scratchDouble);
+ compareDouble(scratchDouble, input);
+ as_add(output, output, Imm8(1), LeaveCC, NotEqual);
+ // Bail out if the add overflowed or the result is non positive.
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+ ma_b(bail, Zero);
+
+ bind(&fin);
+}
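+
+// A worked trace of both ranges above (illustrative only):
+//
+//   ceil(-2.5): in ]-Inf; -1], so ceil(x) = -floor(-x): negate -> 2.5;
+//               truncate -> 2; negate -> -2.         ceil(-2.5) == -2  ok
+//   ceil(2.5):  truncate -> 2; reconverted 2.0 != 2.5, so output += 1
+//               -> 3.                                ceil(2.5)  ==  3  ok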
+
+void MacroAssemblerARMCompat::ceilf(FloatRegister input, Register output,
+ Label* bail) {
+ Label handleZero;
+ Label handlePos;
+ Label fin;
+
+ compareFloat(input, NoVFPRegister);
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handlePos, Assembler::NotSigned);
+
+ // We are in the ]-Inf; 0[ range
+ // If we are in the ]-1; 0[ range => bailout
+ {
+ ScratchFloat32Scope scratch(asMasm());
+ loadConstantFloat32(-1.f, scratch);
+ compareFloat(input, scratch);
+ ma_b(bail, Assembler::GreaterThan);
+ }
+
+ // We are in the ]-Inf; -1] range: ceil(x) == -floor(-x) and floor can be
+ // computed with direct truncation here (x > 0).
+ {
+ ScratchDoubleScope scratchDouble(asMasm());
+ FloatRegister scratchFloat = scratchDouble.asSingle();
+ FloatRegister scratchUInt = scratchDouble.uintOverlay();
+
+ ma_vneg_f32(input, scratchFloat);
+ ma_vcvt_F32_U32(scratchFloat, scratchUInt);
+ ma_vxfer(scratchUInt, output);
+ ma_neg(output, output, SetCC);
+ ma_b(bail, NotSigned);
+ ma_b(&fin);
+ }
+
+  // Test for 0.0 / -0.0: if the bits of the input float32 are not zero, then
+  // it was -0 and we need to bail out.
+ bind(&handleZero);
+ as_vxfer(output, InvalidReg, VFPRegister(input).singleOverlay(), FloatToCore,
+ Always, 0);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+  // We are in the ]0; +inf] range: truncate integer values, maybe add 1 for
+  // non-integer values, and maybe bail on overflow.
+ bind(&handlePos);
+ {
+ ScratchDoubleScope scratchDouble(asMasm());
+ FloatRegister scratchFloat = scratchDouble.asSingle();
+ FloatRegister scratchUInt = scratchDouble.uintOverlay();
+
+ ma_vcvt_F32_U32(input, scratchUInt);
+ ma_vxfer(scratchUInt, output);
+ ma_vcvt_U32_F32(scratchUInt, scratchFloat);
+ compareFloat(scratchFloat, input);
+ as_add(output, output, Imm8(1), LeaveCC, NotEqual);
+
+ // Bail on overflow or non-positive result.
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+ ma_b(bail, Zero);
+ }
+
+ bind(&fin);
+}
+
+CodeOffset MacroAssemblerARMCompat::toggledJump(Label* label) {
+ // Emit a B that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp().
+ BufferOffset b = ma_b(label, Always);
+ CodeOffset ret(b.getOffset());
+ return ret;
+}
+
+CodeOffset MacroAssemblerARMCompat::toggledCall(JitCode* target, bool enabled) {
+ BufferOffset bo = nextOffset();
+ addPendingJump(bo, ImmPtr(target->raw()), RelocationKind::JITCODE);
+ ScratchRegisterScope scratch(asMasm());
+ ma_movPatchable(ImmPtr(target->raw()), scratch, Always);
+ if (enabled) {
+ ma_blx(scratch);
+ } else {
+ ma_nop();
+ }
+ return CodeOffset(bo.getOffset());
+}
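+
+// Both toggled entry points above rely on instruction patching: the returned
+// CodeOffset records where the toggle lives. For toggledJump, the B can
+// later be rewritten to a CMP and back (see ToggleToJmp/ToggleToCmp); for
+// toggledCall, the final instruction flips between BLX and NOP. A rough
+// picture of the toggledCall site:
+//
+//   (patchable move of target into scratch, e.g. movw/movt)
+//   blx scratch   <->   nop        ; enabled / disabled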
+
+void MacroAssemblerARMCompat::round(FloatRegister input, Register output,
+ Label* bail, FloatRegister tmp) {
+ Label handleZero;
+ Label handleNeg;
+ Label fin;
+
+ ScratchDoubleScope scratchDouble(asMasm());
+
+ // Do a compare based on the original value, then do most other things based
+ // on the shifted value.
+ ma_vcmpz(input);
+ // Since we already know the sign bit, flip all numbers to be positive,
+ // stored in tmp.
+ ma_vabs(input, tmp);
+ as_vmrs(pc);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handleNeg, Assembler::Signed);
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+
+  // The argument is a positive number; truncation is the path to glory. Since
+  // it is known to be > 0.0, explicitly convert to the larger unsigned range,
+  // so that a value that rounds to INT_MAX is explicitly different from an
+  // argument that clamps to INT_MAX.
+
+ // Add the biggest number less than 0.5 (not 0.5, because adding that to
+ // the biggest number less than 0.5 would undesirably round up to 1), and
+ // store the result into tmp.
+ loadConstantDouble(GetBiggestNumberLessThan(0.5), scratchDouble);
+ ma_vadd(scratchDouble, tmp, tmp);
+
+ ma_vcvt_F64_U32(tmp, scratchDouble.uintOverlay());
+ ma_vxfer(VFPRegister(scratchDouble).uintOverlay(), output);
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+ ma_b(&fin);
+
+ bind(&handleZero);
+  // Move the top word of the double into the output reg; if it is non-zero,
+  // then the original value was -0.0.
+ as_vxfer(output, InvalidReg, input, FloatToCore, Always, 1);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+ bind(&handleNeg);
+  // Negative case: negate, then start dancing. This number may be positive,
+  // since we add 0.5 below.
+
+ // Add 0.5 to negative numbers, store the result into tmp
+ loadConstantDouble(0.5, scratchDouble);
+ ma_vadd(scratchDouble, tmp, tmp);
+
+ ma_vcvt_F64_U32(tmp, scratchDouble.uintOverlay());
+ ma_vxfer(VFPRegister(scratchDouble).uintOverlay(), output);
+
+  // -output is now a correctly rounded value, unless the original value was
+  // exactly halfway between two integers, at which point it has been rounded
+  // away from zero, when it should be rounded towards +Infinity.
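+  //
+  // Worked example: for input -2.5, tmp == 2.5 + 0.5 == 3.0; the truncation
+  // gives 3, the reconversion compares equal, so we subtract 1 and negate,
+  // producing -2 == Math.round(-2.5).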
+ ma_vcvt_U32_F64(scratchDouble.uintOverlay(), scratchDouble);
+ compareDouble(scratchDouble, tmp);
+ as_sub(output, output, Imm8(1), LeaveCC, Equal);
+ // Negate the output. Since INT_MIN < -INT_MAX, even after adding 1, the
+ // result will still be a negative number.
+ as_rsb(output, output, Imm8(0), SetCC);
+
+  // If the result looks non-negative, then either the value didn't actually
+  // fit into the int range, or it was zero, meaning the result is really
+  // -0.0; both cases require special handling, so bail.
+ ma_b(bail, NotSigned);
+
+ bind(&fin);
+}
+
+void MacroAssemblerARMCompat::roundf(FloatRegister input, Register output,
+ Label* bail, FloatRegister tmp) {
+ Label handleZero;
+ Label handleNeg;
+ Label fin;
+
+ ScratchFloat32Scope scratchFloat(asMasm());
+
+  // Do a compare based on the original value; the negative path below then
+  // operates on the negated value.
+ compareFloat(input, NoVFPRegister);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handleNeg, Assembler::Signed);
+
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+
+  // The argument is a positive number; truncation is the path to glory. Since
+  // it is known to be > 0.0, explicitly convert to the larger unsigned range,
+  // so that a value which rounds to INT_MAX is distinguishable from an
+  // argument that clamps to INT_MAX.
+
+ // Add the biggest number less than 0.5f (not 0.5f, because adding that to
+ // the biggest number less than 0.5f would undesirably round up to 1), and
+ // store the result into tmp.
+ loadConstantFloat32(GetBiggestNumberLessThan(0.5f), scratchFloat);
+ ma_vadd_f32(scratchFloat, input, tmp);
+
+ // Note: it doesn't matter whether x + .5 === x or not here, as it doesn't
+ // affect the semantics of the float to unsigned conversion (in particular,
+ // we are not applying any fixup after the operation).
+ ma_vcvt_F32_U32(tmp, scratchFloat.uintOverlay());
+ ma_vxfer(VFPRegister(scratchFloat).uintOverlay(), output);
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+ ma_b(&fin);
+
+ bind(&handleZero);
+
+  // Move the whole float32 into the output reg; if it is non-zero, then the
+  // original value was -0.0.
+ as_vxfer(output, InvalidReg, input, FloatToCore, Always, 0);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+ bind(&handleNeg);
+
+ // Add 0.5 to negative numbers, storing the result into tmp.
+ ma_vneg_f32(input, tmp);
+ loadConstantFloat32(0.5f, scratchFloat);
+ ma_vadd_f32(tmp, scratchFloat, scratchFloat);
+
+  // Adding 0.5 to a sufficiently large float input may not change it at all
+  // (the addition is absorbed); in that case, skip the -1 adjustment made
+  // below.
+ compareFloat(scratchFloat, tmp);
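+  // Equal here means that |input| + 0.5 == |input| (e.g. |input| == 2^24 in
+  // float32), so the value is already integral and the halfway adjustment
+  // below must be skipped.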
+
+  // Negative case: negate, then start dancing. This number may be positive,
+  // since we added 0.5.
+  // /!\ The conditional branch below depends on the next two instructions
+  // *not* setting the status flags; they must remain as set by the
+  // comparison above.
+ ma_vcvt_F32_U32(scratchFloat, tmp.uintOverlay());
+ ma_vxfer(VFPRegister(tmp).uintOverlay(), output);
+
+ Label flipSign;
+ ma_b(&flipSign, Equal);
+
+  // -output is now a correctly rounded value, unless the original value was
+  // exactly halfway between two integers, at which point it has been rounded
+  // away from zero, when it should be rounded towards +Infinity.
+ ma_vcvt_U32_F32(tmp.uintOverlay(), tmp);
+ compareFloat(tmp, scratchFloat);
+ as_sub(output, output, Imm8(1), LeaveCC, Equal);
+
+ // Negate the output. Since INT_MIN < -INT_MAX, even after adding 1, the
+ // result will still be a negative number.
+ bind(&flipSign);
+ as_rsb(output, output, Imm8(0), SetCC);
+
+  // If the result looks non-negative, then either the value didn't actually
+  // fit into the int range, or it was zero, meaning the result is really
+  // -0.0; both cases require special handling, so bail.
+ ma_b(bail, NotSigned);
+
+ bind(&fin);
+}
+
+void MacroAssemblerARMCompat::trunc(FloatRegister input, Register output,
+ Label* bail) {
+ Label handleZero;
+ Label handlePos;
+ Label fin;
+
+ compareDouble(input, NoVFPRegister);
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handlePos, Assembler::NotSigned);
+
+ ScratchDoubleScope scratchDouble(asMasm());
+
+  // We are in the ]-Inf; 0[ range.
+  // If we are in the ]-1; 0[ range, bail out.
+ loadConstantDouble(-1.0, scratchDouble);
+ compareDouble(input, scratchDouble);
+ ma_b(bail, Assembler::GreaterThan);
+
+  // We are in the ]-Inf; -1] range: trunc(x) == -floor(-x), and floor can be
+  // computed with direct truncation here (since -x > 0).
+ ma_vneg(input, scratchDouble);
+ ma_vcvt_F64_U32(scratchDouble, scratchDouble.uintOverlay());
+ ma_vxfer(scratchDouble.uintOverlay(), output);
+ ma_neg(output, output, SetCC);
+ ma_b(bail, NotSigned);
+ ma_b(&fin);
+
+ // Test for 0.0 / -0.0: if the top word of the input double is not zero,
+ // then it was -0 and we need to bail out.
+ bind(&handleZero);
+ as_vxfer(output, InvalidReg, input, FloatToCore, Always, 1);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+  // We are in the ]0; +inf] range: truncation is the path to glory. Since the
+  // value is known to be > 0.0, explicitly convert to the larger unsigned
+  // range, so that a value which rounds to INT_MAX is distinguishable from an
+  // argument that clamps to INT_MAX.
+ bind(&handlePos);
+ ma_vcvt_F64_U32(input, scratchDouble.uintOverlay());
+ ma_vxfer(scratchDouble.uintOverlay(), output);
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+
+ bind(&fin);
+}
+
+void MacroAssemblerARMCompat::truncf(FloatRegister input, Register output,
+ Label* bail) {
+ Label handleZero;
+ Label handlePos;
+ Label fin;
+
+ compareFloat(input, NoVFPRegister);
+ // NaN is always a bail condition, just bail directly.
+ ma_b(bail, Assembler::Overflow);
+ ma_b(&handleZero, Assembler::Equal);
+ ma_b(&handlePos, Assembler::NotSigned);
+
+  // We are in the ]-Inf; 0[ range.
+  // If we are in the ]-1; 0[ range, bail out.
+ {
+ ScratchFloat32Scope scratch(asMasm());
+ loadConstantFloat32(-1.f, scratch);
+ compareFloat(input, scratch);
+ ma_b(bail, Assembler::GreaterThan);
+ }
+
+  // We are in the ]-Inf; -1] range: trunc(x) == -floor(-x), and floor can be
+  // computed with direct truncation here (since -x > 0).
+ {
+ ScratchDoubleScope scratchDouble(asMasm());
+ FloatRegister scratchFloat = scratchDouble.asSingle();
+ FloatRegister scratchUInt = scratchDouble.uintOverlay();
+
+ ma_vneg_f32(input, scratchFloat);
+ ma_vcvt_F32_U32(scratchFloat, scratchUInt);
+ ma_vxfer(scratchUInt, output);
+ ma_neg(output, output, SetCC);
+ ma_b(bail, NotSigned);
+ ma_b(&fin);
+ }
+
+  // Test for 0.0 / -0.0: if the bit pattern of the input float32 is not
+  // zero, then it was -0 and we need to bail out.
+ bind(&handleZero);
+ as_vxfer(output, InvalidReg, VFPRegister(input).singleOverlay(), FloatToCore,
+ Always, 0);
+ as_cmp(output, Imm8(0));
+ ma_b(bail, NonZero);
+ ma_b(&fin);
+
+  // We are in the ]0; +inf] range: truncation is the path to glory. Since the
+  // argument is known to be > 0.0, explicitly convert to the larger unsigned
+  // range, so that a value which rounds to INT_MAX is distinguishable from an
+  // argument that clamps to INT_MAX.
+ bind(&handlePos);
+ {
+    // Convert via the unsigned range, as explained above.
+ {
+ ScratchFloat32Scope scratch(asMasm());
+ ma_vcvt_F32_U32(input, scratch.uintOverlay());
+ ma_vxfer(VFPRegister(scratch).uintOverlay(), output);
+ }
+ ma_mov(output, output, SetCC);
+ ma_b(bail, Signed);
+ }
+
+ bind(&fin);
+}
+
+void MacroAssemblerARMCompat::profilerEnterFrame(Register framePtr,
+ Register scratch) {
+ asMasm().loadJSContext(scratch);
+ loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch);
+ storePtr(framePtr,
+ Address(scratch, JitActivation::offsetOfLastProfilingFrame()));
+ storePtr(ImmPtr(nullptr),
+ Address(scratch, JitActivation::offsetOfLastProfilingCallSite()));
+}
+
+void MacroAssemblerARMCompat::profilerExitFrame() {
+ jump(asMasm().runtime()->jitRuntime()->getProfilerExitFrameTail());
+}
+
+MacroAssembler& MacroAssemblerARM::asMasm() {
+ return *static_cast<MacroAssembler*>(this);
+}
+
+const MacroAssembler& MacroAssemblerARM::asMasm() const {
+ return *static_cast<const MacroAssembler*>(this);
+}
+
+MacroAssembler& MacroAssemblerARMCompat::asMasm() {
+ return *static_cast<MacroAssembler*>(this);
+}
+
+const MacroAssembler& MacroAssemblerARMCompat::asMasm() const {
+ return *static_cast<const MacroAssembler*>(this);
+}
+
+void MacroAssembler::subFromStackPtr(Imm32 imm32) {
+ ScratchRegisterScope scratch(*this);
+ if (imm32.value) {
+ ma_sub(imm32, sp, scratch);
+ }
+}
+
+//{{{ check_macroassembler_style
+// ===============================================================
+// MacroAssembler high-level usage.
+
+void MacroAssembler::flush() { Assembler::flush(); }
+
+void MacroAssembler::comment(const char* msg) { Assembler::comment(msg); }
+
+// ===============================================================
+// Stack manipulation functions.
+
+size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) {
+ return set.gprs().size() * sizeof(intptr_t) + set.fpus().getPushSizeInBytes();
+}
+
+void MacroAssembler::PushRegsInMask(LiveRegisterSet set) {
+ mozilla::DebugOnly<size_t> framePushedInitial = framePushed();
+
+ int32_t diffF = set.fpus().getPushSizeInBytes();
+ int32_t diffG = set.gprs().size() * sizeof(intptr_t);
+
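+  // With more than one GPR we can use a single stmdb (store-multiple,
+  // descending, write-back); a lone register is cheaper as a plain store.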
+ if (set.gprs().size() > 1) {
+ adjustFrame(diffG);
+ startDataTransferM(IsStore, StackPointer, DB, WriteBack);
+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();
+ ++iter) {
+ diffG -= sizeof(intptr_t);
+ transferReg(*iter);
+ }
+ finishDataTransfer();
+ } else {
+ reserveStack(diffG);
+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();
+ ++iter) {
+ diffG -= sizeof(intptr_t);
+ storePtr(*iter, Address(StackPointer, diffG));
+ }
+ }
+ MOZ_ASSERT(diffG == 0);
+
+ // It's possible that the logic is just fine as it is if the reduced set
+ // maps SIMD pairs to plain doubles and transferMultipleByRuns() stores
+ // and loads doubles.
+#ifdef ENABLE_WASM_SIMD
+# error "Needs more careful logic if SIMD is enabled"
+#endif
+
+ adjustFrame(diffF);
+ diffF += transferMultipleByRuns(set.fpus(), IsStore, StackPointer, DB);
+ MOZ_ASSERT(diffF == 0);
+
+ MOZ_ASSERT(framePushed() - framePushedInitial ==
+ PushRegsInMaskSizeInBytes(set));
+}
+
+void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest,
+ Register scratch) {
+ mozilla::DebugOnly<size_t> offsetInitial = dest.offset;
+
+ int32_t diffF = set.fpus().getPushSizeInBytes();
+ int32_t diffG = set.gprs().size() * sizeof(intptr_t);
+
+ MOZ_ASSERT(dest.offset >= diffF + diffG);
+
+ if (set.gprs().size() > 1) {
+ computeEffectiveAddress(dest, scratch);
+
+ startDataTransferM(IsStore, scratch, DB, WriteBack);
+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();
+ ++iter) {
+ diffG -= sizeof(intptr_t);
+ dest.offset -= sizeof(intptr_t);
+ transferReg(*iter);
+ }
+ finishDataTransfer();
+ } else {
+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();
+ ++iter) {
+ diffG -= sizeof(intptr_t);
+ dest.offset -= sizeof(intptr_t);
+ storePtr(*iter, dest);
+ }
+ }
+ MOZ_ASSERT(diffG == 0);
+ (void)diffG;
+
+ // See above.
+#ifdef ENABLE_WASM_SIMD
+# error "Needs more careful logic if SIMD is enabled"
+#endif
+
+ MOZ_ASSERT(diffF >= 0);
+ if (diffF > 0) {
+ computeEffectiveAddress(dest, scratch);
+ diffF += transferMultipleByRuns(set.fpus(), IsStore, scratch, DB);
+ }
+
+ MOZ_ASSERT(diffF == 0);
+
+ // "The amount of space actually used does not exceed what
+ // `PushRegsInMaskSizeInBytes` claims will be used."
+ MOZ_ASSERT(offsetInitial - dest.offset <= PushRegsInMaskSizeInBytes(set));
+}
+
+void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set,
+ LiveRegisterSet ignore) {
+ mozilla::DebugOnly<size_t> framePushedInitial = framePushed();
+
+ int32_t diffG = set.gprs().size() * sizeof(intptr_t);
+ int32_t diffF = set.fpus().getPushSizeInBytes();
+ const int32_t reservedG = diffG;
+ const int32_t reservedF = diffF;
+
+ // See above.
+#ifdef ENABLE_WASM_SIMD
+# error "Needs more careful logic if SIMD is enabled"
+#endif
+
+ // ARM can load multiple registers at once, but only if we want back all
+ // the registers we previously saved to the stack.
+ if (ignore.emptyFloat()) {
+ diffF -= transferMultipleByRuns(set.fpus(), IsLoad, StackPointer, IA);
+ adjustFrame(-reservedF);
+ } else {
+ LiveFloatRegisterSet fpset(set.fpus().reduceSetForPush());
+ LiveFloatRegisterSet fpignore(ignore.fpus().reduceSetForPush());
+ for (FloatRegisterBackwardIterator iter(fpset); iter.more(); ++iter) {
+ diffF -= (*iter).size();
+ if (!fpignore.has(*iter)) {
+ loadDouble(Address(StackPointer, diffF), *iter);
+ }
+ }
+ freeStack(reservedF);
+ }
+ MOZ_ASSERT(diffF == 0);
+
+ if (set.gprs().size() > 1 && ignore.emptyGeneral()) {
+ startDataTransferM(IsLoad, StackPointer, IA, WriteBack);
+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();
+ ++iter) {
+ diffG -= sizeof(intptr_t);
+ transferReg(*iter);
+ }
+ finishDataTransfer();
+ adjustFrame(-reservedG);
+ } else {
+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();
+ ++iter) {
+ diffG -= sizeof(intptr_t);
+ if (!ignore.has(*iter)) {
+ loadPtr(Address(StackPointer, diffG), *iter);
+ }
+ }
+ freeStack(reservedG);
+ }
+ MOZ_ASSERT(diffG == 0);
+
+ MOZ_ASSERT(framePushedInitial - framePushed() ==
+ PushRegsInMaskSizeInBytes(set));
+}
+
+void MacroAssembler::Push(Register reg) {
+ push(reg);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(const Imm32 imm) {
+ push(imm);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(const ImmWord imm) {
+ push(imm);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(const ImmPtr imm) {
+ Push(ImmWord(uintptr_t(imm.value)));
+}
+
+void MacroAssembler::Push(const ImmGCPtr ptr) {
+ push(ptr);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(FloatRegister reg) {
+ VFPRegister r = VFPRegister(reg);
+ ma_vpush(VFPRegister(reg));
+ adjustFrame(r.size());
+}
+
+void MacroAssembler::PushBoxed(FloatRegister reg) {
+ MOZ_ASSERT(reg.isDouble());
+ Push(reg);
+}
+
+void MacroAssembler::Pop(Register reg) {
+ ma_pop(reg);
+ adjustFrame(-sizeof(intptr_t));
+}
+
+void MacroAssembler::Pop(FloatRegister reg) {
+ ma_vpop(reg);
+ adjustFrame(-reg.size());
+}
+
+void MacroAssembler::Pop(const ValueOperand& val) {
+ popValue(val);
+ adjustFrame(-sizeof(Value));
+}
+
+void MacroAssembler::PopStackPtr() {
+ as_dtr(IsLoad, 32, Offset, sp, DTRAddr(sp, DtrOffImm(0)));
+ adjustFrame(-sizeof(intptr_t));
+}
+
+// ===============================================================
+// Simple call functions.
+
+CodeOffset MacroAssembler::call(Register reg) {
+ as_blx(reg);
+ return CodeOffset(currentOffset());
+}
+
+CodeOffset MacroAssembler::call(Label* label) {
+ // For now, assume that it'll be nearby.
+ as_bl(label, Always);
+ return CodeOffset(currentOffset());
+}
+
+void MacroAssembler::call(ImmWord imm) { call(ImmPtr((void*)imm.value)); }
+
+void MacroAssembler::call(ImmPtr imm) {
+ BufferOffset bo = m_buffer.nextOffset();
+ addPendingJump(bo, imm, RelocationKind::HARDCODED);
+ ma_call(imm);
+}
+
+CodeOffset MacroAssembler::call(wasm::SymbolicAddress imm) {
+ movePtr(imm, CallReg);
+ return call(CallReg);
+}
+
+void MacroAssembler::call(const Address& addr) {
+ loadPtr(addr, CallReg);
+ call(CallReg);
+}
+
+void MacroAssembler::call(JitCode* c) {
+ BufferOffset bo = m_buffer.nextOffset();
+ addPendingJump(bo, ImmPtr(c->raw()), RelocationKind::JITCODE);
+ ScratchRegisterScope scratch(*this);
+ ma_movPatchable(ImmPtr(c->raw()), scratch, Always);
+ callJitNoProfiler(scratch);
+}
+
+CodeOffset MacroAssembler::callWithPatch() {
+ // The caller ensures that the call is always in range using thunks (below)
+ // as necessary.
+ as_bl(BOffImm(), Always, /* documentation */ nullptr);
+ return CodeOffset(currentOffset());
+}
+
+void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) {
+ BufferOffset inst(callerOffset - 4);
+ BOffImm off = BufferOffset(calleeOffset).diffB<BOffImm>(inst);
+ MOZ_RELEASE_ASSERT(!off.isInvalid(),
+ "Failed to insert necessary far jump islands");
+ as_bl(off, Always, inst);
+}
+
+CodeOffset MacroAssembler::farJumpWithPatch() {
+ static_assert(32 * 1024 * 1024 - JumpImmediateRange >
+ wasm::MaxFuncs * 3 * sizeof(Instruction),
+ "always enough space for thunks");
+
+ // The goal of the thunk is to be able to jump to any address without the
+ // usual 32MiB branch range limitation. Additionally, to make the thunk
+ // simple to use, the thunk does not use the constant pool or require
+ // patching an absolute address. Instead, a relative offset is used which
+ // can be patched during compilation.
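+  //
+  // Sketch of the three words emitted below:
+  //   ldr scratch, [pc, #0]   ; pc reads as (ldr address + 8), i.e. the
+  //                           ; address of the offset word
+  //   add pc, pc, scratch     ; pc reads as (add address + 8)
+  //   <offset word>           ; target - (add address + 8), patched by
+  //                           ; patchFarJump()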
+
+ // Inhibit pools since these three words must be contiguous so that the offset
+ // calculations below are valid.
+ AutoForbidPoolsAndNops afp(this, 3);
+
+ // When pc is used, the read value is the address of the instruction + 8.
+ // This is exactly the address of the uint32 word we want to load.
+ ScratchRegisterScope scratch(*this);
+ ma_ldr(DTRAddr(pc, DtrOffImm(0)), scratch);
+
+ // Branch by making pc the destination register.
+ ma_add(pc, scratch, pc, LeaveCC, Always);
+
+ // Allocate space which will be patched by patchFarJump().
+ CodeOffset farJump(currentOffset());
+ writeInst(UINT32_MAX);
+
+ return farJump;
+}
+
+void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) {
+ uint32_t* u32 =
+ reinterpret_cast<uint32_t*>(editSrc(BufferOffset(farJump.offset())));
+ MOZ_ASSERT(*u32 == UINT32_MAX);
+
+ uint32_t addOffset = farJump.offset() - 4;
+ MOZ_ASSERT(editSrc(BufferOffset(addOffset))->is<InstALU>());
+
+ // When pc is read as the operand of the add, its value is the address of
+ // the add instruction + 8.
+ *u32 = (targetOffset - addOffset) - 8;
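+
+  // Sanity check on the arithmetic: at runtime the add computes
+  // pc + *u32 == (addOffset + 8) + (targetOffset - addOffset - 8)
+  //           == targetOffset.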
+}
+
+CodeOffset MacroAssembler::nopPatchableToCall() {
+ AutoForbidPoolsAndNops afp(this,
+ /* max number of instructions in scope = */ 1);
+ ma_nop();
+ return CodeOffset(currentOffset());
+}
+
+void MacroAssembler::patchNopToCall(uint8_t* call, uint8_t* target) {
+ uint8_t* inst = call - 4;
+ MOZ_ASSERT(reinterpret_cast<Instruction*>(inst)->is<InstBLImm>() ||
+ reinterpret_cast<Instruction*>(inst)->is<InstNOP>());
+
+ new (inst) InstBLImm(BOffImm(target - inst), Assembler::Always);
+}
+
+void MacroAssembler::patchCallToNop(uint8_t* call) {
+ uint8_t* inst = call - 4;
+ MOZ_ASSERT(reinterpret_cast<Instruction*>(inst)->is<InstBLImm>() ||
+ reinterpret_cast<Instruction*>(inst)->is<InstNOP>());
+ new (inst) InstNOP();
+}
+
+void MacroAssembler::pushReturnAddress() { push(lr); }
+
+void MacroAssembler::popReturnAddress() { pop(lr); }
+
+// ===============================================================
+// ABI function calls.
+
+void MacroAssembler::setupUnalignedABICall(Register scratch) {
+ setupNativeABICall();
+ dynamicAlignment_ = true;
+
+ ma_mov(sp, scratch);
+ // Force sp to be aligned.
+ as_bic(sp, sp, Imm8(ABIStackAlignment - 1));
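+  // E.g. with ABIStackAlignment == 8 this is "bic sp, sp, #7", rounding sp
+  // down to the nearest multiple of 8.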
+ ma_push(scratch);
+}
+
+void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) {
+ MOZ_ASSERT(inCall_);
+ uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar();
+
+ if (dynamicAlignment_) {
+ // sizeof(intptr_t) accounts for the saved stack pointer pushed by
+ // setupUnalignedABICall.
+ stackForCall += ComputeByteAlignment(stackForCall + sizeof(intptr_t),
+ ABIStackAlignment);
+ } else {
+ uint32_t alignmentAtPrologue = callFromWasm ? sizeof(wasm::Frame) : 0;
+ stackForCall += ComputeByteAlignment(
+ stackForCall + framePushed() + alignmentAtPrologue, ABIStackAlignment);
+ }
+
+ *stackAdjust = stackForCall;
+ reserveStack(stackForCall);
+
+ // Position all arguments.
+ {
+ enoughMemory_ &= moveResolver_.resolve();
+ if (!enoughMemory_) {
+ return;
+ }
+
+ MoveEmitter emitter(*this);
+ emitter.emit(moveResolver_);
+ emitter.finish();
+ }
+
+ assertStackAlignment(ABIStackAlignment);
+
+ // Save the lr register if we need to preserve it.
+ if (secondScratchReg_ != lr) {
+ ma_mov(lr, secondScratchReg_);
+ }
+}
+
+void MacroAssembler::callWithABIPost(uint32_t stackAdjust, MoveOp::Type result,
+ bool callFromWasm) {
+ if (secondScratchReg_ != lr) {
+ ma_mov(secondScratchReg_, lr);
+ }
+
+ // Calls to native functions in wasm pass through a thunk which already
+ // fixes up the return value for us.
+ if (!callFromWasm && !UseHardFpABI()) {
+ switch (result) {
+ case MoveOp::DOUBLE:
+        // Move double from r0/r1 to ReturnDoubleReg.
+ ma_vxfer(r0, r1, ReturnDoubleReg);
+ break;
+ case MoveOp::FLOAT32:
+        // Move float32 from r0 to ReturnFloat32Reg.
+ ma_vxfer(r0, ReturnFloat32Reg);
+ break;
+ case MoveOp::GENERAL:
+ break;
+ default:
+ MOZ_CRASH("unexpected callWithABI result");
+ }
+ }
+
+ freeStack(stackAdjust);
+
+ if (dynamicAlignment_) {
+    // While x86 supports "pop esp", on ARM that isn't well defined, so just
+    // reload sp from the stack manually.
+ as_dtr(IsLoad, 32, Offset, sp, DTRAddr(sp, DtrOffImm(0)));
+ }
+
+#ifdef DEBUG
+ MOZ_ASSERT(inCall_);
+ inCall_ = false;
+#endif
+}
+
+void MacroAssembler::callWithABINoProfiler(Register fun, MoveOp::Type result) {
+ // Load the callee in r12, as above.
+ ma_mov(fun, r12);
+ uint32_t stackAdjust;
+ callWithABIPre(&stackAdjust);
+ call(r12);
+ callWithABIPost(stackAdjust, result);
+}
+
+void MacroAssembler::callWithABINoProfiler(const Address& fun,
+ MoveOp::Type result) {
+  // Load the callee in r12; no instruction between the ldr and the call
+  // should clobber it. Note that we can't use fun.base because it may be one
+  // of the IntArg registers clobbered before the call.
+ {
+ ScratchRegisterScope scratch(*this);
+ ma_ldr(fun, r12, scratch);
+ }
+ uint32_t stackAdjust;
+ callWithABIPre(&stackAdjust);
+ call(r12);
+ callWithABIPost(stackAdjust, result);
+}
+
+// ===============================================================
+// Jit Frames.
+
+uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) {
+  // On ARM, any read of pc yields the address of the current instruction
+  // plus 8, which corresponds to two instructions of 4 bytes. Thus we use an
+  // additional nop to pad until we reach the pushed pc.
+  //
+  // Note: In practice this should not be necessary, as this fake return
+  // address is never used for resuming any execution. Thus theoretically we
+  // could just do a Push(pc), and ignore the nop as well as the pool.
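+  //
+  // Sketch of what is emitted, with K the offset of the push:
+  //   K+0: push {pc}  ; stores K+8
+  //   K+4: nop        ; pads so that K+8 is the next offset
+  //   K+8: <pseudo return address points here>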
+ enterNoPool(2);
+ DebugOnly<uint32_t> offsetBeforePush = currentOffset();
+ Push(pc); // actually pushes $pc + 8.
+ ma_nop();
+ uint32_t pseudoReturnOffset = currentOffset();
+ leaveNoPool();
+
+ MOZ_ASSERT_IF(!oom(), pseudoReturnOffset - offsetBeforePush == 8);
+ return pseudoReturnOffset;
+}
+
+void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg, Register scratch,
+ ExitFrameType type) {
+ enterFakeExitFrame(cxreg, scratch, type);
+}
+
+// ===============================================================
+// Move instructions
+
+void MacroAssembler::moveValue(const TypedOrValueRegister& src,
+ const ValueOperand& dest) {
+ if (src.hasValue()) {
+ moveValue(src.valueReg(), dest);
+ return;
+ }
+
+ MIRType type = src.type();
+ AnyRegister reg = src.typedReg();
+
+ if (!IsFloatingPointType(type)) {
+ if (reg.gpr() != dest.payloadReg()) {
+ mov(reg.gpr(), dest.payloadReg());
+ }
+ mov(ImmWord(MIRTypeToTag(type)), dest.typeReg());
+ return;
+ }
+
+ ScratchDoubleScope scratch(*this);
+ FloatRegister freg = reg.fpu();
+ if (type == MIRType::Float32) {
+ convertFloat32ToDouble(freg, scratch);
+ freg = scratch;
+ }
+ ma_vxfer(freg, dest.payloadReg(), dest.typeReg());
+}
+
+void MacroAssembler::moveValue(const ValueOperand& src,
+ const ValueOperand& dest) {
+ Register s0 = src.typeReg();
+ Register s1 = src.payloadReg();
+ Register d0 = dest.typeReg();
+ Register d1 = dest.payloadReg();
+
+ // Either one or both of the source registers could be the same as a
+ // destination register.
+ if (s1 == d0) {
+ if (s0 == d1) {
+ // If both are, this is just a swap of two registers.
+ ScratchRegisterScope scratch(*this);
+ MOZ_ASSERT(d1 != scratch);
+ MOZ_ASSERT(d0 != scratch);
+ ma_mov(d1, scratch);
+ ma_mov(d0, d1);
+ ma_mov(scratch, d0);
+ return;
+ }
+ // If only one is, copy that source first.
+ std::swap(s0, s1);
+ std::swap(d0, d1);
+ }
+
+ if (s0 != d0) {
+ ma_mov(s0, d0);
+ }
+ if (s1 != d1) {
+ ma_mov(s1, d1);
+ }
+}
+
+void MacroAssembler::moveValue(const Value& src, const ValueOperand& dest) {
+ ma_mov(Imm32(src.toNunboxTag()), dest.typeReg());
+ if (src.isGCThing()) {
+ ma_mov(ImmGCPtr(src.toGCThing()), dest.payloadReg());
+ } else {
+ ma_mov(Imm32(src.toNunboxPayload()), dest.payloadReg());
+ }
+}
+
+// ===============================================================
+// Branch functions
+
+void MacroAssembler::loadStoreBuffer(Register ptr, Register buffer) {
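+  // Clear the low ChunkShift bits to round ptr down to its chunk base, then
+  // load the chunk's store-buffer pointer.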
+ ma_lsr(Imm32(gc::ChunkShift), ptr, buffer);
+ ma_lsl(Imm32(gc::ChunkShift), buffer, buffer);
+ load32(Address(buffer, gc::ChunkStoreBufferOffset), buffer);
+}
+
+void MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr,
+ Register temp, Label* label) {
+ Maybe<SecondScratchRegisterScope> scratch2;
+ if (temp == Register::Invalid()) {
+ scratch2.emplace(*this);
+ temp = scratch2.ref();
+ }
+
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ MOZ_ASSERT(ptr != temp);
+
+ ma_lsr(Imm32(gc::ChunkShift), ptr, temp);
+ ma_lsl(Imm32(gc::ChunkShift), temp, temp);
+ loadPtr(Address(temp, gc::ChunkStoreBufferOffset), temp);
+ branchPtr(InvertCondition(cond), temp, ImmWord(0), label);
+}
+
+void MacroAssembler::branchValueIsNurseryCell(Condition cond,
+ const Address& address,
+ Register temp, Label* label) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+
+ Label done;
+
+ branchTestGCThing(Assembler::NotEqual, address,
+ cond == Assembler::Equal ? &done : label);
+
+ loadPtr(ToPayload(address), temp);
+ SecondScratchRegisterScope scratch2(*this);
+ branchPtrInNurseryChunk(cond, temp, scratch2, label);
+
+ bind(&done);
+}
+
+void MacroAssembler::branchValueIsNurseryCell(Condition cond,
+ ValueOperand value, Register temp,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+
+ Label done;
+
+ branchTestGCThing(Assembler::NotEqual, value,
+ cond == Assembler::Equal ? &done : label);
+ branchPtrInNurseryChunk(cond, value.payloadReg(), temp, label);
+
+ bind(&done);
+}
+
+void MacroAssembler::branchTestValue(Condition cond, const ValueOperand& lhs,
+ const Value& rhs, Label* label) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+  // If cond == NotEqual, branch when a.payload != b.payload || a.tag !=
+  // b.tag. If the payloads are equal, compare the tags. If the payloads are
+  // not equal, short circuit true (NotEqual).
+  //
+  // If cond == Equal, branch when a.payload == b.payload && a.tag == b.tag.
+  // If the payloads are equal, compare the tags. If the payloads are not
+  // equal, short circuit false (NotEqual).
+ ScratchRegisterScope scratch(*this);
+
+ if (rhs.isGCThing()) {
+ ma_cmp(lhs.payloadReg(), ImmGCPtr(rhs.toGCThing()), scratch);
+ } else {
+ ma_cmp(lhs.payloadReg(), Imm32(rhs.toNunboxPayload()), scratch);
+ }
+ ma_cmp(lhs.typeReg(), Imm32(rhs.toNunboxTag()), scratch, Equal);
+ ma_b(label, cond);
+}
+
+// ========================================================================
+// Memory access primitives.
+template <typename T>
+void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
+ MIRType valueType, const T& dest) {
+ MOZ_ASSERT(valueType < MIRType::Value);
+
+ if (valueType == MIRType::Double) {
+ storeDouble(value.reg().typedReg().fpu(), dest);
+ return;
+ }
+
+ // Store the type tag.
+ storeTypeTag(ImmType(ValueTypeFromMIRType(valueType)), dest);
+
+ // Store the payload.
+ if (value.constant()) {
+ storePayload(value.value(), dest);
+ } else {
+ storePayload(value.reg().typedReg().gpr(), dest);
+ }
+}
+
+template void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
+ MIRType valueType,
+ const Address& dest);
+template void MacroAssembler::storeUnboxedValue(
+ const ConstantOrRegister& value, MIRType valueType,
+ const BaseObjectElementIndex& dest);
+
+CodeOffset MacroAssembler::wasmTrapInstruction() {
+ return CodeOffset(as_illegal_trap().getOffset());
+}
+
+void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
+ Register boundsCheckLimit, Label* ok) {
+ as_cmp(index, O2Reg(boundsCheckLimit));
+ as_b(ok, cond);
+ if (JitOptions.spectreIndexMasking) {
+ ma_mov(boundsCheckLimit, index, LeaveCC, cond);
+ }
+}
+
+void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
+ Address boundsCheckLimit, Label* ok) {
+ ScratchRegisterScope scratch(*this);
+ ma_ldr(DTRAddr(boundsCheckLimit.base, DtrOffImm(boundsCheckLimit.offset)),
+ scratch);
+ as_cmp(index, O2Reg(scratch));
+ as_b(ok, cond);
+ if (JitOptions.spectreIndexMasking) {
+ ma_mov(scratch, index, LeaveCC, cond);
+ }
+}
+
+void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
+ Register64 boundsCheckLimit, Label* ok) {
+ Label notOk;
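+  // The limit fits in 32 bits (only its low word is consulted), so an index
+  // with a non-zero high word is always out of bounds: route it to notOk and
+  // bounds-check only the low word.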
+ cmp32(index.high, Imm32(0));
+ j(Assembler::NonZero, &notOk);
+ wasmBoundsCheck32(cond, index.low, boundsCheckLimit.low, ok);
+ bind(&notOk);
+}
+
+void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
+ Address boundsCheckLimit, Label* ok) {
+ Label notOk;
+ cmp32(index.high, Imm32(0));
+ j(Assembler::NonZero, &notOk);
+ wasmBoundsCheck32(cond, index.low, boundsCheckLimit, ok);
+ bind(&notOk);
+}
+
+void MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input,
+ Register output,
+ bool isSaturating,
+ Label* oolEntry) {
+ wasmTruncateToInt32(input, output, MIRType::Double, /* isUnsigned= */ true,
+ isSaturating, oolEntry);
+}
+
+void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input,
+ Register output,
+ bool isSaturating,
+ Label* oolEntry) {
+ wasmTruncateToInt32(input, output, MIRType::Double, /* isUnsigned= */ false,
+ isSaturating, oolEntry);
+}
+
+void MacroAssembler::wasmTruncateFloat32ToUInt32(FloatRegister input,
+ Register output,
+ bool isSaturating,
+ Label* oolEntry) {
+ wasmTruncateToInt32(input, output, MIRType::Float32, /* isUnsigned= */ true,
+ isSaturating, oolEntry);
+}
+
+void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input,
+ Register output,
+ bool isSaturating,
+ Label* oolEntry) {
+ wasmTruncateToInt32(input, output, MIRType::Float32, /* isUnsigned= */ false,
+ isSaturating, oolEntry);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF32ToI32(FloatRegister input,
+ Register output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ outOfLineWasmTruncateToIntCheck(input, MIRType::Float32, MIRType::Int32,
+ flags, rejoin, off);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF64ToI32(FloatRegister input,
+ Register output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ outOfLineWasmTruncateToIntCheck(input, MIRType::Double, MIRType::Int32, flags,
+ rejoin, off);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF32ToI64(FloatRegister input,
+ Register64 output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ outOfLineWasmTruncateToIntCheck(input, MIRType::Float32, MIRType::Int64,
+ flags, rejoin, off);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF64ToI64(FloatRegister input,
+ Register64 output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ outOfLineWasmTruncateToIntCheck(input, MIRType::Double, MIRType::Int64, flags,
+ rejoin, off);
+}
+
+void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
+ Register memoryBase, Register ptr,
+ Register ptrScratch, AnyRegister output) {
+ wasmLoadImpl(access, memoryBase, ptr, ptrScratch, output,
+ Register64::Invalid());
+}
+
+void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access,
+ Register memoryBase, Register ptr,
+ Register ptrScratch, Register64 output) {
+ MOZ_ASSERT_IF(access.isAtomic(), access.byteSize() <= 4);
+ wasmLoadImpl(access, memoryBase, ptr, ptrScratch, AnyRegister(), output);
+}
+
+void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access,
+ AnyRegister value, Register memoryBase,
+ Register ptr, Register ptrScratch) {
+ wasmStoreImpl(access, value, Register64::Invalid(), memoryBase, ptr,
+ ptrScratch);
+}
+
+void MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access,
+ Register64 value, Register memoryBase,
+ Register ptr, Register ptrScratch) {
+ MOZ_ASSERT(!access.isAtomic());
+ wasmStoreImpl(access, AnyRegister(), value, memoryBase, ptr, ptrScratch);
+}
+
+// ========================================================================
+// Primitive atomic operations.
+
+static Register ComputePointerForAtomic(MacroAssembler& masm,
+ const BaseIndex& src, Register r) {
+ Register base = src.base;
+ Register index = src.index;
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+ int32_t offset = src.offset;
+
+ ScratchRegisterScope scratch(masm);
+
+ masm.as_add(r, base, lsl(index, scale));
+ if (offset != 0) {
+ masm.ma_add(r, Imm32(offset), r, scratch);
+ }
+ return r;
+}
+
+static Register ComputePointerForAtomic(MacroAssembler& masm,
+ const Address& src, Register r) {
+ ScratchRegisterScope scratch(masm);
+ if (src.offset == 0) {
+ return src.base;
+ }
+ masm.ma_add(src.base, Imm32(src.offset), r, scratch);
+ return r;
+}
+
+// General algorithm:
+//
+// ... ptr, <addr> ; compute address of item
+// dmb
+// L0 ldrex* output, [ptr]
+// sxt* output, output, 0 ; sign-extend if applicable
+// *xt* tmp, oldval, 0 ; sign-extend or zero-extend if applicable
+// cmp output, tmp
+// bne L1 ; failed - values are different
+// strex* tmp, newval, [ptr]
+// cmp tmp, 1
+// beq L0 ; failed - location is dirty, retry
+// L1 dmb
+//
+// Discussion here: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html.
+// However note that that discussion uses 'isb' as the trailing fence.
+// I've not quite figured out why, and I've gone with dmb here which
+// is safe. Also see the LLVM source, which uses 'dmb ish' generally.
+// (Apple's Swift CPU apparently handles ish in a non-default, faster
+// way.)
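+//
+// For example, for a signed 8-bit element the loop expands to (a sketch,
+// not literal emitted code):
+//
+//    dmb ish
+// L0 ldrexb output, [ptr]
+//    sxtb output, output
+//    sxtb scratch, oldval
+//    cmp output, scratch
+//    bne L1
+//    strexb scratch, newval, [ptr]
+//    cmp scratch, #1
+//    beq L0
+// L1 dmb ish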
+
+template <typename T>
+static void CompareExchange(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type type, const Synchronization& sync,
+ const T& mem, Register oldval, Register newval,
+ Register output) {
+ bool signExtend = Scalar::isSignedIntType(type);
+ unsigned nbytes = Scalar::byteSize(type);
+
+ MOZ_ASSERT(nbytes <= 4);
+
+ Label again;
+ Label done;
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
+ ScratchRegisterScope scratch(masm);
+
+ // NOTE: the generated code must match the assembly code in gen_cmpxchg in
+ // GenerateAtomicOperations.py
+ masm.memoryBarrierBefore(sync);
+
+ masm.bind(&again);
+
+ BufferOffset firstAccess;
+ switch (nbytes) {
+ case 1:
+ firstAccess = masm.as_ldrexb(output, ptr);
+ if (signExtend) {
+ masm.as_sxtb(output, output, 0);
+ masm.as_sxtb(scratch, oldval, 0);
+ } else {
+ masm.as_uxtb(scratch, oldval, 0);
+ }
+ break;
+ case 2:
+ firstAccess = masm.as_ldrexh(output, ptr);
+ if (signExtend) {
+ masm.as_sxth(output, output, 0);
+ masm.as_sxth(scratch, oldval, 0);
+ } else {
+ masm.as_uxth(scratch, oldval, 0);
+ }
+ break;
+ case 4:
+ firstAccess = masm.as_ldrex(output, ptr);
+ break;
+ }
+ if (access) {
+ masm.append(*access, firstAccess.getOffset());
+ }
+
+ if (nbytes < 4) {
+ masm.as_cmp(output, O2Reg(scratch));
+ } else {
+ masm.as_cmp(output, O2Reg(oldval));
+ }
+ masm.as_b(&done, MacroAssembler::NotEqual);
+ switch (nbytes) {
+ case 1:
+ masm.as_strexb(scratch, newval, ptr);
+ break;
+ case 2:
+ masm.as_strexh(scratch, newval, ptr);
+ break;
+ case 4:
+ masm.as_strex(scratch, newval, ptr);
+ break;
+ }
+ masm.as_cmp(scratch, Imm8(1));
+ masm.as_b(&again, MacroAssembler::Equal);
+ masm.bind(&done);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+void MacroAssembler::compareExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const Address& address, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, nullptr, type, sync, address, oldval, newval, output);
+}
+
+void MacroAssembler::compareExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const BaseIndex& address, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, nullptr, type, sync, address, oldval, newval, output);
+}
+
+void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
+ const Address& mem, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, &access, access.type(), access.sync(), mem, oldval,
+ newval, output);
+}
+
+void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, &access, access.type(), access.sync(), mem, oldval,
+ newval, output);
+}
+
+template <typename T>
+static void AtomicExchange(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type type, const Synchronization& sync,
+ const T& mem, Register value, Register output) {
+ bool signExtend = Scalar::isSignedIntType(type);
+ unsigned nbytes = Scalar::byteSize(type);
+
+ MOZ_ASSERT(nbytes <= 4);
+
+ // Bug 1077321: We may further optimize for ARMv8 (AArch32) here.
+ Label again;
+ Label done;
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
+ ScratchRegisterScope scratch(masm);
+
+ // NOTE: the generated code must match the assembly code in gen_exchange in
+ // GenerateAtomicOperations.py
+ masm.memoryBarrierBefore(sync);
+
+ masm.bind(&again);
+
+ BufferOffset firstAccess;
+ switch (nbytes) {
+ case 1:
+ firstAccess = masm.as_ldrexb(output, ptr);
+ if (signExtend) {
+ masm.as_sxtb(output, output, 0);
+ }
+ masm.as_strexb(scratch, value, ptr);
+ break;
+ case 2:
+ firstAccess = masm.as_ldrexh(output, ptr);
+ if (signExtend) {
+ masm.as_sxth(output, output, 0);
+ }
+ masm.as_strexh(scratch, value, ptr);
+ break;
+ case 4:
+ firstAccess = masm.as_ldrex(output, ptr);
+ masm.as_strex(scratch, value, ptr);
+ break;
+ }
+ if (access) {
+ masm.append(*access, firstAccess.getOffset());
+ }
+
+ masm.as_cmp(scratch, Imm8(1));
+ masm.as_b(&again, MacroAssembler::Equal);
+ masm.bind(&done);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+void MacroAssembler::atomicExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const Address& address, Register value,
+ Register output) {
+ AtomicExchange(*this, nullptr, type, sync, address, value, output);
+}
+
+void MacroAssembler::atomicExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const BaseIndex& address, Register value,
+ Register output) {
+ AtomicExchange(*this, nullptr, type, sync, address, value, output);
+}
+
+void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
+ const Address& mem, Register value,
+ Register output) {
+ AtomicExchange(*this, &access, access.type(), access.sync(), mem, value,
+ output);
+}
+
+void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem, Register value,
+ Register output) {
+ AtomicExchange(*this, &access, access.type(), access.sync(), mem, value,
+ output);
+}
+
+// General algorithm:
+//
+// ... ptr, <addr> ; compute address of item
+// dmb
+// L0 ldrex* output, [ptr]
+// sxt* output, output, 0 ; sign-extend if applicable
+// OP tmp, output, value ; compute value to store
+// strex* tmp2, tmp, [ptr] ; tmp2 required by strex
+// cmp tmp2, 1
+// beq L0 ; failed - location is dirty, retry
+// dmb ; ordering barrier required
+//
+// Also see notes above at compareExchange re the barrier strategy.
+//
+// Observe that the value being combined into the memory element need not be
+// sign-extended, because no OP makes use of bits to the left of the bits
+// indicated by the width of the element, and neither the output nor the bits
+// stored are affected by OP.
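+//
+// For example, an 8-bit AtomicFetchAddOp of 1 on a cell holding 0x7f loads
+// output == 127 (after sign-extension), computes 128 into the temp, and the
+// strexb stores only the low byte 0x80; the caller sees the old value 127,
+// and the cell now holds -128, which is the required int8 wrapping.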
+
+template <typename T>
+static void AtomicFetchOp(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type type, const Synchronization& sync,
+ AtomicOp op, const Register& value, const T& mem,
+ Register flagTemp, Register output) {
+ bool signExtend = Scalar::isSignedIntType(type);
+ unsigned nbytes = Scalar::byteSize(type);
+
+ MOZ_ASSERT(nbytes <= 4);
+ MOZ_ASSERT(flagTemp != InvalidReg);
+ MOZ_ASSERT(output != value);
+
+ Label again;
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
+ // NOTE: the generated code must match the assembly code in gen_fetchop in
+ // GenerateAtomicOperations.py
+ masm.memoryBarrierBefore(sync);
+
+ ScratchRegisterScope scratch(masm);
+
+ masm.bind(&again);
+
+ BufferOffset firstAccess;
+ switch (nbytes) {
+ case 1:
+ firstAccess = masm.as_ldrexb(output, ptr);
+ if (signExtend) {
+ masm.as_sxtb(output, output, 0);
+ }
+ break;
+ case 2:
+ firstAccess = masm.as_ldrexh(output, ptr);
+ if (signExtend) {
+ masm.as_sxth(output, output, 0);
+ }
+ break;
+ case 4:
+ firstAccess = masm.as_ldrex(output, ptr);
+ break;
+ }
+ if (access) {
+ masm.append(*access, firstAccess.getOffset());
+ }
+
+ switch (op) {
+ case AtomicFetchAddOp:
+ masm.as_add(scratch, output, O2Reg(value));
+ break;
+ case AtomicFetchSubOp:
+ masm.as_sub(scratch, output, O2Reg(value));
+ break;
+ case AtomicFetchAndOp:
+ masm.as_and(scratch, output, O2Reg(value));
+ break;
+ case AtomicFetchOrOp:
+ masm.as_orr(scratch, output, O2Reg(value));
+ break;
+ case AtomicFetchXorOp:
+ masm.as_eor(scratch, output, O2Reg(value));
+ break;
+ }
+ // Rd must differ from the two other arguments to strex.
+ switch (nbytes) {
+ case 1:
+ masm.as_strexb(flagTemp, scratch, ptr);
+ break;
+ case 2:
+ masm.as_strexh(flagTemp, scratch, ptr);
+ break;
+ case 4:
+ masm.as_strex(flagTemp, scratch, ptr);
+ break;
+ }
+ masm.as_cmp(flagTemp, Imm8(1));
+ masm.as_b(&again, MacroAssembler::Equal);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+void MacroAssembler::atomicFetchOp(Scalar::Type type,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const Address& mem,
+ Register temp, Register output) {
+ AtomicFetchOp(*this, nullptr, type, sync, op, value, mem, temp, output);
+}
+
+void MacroAssembler::atomicFetchOp(Scalar::Type type,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const BaseIndex& mem,
+ Register temp, Register output) {
+ AtomicFetchOp(*this, nullptr, type, sync, op, value, mem, temp, output);
+}
+
+void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const Address& mem, Register temp,
+ Register output) {
+ AtomicFetchOp(*this, &access, access.type(), access.sync(), op, value, mem,
+ temp, output);
+}
+
+void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const BaseIndex& mem, Register temp,
+ Register output) {
+ AtomicFetchOp(*this, &access, access.type(), access.sync(), op, value, mem,
+ temp, output);
+}
+
+// Uses both scratch registers, one for the address and one for a temp, plus a
+// caller-provided flag temp, because strex needs a result register distinct
+// from its other operands:
+//
+// ... ptr, <addr> ; compute address of item
+// dmb
+// L0 ldrex* temp, [ptr]
+// OP temp, temp, value ; compute value to store
+// strex* temp2, temp, [ptr]
+// cmp temp2, 1
+// beq L0 ; failed - location is dirty, retry
+// dmb ; ordering barrier required
+
+template <typename T>
+static void AtomicEffectOp(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type type, const Synchronization& sync,
+ AtomicOp op, const Register& value, const T& mem,
+ Register flagTemp) {
+ unsigned nbytes = Scalar::byteSize(type);
+
+ MOZ_ASSERT(nbytes <= 4);
+ MOZ_ASSERT(flagTemp != InvalidReg);
+
+ Label again;
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
+ masm.memoryBarrierBefore(sync);
+
+ ScratchRegisterScope scratch(masm);
+
+ masm.bind(&again);
+
+ BufferOffset firstAccess;
+ switch (nbytes) {
+ case 1:
+ firstAccess = masm.as_ldrexb(scratch, ptr);
+ break;
+ case 2:
+ firstAccess = masm.as_ldrexh(scratch, ptr);
+ break;
+ case 4:
+ firstAccess = masm.as_ldrex(scratch, ptr);
+ break;
+ }
+ if (access) {
+ masm.append(*access, firstAccess.getOffset());
+ }
+
+ switch (op) {
+ case AtomicFetchAddOp:
+ masm.as_add(scratch, scratch, O2Reg(value));
+ break;
+ case AtomicFetchSubOp:
+ masm.as_sub(scratch, scratch, O2Reg(value));
+ break;
+ case AtomicFetchAndOp:
+ masm.as_and(scratch, scratch, O2Reg(value));
+ break;
+ case AtomicFetchOrOp:
+ masm.as_orr(scratch, scratch, O2Reg(value));
+ break;
+ case AtomicFetchXorOp:
+ masm.as_eor(scratch, scratch, O2Reg(value));
+ break;
+ }
+ // Rd must differ from the two other arguments to strex.
+ switch (nbytes) {
+ case 1:
+ masm.as_strexb(flagTemp, scratch, ptr);
+ break;
+ case 2:
+ masm.as_strexh(flagTemp, scratch, ptr);
+ break;
+ case 4:
+ masm.as_strex(flagTemp, scratch, ptr);
+ break;
+ }
+ masm.as_cmp(flagTemp, Imm8(1));
+ masm.as_b(&again, MacroAssembler::Equal);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const Address& mem, Register temp) {
+ AtomicEffectOp(*this, &access, access.type(), access.sync(), op, value, mem,
+ temp);
+}
+
+void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const BaseIndex& mem, Register temp) {
+ AtomicEffectOp(*this, &access, access.type(), access.sync(), op, value, mem,
+ temp);
+}
+
+template <typename T>
+static void AtomicLoad64(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ const Synchronization& sync, const T& mem,
+ Register64 output) {
+ MOZ_ASSERT((output.low.code() & 1) == 0);
+ MOZ_ASSERT(output.low.code() + 1 == output.high.code());
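+
+  // ldrexd/strexd operate on an even/odd register pair (e.g. r0/r1 or
+  // r4/r5), which is what the assertions above enforce.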
+
+ masm.memoryBarrierBefore(sync);
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
+ BufferOffset load = masm.as_ldrexd(output.low, output.high, ptr);
+ if (access) {
+ masm.append(*access, load.getOffset());
+ }
+ masm.as_clrex();
+
+ masm.memoryBarrierAfter(sync);
+}
+
+template <typename T>
+static void WasmAtomicLoad64(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc& access, const T& mem,
+ Register64 temp, Register64 output) {
+ MOZ_ASSERT(temp.low == InvalidReg && temp.high == InvalidReg);
+
+ AtomicLoad64(masm, &access, access.sync(), mem, output);
+}
+
+void MacroAssembler::wasmAtomicLoad64(const wasm::MemoryAccessDesc& access,
+ const Address& mem, Register64 temp,
+ Register64 output) {
+ WasmAtomicLoad64(*this, access, mem, temp, output);
+}
+
+void MacroAssembler::wasmAtomicLoad64(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem, Register64 temp,
+ Register64 output) {
+ WasmAtomicLoad64(*this, access, mem, temp, output);
+}
+
+template <typename T>
+static void CompareExchange64(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ const Synchronization& sync, const T& mem,
+ Register64 expect, Register64 replace,
+ Register64 output) {
+ MOZ_ASSERT(expect != replace && replace != output && output != expect);
+
+ MOZ_ASSERT((replace.low.code() & 1) == 0);
+ MOZ_ASSERT(replace.low.code() + 1 == replace.high.code());
+
+ MOZ_ASSERT((output.low.code() & 1) == 0);
+ MOZ_ASSERT(output.low.code() + 1 == output.high.code());
+
+ Label again;
+ Label done;
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
+ // NOTE: the generated code must match the assembly code in gen_cmpxchg in
+ // GenerateAtomicOperations.py
+ masm.memoryBarrierBefore(sync);
+
+ masm.bind(&again);
+ BufferOffset load = masm.as_ldrexd(output.low, output.high, ptr);
+ if (access) {
+ masm.append(*access, load.getOffset());
+ }
+
+ masm.as_cmp(output.low, O2Reg(expect.low));
+ masm.as_cmp(output.high, O2Reg(expect.high), MacroAssembler::Equal);
+ masm.as_b(&done, MacroAssembler::NotEqual);
+
+ ScratchRegisterScope scratch(masm);
+
+ // Rd (temp) must differ from the two other arguments to strex.
+ masm.as_strexd(scratch, replace.low, replace.high, ptr);
+ masm.as_cmp(scratch, Imm8(1));
+ masm.as_b(&again, MacroAssembler::Equal);
+ masm.bind(&done);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
+ const Address& mem,
+ Register64 expect,
+ Register64 replace,
+ Register64 output) {
+ CompareExchange64(*this, &access, access.sync(), mem, expect, replace,
+ output);
+}
+
+void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem,
+ Register64 expect,
+ Register64 replace,
+ Register64 output) {
+ CompareExchange64(*this, &access, access.sync(), mem, expect, replace,
+ output);
+}
+
+void MacroAssembler::compareExchange64(const Synchronization& sync,
+ const Address& mem, Register64 expect,
+ Register64 replace, Register64 output) {
+ CompareExchange64(*this, nullptr, sync, mem, expect, replace, output);
+}
+
+void MacroAssembler::compareExchange64(const Synchronization& sync,
+ const BaseIndex& mem, Register64 expect,
+ Register64 replace, Register64 output) {
+ CompareExchange64(*this, nullptr, sync, mem, expect, replace, output);
+}
+
+template <typename T>
+static void AtomicExchange64(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ const Synchronization& sync, const T& mem,
+ Register64 value, Register64 output) {
+ MOZ_ASSERT(output != value);
+
+ MOZ_ASSERT((value.low.code() & 1) == 0);
+ MOZ_ASSERT(value.low.code() + 1 == value.high.code());
+
+ MOZ_ASSERT((output.low.code() & 1) == 0);
+ MOZ_ASSERT(output.low.code() + 1 == output.high.code());
+
+ Label again;
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
+ masm.memoryBarrierBefore(sync);
+
+ masm.bind(&again);
+ BufferOffset load = masm.as_ldrexd(output.low, output.high, ptr);
+ if (access) {
+ masm.append(*access, load.getOffset());
+ }
+
+ ScratchRegisterScope scratch(masm);
+
+ masm.as_strexd(scratch, value.low, value.high, ptr);
+ masm.as_cmp(scratch, Imm8(1));
+ masm.as_b(&again, MacroAssembler::Equal);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+template <typename T>
+static void WasmAtomicExchange64(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc& access,
+ const T& mem, Register64 value,
+ Register64 output) {
+ AtomicExchange64(masm, &access, access.sync(), mem, value, output);
+}
+
+void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
+ const Address& mem, Register64 value,
+ Register64 output) {
+ WasmAtomicExchange64(*this, access, mem, value, output);
+}
+
+void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem,
+ Register64 value, Register64 output) {
+ WasmAtomicExchange64(*this, access, mem, value, output);
+}
+
+void MacroAssembler::atomicExchange64(const Synchronization& sync,
+ const Address& mem, Register64 value,
+ Register64 output) {
+ AtomicExchange64(*this, nullptr, sync, mem, value, output);
+}
+
+void MacroAssembler::atomicExchange64(const Synchronization& sync,
+ const BaseIndex& mem, Register64 value,
+ Register64 output) {
+ AtomicExchange64(*this, nullptr, sync, mem, value, output);
+}
+
+template <typename T>
+static void AtomicFetchOp64(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ const Synchronization& sync, AtomicOp op,
+ Register64 value, const T& mem, Register64 temp,
+ Register64 output) {
+ MOZ_ASSERT(temp.low != InvalidReg && temp.high != InvalidReg);
+ MOZ_ASSERT(output != value);
+ MOZ_ASSERT(temp != value);
+
+ MOZ_ASSERT((temp.low.code() & 1) == 0);
+ MOZ_ASSERT(temp.low.code() + 1 == temp.high.code());
+
+ // We could avoid this pair requirement but in that case we would end up
+ // with two moves in the loop to preserve the loaded value in output. The
+ // prize would be less register spilling around this op since the pair
+ // requirement will tend to force more spilling.
+
+ MOZ_ASSERT((output.low.code() & 1) == 0);
+ MOZ_ASSERT(output.low.code() + 1 == output.high.code());
+
+ Label again;
+
+ SecondScratchRegisterScope scratch2(masm);
+ Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
+
+ masm.memoryBarrierBefore(sync);
+
+ masm.bind(&again);
+ BufferOffset load = masm.as_ldrexd(output.low, output.high, ptr);
+ if (access) {
+ masm.append(*access, load.getOffset());
+ }
+ switch (op) {
+ case AtomicFetchAddOp:
+ masm.as_add(temp.low, output.low, O2Reg(value.low), SetCC);
+ masm.as_adc(temp.high, output.high, O2Reg(value.high));
+ break;
+ case AtomicFetchSubOp:
+ masm.as_sub(temp.low, output.low, O2Reg(value.low), SetCC);
+ masm.as_sbc(temp.high, output.high, O2Reg(value.high));
+ break;
+ case AtomicFetchAndOp:
+ masm.as_and(temp.low, output.low, O2Reg(value.low));
+ masm.as_and(temp.high, output.high, O2Reg(value.high));
+ break;
+ case AtomicFetchOrOp:
+ masm.as_orr(temp.low, output.low, O2Reg(value.low));
+ masm.as_orr(temp.high, output.high, O2Reg(value.high));
+ break;
+ case AtomicFetchXorOp:
+ masm.as_eor(temp.low, output.low, O2Reg(value.low));
+ masm.as_eor(temp.high, output.high, O2Reg(value.high));
+ break;
+ }
+
+ ScratchRegisterScope scratch(masm);
+
+ // Rd (temp) must differ from the two other arguments to strex.
+ masm.as_strexd(scratch, temp.low, temp.high, ptr);
+ masm.as_cmp(scratch, Imm8(1));
+ masm.as_b(&again, MacroAssembler::Equal);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+template <typename T>
+static void WasmAtomicFetchOp64(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register64 value, const T& mem,
+ Register64 temp, Register64 output) {
+ AtomicFetchOp64(masm, &access, access.sync(), op, value, mem, temp, output);
+}
+
+void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register64 value,
+ const Address& mem, Register64 temp,
+ Register64 output) {
+ WasmAtomicFetchOp64(*this, access, op, value, mem, temp, output);
+}
+
+void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register64 value,
+ const BaseIndex& mem, Register64 temp,
+ Register64 output) {
+ WasmAtomicFetchOp64(*this, access, op, value, mem, temp, output);
+}
+
+void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const Address& mem,
+ Register64 temp, Register64 output) {
+ AtomicFetchOp64(*this, nullptr, sync, op, value, mem, temp, output);
+}
+
+void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const BaseIndex& mem,
+ Register64 temp, Register64 output) {
+ AtomicFetchOp64(*this, nullptr, sync, op, value, mem, temp, output);
+}
+
+void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const Address& mem,
+ Register64 temp) {
+ AtomicFetchOp64(*this, nullptr, sync, op, value, mem, temp, temp);
+}
+
+void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const BaseIndex& mem,
+ Register64 temp) {
+ AtomicFetchOp64(*this, nullptr, sync, op, value, mem, temp, temp);
+}
+
+// ========================================================================
+// JS atomic operations.
+
+template <typename T>
+static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
+ const Synchronization& sync, const T& mem,
+ Register oldval, Register newval, Register temp,
+ AnyRegister output) {
+ if (arrayType == Scalar::Uint32) {
+ masm.compareExchange(arrayType, sync, mem, oldval, newval, temp);
+ masm.convertUInt32ToDouble(temp, output.fpu());
+ } else {
+ masm.compareExchange(arrayType, sync, mem, oldval, newval, output.gpr());
+ }
+}
+
+void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const Address& mem, Register oldval,
+ Register newval, Register temp,
+ AnyRegister output) {
+ CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
+}
+
+void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const BaseIndex& mem, Register oldval,
+ Register newval, Register temp,
+ AnyRegister output) {
+ CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
+}
+
+template <typename T>
+static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
+ const Synchronization& sync, const T& mem,
+ Register value, Register temp,
+ AnyRegister output) {
+ if (arrayType == Scalar::Uint32) {
+ masm.atomicExchange(arrayType, sync, mem, value, temp);
+ masm.convertUInt32ToDouble(temp, output.fpu());
+ } else {
+ masm.atomicExchange(arrayType, sync, mem, value, output.gpr());
+ }
+}
+
+void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const Address& mem, Register value,
+ Register temp, AnyRegister output) {
+ AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
+}
+
+void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const BaseIndex& mem, Register value,
+ Register temp, AnyRegister output) {
+ AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
+}
+
+template <typename T>
+static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const T& mem, Register temp1,
+ Register temp2, AnyRegister output) {
+ if (arrayType == Scalar::Uint32) {
+ masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1);
+ masm.convertUInt32ToDouble(temp1, output.fpu());
+ } else {
+ masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr());
+ }
+}
+
+void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const Address& mem,
+ Register temp1, Register temp2,
+ AnyRegister output) {
+ AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
+}
+
+void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const BaseIndex& mem,
+ Register temp1, Register temp2,
+ AnyRegister output) {
+ AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
+}
+
+void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const BaseIndex& mem,
+ Register temp) {
+ AtomicEffectOp(*this, nullptr, arrayType, sync, op, value, mem, temp);
+}
+
+void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const Address& mem,
+ Register temp) {
+ AtomicEffectOp(*this, nullptr, arrayType, sync, op, value, mem, temp);
+}
+
+// ========================================================================
+// Primitive atomic operations.
+
+void MacroAssembler::atomicLoad64(const Synchronization& sync,
+ const Address& mem, Register64 output) {
+ AtomicLoad64(*this, nullptr, sync, mem, output);
+}
+
+void MacroAssembler::atomicLoad64(const Synchronization& sync,
+ const BaseIndex& mem, Register64 output) {
+ AtomicLoad64(*this, nullptr, sync, mem, output);
+}
+
+void MacroAssembler::atomicStore64(const Synchronization& sync,
+ const Address& mem, Register64 value,
+ Register64 temp) {
+ AtomicExchange64(*this, nullptr, sync, mem, value, temp);
+}
+
+void MacroAssembler::atomicStore64(const Synchronization& sync,
+ const BaseIndex& mem, Register64 value,
+ Register64 temp) {
+ AtomicExchange64(*this, nullptr, sync, mem, value, temp);
+}
+
+// ========================================================================
+// Convert floating point.
+
+bool MacroAssembler::convertUInt64ToDoubleNeedsTemp() { return false; }
+
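+// Computes dest = double(src.high) * 2^32 + double(src.low);
+// TO_DOUBLE_HIGH_SCALE holds the 2^32 scale factor. The signed variant
+// below has the same shape, differing only in converting the high word as
+// signed rather than unsigned.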
+void MacroAssembler::convertUInt64ToDouble(Register64 src, FloatRegister dest,
+ Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+ ScratchDoubleScope scratchDouble(*this);
+
+ convertUInt32ToDouble(src.high, dest);
+ {
+ ScratchRegisterScope scratch(*this);
+ movePtr(ImmPtr(&TO_DOUBLE_HIGH_SCALE), scratch);
+ ma_vldr(Operand(Address(scratch, 0)).toVFPAddr(), scratchDouble);
+ }
+ mulDouble(scratchDouble, dest);
+ convertUInt32ToDouble(src.low, scratchDouble);
+ addDouble(scratchDouble, dest);
+}
+
+void MacroAssembler::convertInt64ToDouble(Register64 src, FloatRegister dest) {
+ ScratchDoubleScope scratchDouble(*this);
+
+ convertInt32ToDouble(src.high, dest);
+ {
+ ScratchRegisterScope scratch(*this);
+ movePtr(ImmPtr(&TO_DOUBLE_HIGH_SCALE), scratch);
+ ma_vldr(Operand(Address(scratch, 0)).toVFPAddr(), scratchDouble);
+ }
+ mulDouble(scratchDouble, dest);
+ convertUInt32ToDouble(src.low, scratchDouble);
+ addDouble(scratchDouble, dest);
+}
+
+void MacroAssembler::convertIntPtrToDouble(Register src, FloatRegister dest) {
+ convertInt32ToDouble(src, dest);
+}
+
+extern "C" {
+extern MOZ_EXPORT int64_t __aeabi_idivmod(int, int);
+extern MOZ_EXPORT int64_t __aeabi_uidivmod(int, int);
+}
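+// Per the ARM EABI, these helpers return the quotient in r0 and the
+// remainder in r1. Declaring them as returning int64_t exposes that
+// register pair to the ABI call machinery: ReturnRegVal0 picks up the
+// quotient and ReturnRegVal1 the remainder.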
+
+inline void EmitRemainderOrQuotient(bool isRemainder, MacroAssembler& masm,
+ Register rhs, Register lhsOutput,
+ bool isUnsigned,
+ const LiveRegisterSet& volatileLiveRegs) {
+ // Currently this helper can't handle this situation.
+ MOZ_ASSERT(lhsOutput != rhs);
+
+ if (HasIDIV()) {
+ if (isRemainder) {
+ masm.remainder32(rhs, lhsOutput, isUnsigned);
+ } else {
+ masm.quotient32(rhs, lhsOutput, isUnsigned);
+ }
+ } else {
+    // Ensure that the output registers are saved and restored properly.
+ MOZ_ASSERT(volatileLiveRegs.has(ReturnRegVal0));
+ MOZ_ASSERT(volatileLiveRegs.has(ReturnRegVal1));
+
+ masm.PushRegsInMask(volatileLiveRegs);
+ using Fn = int64_t (*)(int, int);
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.setupUnalignedABICall(scratch);
+ }
+ masm.passABIArg(lhsOutput);
+ masm.passABIArg(rhs);
+ if (isUnsigned) {
+ masm.callWithABI<Fn, __aeabi_uidivmod>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther);
+ } else {
+ masm.callWithABI<Fn, __aeabi_idivmod>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther);
+ }
+ if (isRemainder) {
+ masm.mov(ReturnRegVal1, lhsOutput);
+ } else {
+ masm.mov(ReturnRegVal0, lhsOutput);
+ }
+
+ LiveRegisterSet ignore;
+ ignore.add(lhsOutput);
+ masm.PopRegsInMaskIgnore(volatileLiveRegs, ignore);
+ }
+}
+
+void MacroAssembler::flexibleQuotient32(
+ Register rhs, Register srcDest, bool isUnsigned,
+ const LiveRegisterSet& volatileLiveRegs) {
+ EmitRemainderOrQuotient(false, *this, rhs, srcDest, isUnsigned,
+ volatileLiveRegs);
+}
+
+void MacroAssembler::flexibleRemainder32(
+ Register rhs, Register srcDest, bool isUnsigned,
+ const LiveRegisterSet& volatileLiveRegs) {
+ EmitRemainderOrQuotient(true, *this, rhs, srcDest, isUnsigned,
+ volatileLiveRegs);
+}
+
+void MacroAssembler::flexibleDivMod32(Register rhs, Register lhsOutput,
+ Register remOutput, bool isUnsigned,
+ const LiveRegisterSet& volatileLiveRegs) {
+ // Currently this helper can't handle this situation.
+ MOZ_ASSERT(lhsOutput != rhs);
+
+ if (HasIDIV()) {
+ mov(lhsOutput, remOutput);
+ remainder32(rhs, remOutput, isUnsigned);
+ quotient32(rhs, lhsOutput, isUnsigned);
+ } else {
+    // Ensure that the output registers are saved and restored properly.
+ MOZ_ASSERT(volatileLiveRegs.has(ReturnRegVal0));
+ MOZ_ASSERT(volatileLiveRegs.has(ReturnRegVal1));
+ PushRegsInMask(volatileLiveRegs);
+
+ using Fn = int64_t (*)(int, int);
+ {
+ ScratchRegisterScope scratch(*this);
+ setupUnalignedABICall(scratch);
+ }
+ passABIArg(lhsOutput);
+ passABIArg(rhs);
+ if (isUnsigned) {
+ callWithABI<Fn, __aeabi_uidivmod>(MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckOther);
+ } else {
+ callWithABI<Fn, __aeabi_idivmod>(MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckOther);
+ }
+ moveRegPair(ReturnRegVal0, ReturnRegVal1, lhsOutput, remOutput);
+
+ LiveRegisterSet ignore;
+ ignore.add(remOutput);
+ ignore.add(lhsOutput);
+ PopRegsInMaskIgnore(volatileLiveRegs, ignore);
+ }
+}
+
+CodeOffset MacroAssembler::moveNearAddressWithPatch(Register dest) {
+ return movWithPatch(ImmPtr(nullptr), dest);
+}
+
+void MacroAssembler::patchNearAddressMove(CodeLocationLabel loc,
+ CodeLocationLabel target) {
+ PatchDataWithValueCheck(loc, ImmPtr(target.raw()), ImmPtr(nullptr));
+}
+
+// ========================================================================
+// Spectre Mitigations.
+
+void MacroAssembler::speculationBarrier() {
+ // Spectre mitigation recommended by ARM for cases where csel/cmov cannot be
+ // used.
+ as_csdb();
+}
+
+void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ floorf(src, dest, fail);
+}
+
+void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ floor(src, dest, fail);
+}
+
+void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ceilf(src, dest, fail);
+}
+
+void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ceil(src, dest, fail);
+}
+
+void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest,
+ FloatRegister temp, Label* fail) {
+ roundf(src, dest, fail, temp);
+}
+
+void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest,
+ FloatRegister temp, Label* fail) {
+ round(src, dest, fail, temp);
+}
+
+void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ truncf(src, dest, fail);
+}
+
+void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ trunc(src, dest, fail);
+}
+
+void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src,
+ FloatRegister dest) {
+ MOZ_CRASH("not supported on this platform");
+}
+
+void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src,
+ FloatRegister dest) {
+ MOZ_CRASH("not supported on this platform");
+}
+
+void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister output) {
+ MOZ_CRASH("not supported on this platform");
+}
+
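+// Computes pointer += (indexTemp32 << shift). A shift small enough to be
+// encoded as an addressing-mode Scale (times 1, 2, 4, or 8) folds into a
+// single effective-address computation; larger shifts shift the index in
+// place first (clobbering indexTemp32) and then add.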
+void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
+ Register pointer) {
+ if (IsShiftInScaleRange(shift)) {
+ computeEffectiveAddress(
+ BaseIndex(pointer, indexTemp32, ShiftToScale(shift)), pointer);
+ return;
+ }
+ lshift32(Imm32(shift), indexTemp32);
+ addPtr(indexTemp32, pointer);
+}
+
+//}}} check_macroassembler_style
+
+void MacroAssemblerARM::wasmTruncateToInt32(FloatRegister input,
+ Register output, MIRType fromType,
+ bool isUnsigned, bool isSaturating,
+ Label* oolEntry) {
+ ScratchDoubleScope scratchScope(asMasm());
+ ScratchRegisterScope scratchReg(asMasm());
+ FloatRegister scratch = scratchScope.uintOverlay();
+
+ // ARM conversion instructions clamp the value to ensure it fits within the
+ // target's type bounds, so every time we see those, we need to check the
+ // input. A NaN check is not necessary because NaN is converted to zero and
+ // on a zero result we branch out of line to do further processing anyway.
+ if (isUnsigned) {
+ if (fromType == MIRType::Double) {
+ ma_vcvt_F64_U32(input, scratch);
+ } else if (fromType == MIRType::Float32) {
+ ma_vcvt_F32_U32(input, scratch);
+ } else {
+ MOZ_CRASH("unexpected type in visitWasmTruncateToInt32");
+ }
+
+ ma_vxfer(scratch, output);
+
+ if (!isSaturating) {
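+      // vcvt clamps out-of-range inputs to the endpoints 0 and UINT32_MAX
+      // (and maps NaN to 0), so either endpoint result may hide an
+      // out-of-range input; re-check those two values out of line.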
+ // int32_t(UINT32_MAX) == -1.
+ ma_cmp(output, Imm32(-1), scratchReg);
+ as_cmp(output, Imm8(0), Assembler::NotEqual);
+ ma_b(oolEntry, Assembler::Equal);
+ }
+
+ return;
+ }
+
+ // vcvt* converts NaN into 0, so check for NaNs here.
+ if (!isSaturating) {
+ if (fromType == MIRType::Double) {
+ asMasm().compareDouble(input, input);
+ } else if (fromType == MIRType::Float32) {
+ asMasm().compareFloat(input, input);
+ } else {
+ MOZ_CRASH("unexpected type in visitWasmTruncateToInt32");
+ }
+
+ ma_b(oolEntry, Assembler::VFP_Unordered);
+ }
+
+ scratch = scratchScope.sintOverlay();
+
+ if (fromType == MIRType::Double) {
+ ma_vcvt_F64_I32(input, scratch);
+ } else if (fromType == MIRType::Float32) {
+ ma_vcvt_F32_I32(input, scratch);
+ } else {
+ MOZ_CRASH("unexpected type in visitWasmTruncateToInt32");
+ }
+
+ ma_vxfer(scratch, output);
+
+ if (!isSaturating) {
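+    // The conditional second compare leaves Equal set iff output is
+    // INT32_MAX or INT32_MIN, the only results the clamping conversion
+    // produces for out-of-range inputs.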
+ ma_cmp(output, Imm32(INT32_MAX), scratchReg);
+ ma_cmp(output, Imm32(INT32_MIN), scratchReg, Assembler::NotEqual);
+ ma_b(oolEntry, Assembler::Equal);
+ }
+}
+
+void MacroAssemblerARM::outOfLineWasmTruncateToIntCheck(
+ FloatRegister input, MIRType fromType, MIRType toType, TruncFlags flags,
+ Label* rejoin, wasm::BytecodeOffset trapOffset) {
+ // On ARM, saturating truncation codegen handles saturating itself rather
+ // than relying on out-of-line fixup code.
+ if (flags & TRUNC_SATURATING) {
+ return;
+ }
+
+ bool isUnsigned = flags & TRUNC_UNSIGNED;
+ ScratchDoubleScope scratchScope(asMasm());
+ FloatRegister scratch;
+
+ // Eagerly take care of NaNs.
+ Label inputIsNaN;
+ if (fromType == MIRType::Double) {
+ asMasm().branchDouble(Assembler::DoubleUnordered, input, input,
+ &inputIsNaN);
+ } else if (fromType == MIRType::Float32) {
+ asMasm().branchFloat(Assembler::DoubleUnordered, input, input, &inputIsNaN);
+ } else {
+ MOZ_CRASH("unexpected type in visitOutOfLineWasmTruncateCheck");
+ }
+
+ // Handle special values.
+ Label fail;
+
+ // By default test for the following inputs and bail:
+  // signed: ] -Inf, INTXX_MIN - 1.0 ] and [ INTXX_MAX + 1.0, +Inf [
+  // unsigned: ] -Inf, -1.0 ] and [ UINTXX_MAX + 1.0, +Inf [
+  // Note: we cannot always represent those exact values as doubles/floats,
+  // so the actual comparison bounds and conditions are adjusted slightly.
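+  // For example, for a signed float32->int32 truncation, INT32_MAX itself
+  // is not representable as a float32, but INT32_MAX + 1.0 (2^31) is, so
+  // comparing input >= 2^31 rejects exactly the inputs that would overflow.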
+ double minValue, maxValue;
+ Assembler::DoubleCondition minCond = Assembler::DoubleLessThanOrEqual;
+ Assembler::DoubleCondition maxCond = Assembler::DoubleGreaterThanOrEqual;
+ if (toType == MIRType::Int64) {
+ if (isUnsigned) {
+ minValue = -1;
+ maxValue = double(UINT64_MAX) + 1.0;
+ } else {
+      // In the float32/double range there exists no value between
+      // INT64_MIN and INT64_MIN - 1.0, making INT64_MIN the lower bound.
+ minValue = double(INT64_MIN);
+ minCond = Assembler::DoubleLessThan;
+ maxValue = double(INT64_MAX) + 1.0;
+ }
+ } else {
+ if (isUnsigned) {
+ minValue = -1;
+ maxValue = double(UINT32_MAX) + 1.0;
+ } else {
+ if (fromType == MIRType::Float32) {
+        // In the float32 range there exists no value between
+        // INT32_MIN and INT32_MIN - 1.0, making INT32_MIN the lower bound.
+ minValue = double(INT32_MIN);
+ minCond = Assembler::DoubleLessThan;
+ } else {
+ minValue = double(INT32_MIN) - 1.0;
+ }
+ maxValue = double(INT32_MAX) + 1.0;
+ }
+ }
+
+ if (fromType == MIRType::Double) {
+ scratch = scratchScope.doubleOverlay();
+ asMasm().loadConstantDouble(minValue, scratch);
+ asMasm().branchDouble(minCond, input, scratch, &fail);
+
+ asMasm().loadConstantDouble(maxValue, scratch);
+ asMasm().branchDouble(maxCond, input, scratch, &fail);
+ } else {
+ MOZ_ASSERT(fromType == MIRType::Float32);
+ scratch = scratchScope.singleOverlay();
+ asMasm().loadConstantFloat32(float(minValue), scratch);
+ asMasm().branchFloat(minCond, input, scratch, &fail);
+
+ asMasm().loadConstantFloat32(float(maxValue), scratch);
+ asMasm().branchFloat(maxCond, input, scratch, &fail);
+ }
+
+ // We had an actual correct value, get back to where we were.
+ ma_b(rejoin);
+
+ // Handle errors.
+ bind(&fail);
+ asMasm().wasmTrap(wasm::Trap::IntegerOverflow, trapOffset);
+
+ bind(&inputIsNaN);
+ asMasm().wasmTrap(wasm::Trap::InvalidConversionToInteger, trapOffset);
+}
+
+void MacroAssemblerARM::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
+ Register memoryBase, Register ptr,
+ Register ptrScratch, AnyRegister output,
+ Register64 out64) {
+ MOZ_ASSERT(ptr == ptrScratch);
+ MOZ_ASSERT(!access.isZeroExtendSimd128Load());
+ MOZ_ASSERT(!access.isSplatSimd128Load());
+ MOZ_ASSERT(!access.isWidenSimd128Load());
+
+ uint32_t offset = access.offset();
+ MOZ_ASSERT(offset < asMasm().wasmMaxOffsetGuardLimit());
+
+ Scalar::Type type = access.type();
+
+ // Maybe add the offset.
+ if (offset || type == Scalar::Int64) {
+ ScratchRegisterScope scratch(asMasm());
+ if (offset) {
+ ma_add(Imm32(offset), ptr, scratch);
+ }
+ }
+
+ bool isSigned = type == Scalar::Int8 || type == Scalar::Int16 ||
+ type == Scalar::Int32 || type == Scalar::Int64;
+ unsigned byteSize = access.byteSize();
+
+ // NOTE: the generated code must match the assembly code in gen_load in
+ // GenerateAtomicOperations.py
+ asMasm().memoryBarrierBefore(access.sync());
+
+ BufferOffset load;
+ if (out64 != Register64::Invalid()) {
+ if (type == Scalar::Int64) {
+ static_assert(INT64LOW_OFFSET == 0);
+
+ load = ma_dataTransferN(IsLoad, 32, /* signed = */ false, memoryBase, ptr,
+ out64.low);
+ append(access, load.getOffset());
+
+ as_add(ptr, ptr, Imm8(INT64HIGH_OFFSET));
+
+ load =
+ ma_dataTransferN(IsLoad, 32, isSigned, memoryBase, ptr, out64.high);
+ append(access, load.getOffset());
+ } else {
+ load = ma_dataTransferN(IsLoad, byteSize * 8, isSigned, memoryBase, ptr,
+ out64.low);
+ append(access, load.getOffset());
+
+ if (isSigned) {
+ ma_asr(Imm32(31), out64.low, out64.high);
+ } else {
+ ma_mov(Imm32(0), out64.high);
+ }
+ }
+ } else {
+ bool isFloat = output.isFloat();
+ if (isFloat) {
+ MOZ_ASSERT((byteSize == 4) == output.fpu().isSingle());
+ ScratchRegisterScope scratch(asMasm());
+ FloatRegister dest = output.fpu();
+ ma_add(memoryBase, ptr, scratch);
+
+ // FP loads can't use VLDR as that has stringent alignment checks and will
+ // SIGBUS on unaligned accesses. Choose a different strategy depending on
+ // the available hardware. We don't gate Wasm on the presence of NEON.
+ if (HasNEON()) {
+ // NEON available: The VLD1 multiple-single-elements variant will only
+      // trap if SCTLR.A==1, but we already assume (for integer accesses) that
+ // the hardware/OS handles that transparently.
+ //
+ // An additional complication is that if we're targeting the high single
+ // then an unaligned load is not possible, and we may need to go via the
+ // FPR scratch.
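+        // (Odd-numbered singles s1, s3, ... are the high halves of
+        // d0, d1, ..., hence the dest.code() & 1 test.)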
+ if (byteSize == 4 && dest.code() & 1) {
+ ScratchFloat32Scope fscratch(asMasm());
+ load = as_vldr_unaligned(fscratch, scratch);
+ as_vmov(dest, fscratch);
+ } else {
+ load = as_vldr_unaligned(dest, scratch);
+ }
+ } else {
+ // NEON not available: Load to GPR scratch, move to FPR destination. We
+ // don't have adjacent scratches for the f64, so use individual LDRs,
+ // not LDRD.
+ SecondScratchRegisterScope scratch2(asMasm());
+ if (byteSize == 4) {
+ load = as_dtr(IsLoad, 32, Offset, scratch2,
+ DTRAddr(scratch, DtrOffImm(0)), Always);
+ as_vxfer(scratch2, InvalidReg, VFPRegister(dest), CoreToFloat,
+ Always);
+ } else {
+ // The trap information is associated with the load of the high word,
+ // which must be done first.
+ load = as_dtr(IsLoad, 32, Offset, scratch2,
+ DTRAddr(scratch, DtrOffImm(4)), Always);
+ as_dtr(IsLoad, 32, Offset, scratch, DTRAddr(scratch, DtrOffImm(0)),
+ Always);
+ as_vxfer(scratch, scratch2, VFPRegister(dest), CoreToFloat, Always);
+ }
+ }
+ append(access, load.getOffset());
+ } else {
+ load = ma_dataTransferN(IsLoad, byteSize * 8, isSigned, memoryBase, ptr,
+ output.gpr());
+ append(access, load.getOffset());
+ }
+ }
+
+ asMasm().memoryBarrierAfter(access.sync());
+}
+
+void MacroAssemblerARM::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
+ AnyRegister value, Register64 val64,
+ Register memoryBase, Register ptr,
+ Register ptrScratch) {
+ static_assert(INT64LOW_OFFSET == 0);
+ static_assert(INT64HIGH_OFFSET == 4);
+
+ MOZ_ASSERT(ptr == ptrScratch);
+
+ uint32_t offset = access.offset();
+ MOZ_ASSERT(offset < asMasm().wasmMaxOffsetGuardLimit());
+
+ unsigned byteSize = access.byteSize();
+ Scalar::Type type = access.type();
+
+ // Maybe add the offset.
+ if (offset || type == Scalar::Int64) {
+ ScratchRegisterScope scratch(asMasm());
+ // We need to store the high word of an Int64 first, so always adjust the
+ // pointer to point to the high word in this case. The adjustment is always
+ // OK because wasmMaxOffsetGuardLimit is computed so that we can add up to
+ // sizeof(LargestValue)-1 without skipping past the guard page, and we
+ // assert above that offset < wasmMaxOffsetGuardLimit.
+ if (type == Scalar::Int64) {
+ offset += INT64HIGH_OFFSET;
+ }
+ if (offset) {
+ ma_add(Imm32(offset), ptr, scratch);
+ }
+ }
+
+ // NOTE: the generated code must match the assembly code in gen_store in
+ // GenerateAtomicOperations.py
+ asMasm().memoryBarrierBefore(access.sync());
+
+ BufferOffset store;
+ if (type == Scalar::Int64) {
+ store = ma_dataTransferN(IsStore, 32 /* bits */, /* signed */ false,
+ memoryBase, ptr, val64.high);
+ append(access, store.getOffset());
+
+ as_sub(ptr, ptr, Imm8(INT64HIGH_OFFSET));
+
+ store = ma_dataTransferN(IsStore, 32 /* bits */, /* signed */ true,
+ memoryBase, ptr, val64.low);
+ append(access, store.getOffset());
+ } else {
+ if (value.isFloat()) {
+ ScratchRegisterScope scratch(asMasm());
+ FloatRegister val = value.fpu();
+ MOZ_ASSERT((byteSize == 4) == val.isSingle());
+ ma_add(memoryBase, ptr, scratch);
+
+ // See comments above at wasmLoadImpl for more about this logic.
+ if (HasNEON()) {
+ if (byteSize == 4 && (val.code() & 1)) {
+ ScratchFloat32Scope fscratch(asMasm());
+ as_vmov(fscratch, val);
+ store = as_vstr_unaligned(fscratch, scratch);
+ } else {
+ store = as_vstr_unaligned(val, scratch);
+ }
+ } else {
+ // NEON not available: Move FPR to GPR scratch, store GPR. We have only
+ // one scratch to hold the value, so for f64 we must do two separate
+ // moves. That's OK - this is really a corner case. If we really cared
+ // we would pass in a temp to avoid the second move.
+ SecondScratchRegisterScope scratch2(asMasm());
+ if (byteSize == 4) {
+ as_vxfer(scratch2, InvalidReg, VFPRegister(val), FloatToCore, Always);
+ store = as_dtr(IsStore, 32, Offset, scratch2,
+ DTRAddr(scratch, DtrOffImm(0)), Always);
+ } else {
+ // The trap information is associated with the store of the high word,
+ // which must be done first.
+ as_vxfer(scratch2, InvalidReg, VFPRegister(val).singleOverlay(1),
+ FloatToCore, Always);
+ store = as_dtr(IsStore, 32, Offset, scratch2,
+ DTRAddr(scratch, DtrOffImm(4)), Always);
+ as_vxfer(scratch2, InvalidReg, VFPRegister(val).singleOverlay(0),
+ FloatToCore, Always);
+ as_dtr(IsStore, 32, Offset, scratch2, DTRAddr(scratch, DtrOffImm(0)),
+ Always);
+ }
+ }
+ append(access, store.getOffset());
+ } else {
+      bool isSigned = type == Scalar::Uint32 ||
+                      type == Scalar::Int32;  // See AsmJSStoreHeap.
+ Register val = value.gpr();
+
+ store = ma_dataTransferN(IsStore, 8 * byteSize /* bits */, isSigned,
+ memoryBase, ptr, val);
+ append(access, store.getOffset());
+ }
+ }
+
+ asMasm().memoryBarrierAfter(access.sync());
+}
diff --git a/js/src/jit/arm/MacroAssembler-arm.h b/js/src/jit/arm/MacroAssembler-arm.h
new file mode 100644
index 0000000000..958cdf4718
--- /dev/null
+++ b/js/src/jit/arm/MacroAssembler-arm.h
@@ -0,0 +1,1392 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_MacroAssembler_arm_h
+#define jit_arm_MacroAssembler_arm_h
+
+#include "mozilla/DebugOnly.h"
+
+#include "jit/arm/Assembler-arm.h"
+#include "jit/MoveResolver.h"
+#include "vm/BytecodeUtil.h"
+#include "wasm/WasmBuiltins.h"
+#include "wasm/WasmCodegenTypes.h"
+
+namespace js {
+namespace jit {
+
+static Register CallReg = ip;
+static const int defaultShift = 3;
+static_assert(1 << defaultShift == sizeof(JS::Value));
+
+// See documentation for ScratchTagScope and ScratchTagScopeRelease in
+// MacroAssembler-x64.h.
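+// On ARM the 32-bit tag already lives in its own register (the value's
+// typeReg()), so no scratch register is needed and release()/reacquire()
+// are no-ops.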
+
+class ScratchTagScope {
+ const ValueOperand& v_;
+
+ public:
+ ScratchTagScope(MacroAssembler&, const ValueOperand& v) : v_(v) {}
+ operator Register() { return v_.typeReg(); }
+ void release() {}
+ void reacquire() {}
+};
+
+class ScratchTagScopeRelease {
+ public:
+ explicit ScratchTagScopeRelease(ScratchTagScope*) {}
+};
+
+// MacroAssemblerARM inherits from Assembler, defined in
+// Assembler-arm.{h,cpp}.
+class MacroAssemblerARM : public Assembler {
+ private:
+ // Perform a downcast. Should be removed by Bug 996602.
+ MacroAssembler& asMasm();
+ const MacroAssembler& asMasm() const;
+
+ protected:
+ // On ARM, some instructions require a second scratch register. This
+ // register defaults to lr, since it's non-allocatable (as it can be
+ // clobbered by some instructions). Allow the baseline compiler to override
+ // this though, since baseline IC stubs rely on lr holding the return
+ // address.
+ Register secondScratchReg_;
+
+ public:
+ Register getSecondScratchReg() const { return secondScratchReg_; }
+
+ public:
+ // Higher level tag testing code.
+ // TODO: Can probably remove the Operand versions.
+ Operand ToPayload(Operand base) const {
+ return Operand(Register::FromCode(base.base()), base.disp());
+ }
+ Address ToPayload(const Address& base) const { return base; }
+ BaseIndex ToPayload(const BaseIndex& base) const { return base; }
+
+ protected:
+ Operand ToType(Operand base) const {
+ return Operand(Register::FromCode(base.base()),
+ base.disp() + sizeof(void*));
+ }
+ Address ToType(const Address& base) const {
+ return ToType(Operand(base)).toAddress();
+ }
+ BaseIndex ToType(const BaseIndex& base) const {
+ return BaseIndex(base.base, base.index, base.scale,
+ base.offset + sizeof(void*));
+ }
+
+ Address ToPayloadAfterStackPush(const Address& base) const {
+ // If we are based on StackPointer, pass over the type tag just pushed.
+ if (base.base == StackPointer) {
+ return Address(base.base, base.offset + sizeof(void*));
+ }
+ return ToPayload(base);
+ }
+
+ public:
+ MacroAssemblerARM() : secondScratchReg_(lr) {}
+
+ void setSecondScratchReg(Register reg) {
+ MOZ_ASSERT(reg != ScratchRegister);
+ secondScratchReg_ = reg;
+ }
+
+ void convertBoolToInt32(Register source, Register dest);
+ void convertInt32ToDouble(Register src, FloatRegister dest);
+ void convertInt32ToDouble(const Address& src, FloatRegister dest);
+ void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest);
+ void convertUInt32ToFloat32(Register src, FloatRegister dest);
+ void convertUInt32ToDouble(Register src, FloatRegister dest);
+ void convertDoubleToFloat32(FloatRegister src, FloatRegister dest,
+ Condition c = Always);
+ void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail,
+ bool negativeZeroCheck = true);
+ void convertDoubleToPtr(FloatRegister src, Register dest, Label* fail,
+ bool negativeZeroCheck = true) {
+ convertDoubleToInt32(src, dest, fail, negativeZeroCheck);
+ }
+ void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail,
+ bool negativeZeroCheck = true);
+
+ void convertFloat32ToDouble(FloatRegister src, FloatRegister dest);
+ void convertInt32ToFloat32(Register src, FloatRegister dest);
+ void convertInt32ToFloat32(const Address& src, FloatRegister dest);
+
+ void wasmTruncateToInt32(FloatRegister input, Register output,
+ MIRType fromType, bool isUnsigned, bool isSaturating,
+ Label* oolEntry);
+ void outOfLineWasmTruncateToIntCheck(FloatRegister input, MIRType fromType,
+ MIRType toType, TruncFlags flags,
+ Label* rejoin,
+ wasm::BytecodeOffset trapOffset);
+
+  // Somewhat direct wrappers for the low-level assembler functions and
+  // bit ops. Attempt to encode a virtual ALU instruction using two real
+  // instructions.
+ private:
+ bool alu_dbl(Register src1, Imm32 imm, Register dest, ALUOp op, SBit s,
+ Condition c);
+
+ public:
+ void ma_alu(Register src1, Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, ALUOp op, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_alu(Register src1, Operand2 op2, Register dest, ALUOp op,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_alu(Register src1, Operand op2, Register dest, ALUOp op,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_nop();
+
+ BufferOffset ma_movPatchable(Imm32 imm, Register dest,
+ Assembler::Condition c);
+ BufferOffset ma_movPatchable(ImmPtr imm, Register dest,
+ Assembler::Condition c);
+
+ // To be used with Iter := InstructionIterator or BufferInstructionIterator.
+ template <class Iter>
+ static void ma_mov_patch(Imm32 imm, Register dest, Assembler::Condition c,
+ RelocStyle rs, Iter iter);
+
+ // ALU based ops
+ // mov
+ void ma_mov(Register src, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_mov(Imm32 imm, Register dest, Condition c = Always);
+ void ma_mov(ImmWord imm, Register dest, Condition c = Always);
+
+ void ma_mov(ImmGCPtr ptr, Register dest);
+
+ // Shifts (just a move with a shifting op2)
+ void ma_lsl(Imm32 shift, Register src, Register dst);
+ void ma_lsr(Imm32 shift, Register src, Register dst);
+ void ma_asr(Imm32 shift, Register src, Register dst);
+ void ma_ror(Imm32 shift, Register src, Register dst);
+ void ma_rol(Imm32 shift, Register src, Register dst);
+
+ void ma_lsl(Register shift, Register src, Register dst);
+ void ma_lsr(Register shift, Register src, Register dst);
+ void ma_asr(Register shift, Register src, Register dst);
+ void ma_ror(Register shift, Register src, Register dst);
+ void ma_rol(Register shift, Register src, Register dst,
+ AutoRegisterScope& scratch);
+
+ // Move not (dest <- ~src)
+ void ma_mvn(Register src1, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Negate (dest <- -src) implemented as rsb dest, src, 0
+ void ma_neg(Register src, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_neg(Register64 src, Register64 dest);
+
+ // And
+ void ma_and(Register src, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_and(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_and(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+
+ void ma_and(Imm32 imm, Register src1, Register dest,
+ AutoRegisterScope& scratch, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Bit clear (dest <- dest & ~imm) or (dest <- src1 & ~src2)
+ void ma_bic(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+
+ // Exclusive or
+ void ma_eor(Register src, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_eor(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_eor(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+
+ void ma_eor(Imm32 imm, Register src1, Register dest,
+ AutoRegisterScope& scratch, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Or
+ void ma_orr(Register src, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_orr(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ void ma_orr(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+
+ void ma_orr(Imm32 imm, Register src1, Register dest,
+ AutoRegisterScope& scratch, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Arithmetic based ops.
+ // Add with carry:
+ void ma_adc(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_adc(Register src, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_adc(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_adc(Register src1, Imm32 op, Register dest,
+ AutoRegisterScope& scratch, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Add:
+ void ma_add(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_add(Register src1, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_add(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_add(Register src1, Operand op, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_add(Register src1, Imm32 op, Register dest,
+ AutoRegisterScope& scratch, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Subtract with carry:
+ void ma_sbc(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_sbc(Register src1, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_sbc(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Subtract:
+ void ma_sub(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_sub(Register src1, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_sub(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_sub(Register src1, Operand op, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_sub(Register src1, Imm32 op, Register dest,
+ AutoRegisterScope& scratch, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Reverse subtract:
+ void ma_rsb(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_rsb(Register src1, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_rsb(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_rsb(Register src1, Imm32 op2, Register dest,
+ AutoRegisterScope& scratch, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Reverse subtract with carry:
+ void ma_rsc(Imm32 imm, Register dest, AutoRegisterScope& scratch,
+ SBit s = LeaveCC, Condition c = Always);
+ void ma_rsc(Register src1, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+ void ma_rsc(Register src1, Register src2, Register dest, SBit s = LeaveCC,
+ Condition c = Always);
+
+ // Compares/tests.
+ // Compare negative (sets condition codes as src1 + src2 would):
+ void ma_cmn(Register src1, Imm32 imm, AutoRegisterScope& scratch,
+ Condition c = Always);
+ void ma_cmn(Register src1, Register src2, Condition c = Always);
+ void ma_cmn(Register src1, Operand op, Condition c = Always);
+
+  // Compare (src1 - src2):
+ void ma_cmp(Register src1, Imm32 imm, AutoRegisterScope& scratch,
+ Condition c = Always);
+ void ma_cmp(Register src1, ImmTag tag, Condition c = Always);
+ void ma_cmp(Register src1, ImmWord ptr, AutoRegisterScope& scratch,
+ Condition c = Always);
+ void ma_cmp(Register src1, ImmGCPtr ptr, AutoRegisterScope& scratch,
+ Condition c = Always);
+ void ma_cmp(Register src1, Operand op, AutoRegisterScope& scratch,
+ AutoRegisterScope& scratch2, Condition c = Always);
+ void ma_cmp(Register src1, Register src2, Condition c = Always);
+
+  // Test for equality (src1 ^ src2):
+ void ma_teq(Register src1, Imm32 imm, AutoRegisterScope& scratch,
+ Condition c = Always);
+ void ma_teq(Register src1, Register src2, Condition c = Always);
+ void ma_teq(Register src1, Operand op, Condition c = Always);
+
+ // Test (src1 & src2):
+ void ma_tst(Register src1, Imm32 imm, AutoRegisterScope& scratch,
+ Condition c = Always);
+ void ma_tst(Register src1, Register src2, Condition c = Always);
+ void ma_tst(Register src1, Operand op, Condition c = Always);
+
+ // Multiplies. For now, there are only two that we care about.
+ void ma_mul(Register src1, Register src2, Register dest);
+ void ma_mul(Register src1, Imm32 imm, Register dest,
+ AutoRegisterScope& scratch);
+ Condition ma_check_mul(Register src1, Register src2, Register dest,
+ AutoRegisterScope& scratch, Condition cond);
+ Condition ma_check_mul(Register src1, Imm32 imm, Register dest,
+ AutoRegisterScope& scratch, Condition cond);
+
+ void ma_umull(Register src1, Imm32 imm, Register destHigh, Register destLow,
+ AutoRegisterScope& scratch);
+ void ma_umull(Register src1, Register src2, Register destHigh,
+ Register destLow);
+
+  // Fast mod. Uses scratch registers and thus needs to be in the assembler;
+  // implicitly assumes that we can overwrite dest at the beginning of the
+  // sequence.
+ void ma_mod_mask(Register src, Register dest, Register hold, Register tmp,
+ AutoRegisterScope& scratch, AutoRegisterScope& scratch2,
+ int32_t shift);
+
+ // Mod - depends on integer divide instructions being supported.
+ void ma_smod(Register num, Register div, Register dest,
+ AutoRegisterScope& scratch);
+ void ma_umod(Register num, Register div, Register dest,
+ AutoRegisterScope& scratch);
+
+ // Division - depends on integer divide instructions being supported.
+ void ma_sdiv(Register num, Register div, Register dest,
+ Condition cond = Always);
+ void ma_udiv(Register num, Register div, Register dest,
+ Condition cond = Always);
+ // Misc operations
+ void ma_clz(Register src, Register dest, Condition cond = Always);
+ void ma_ctz(Register src, Register dest, AutoRegisterScope& scratch);
+ // Memory:
+ // Shortcut for when we know we're transferring 32 bits of data.
+ void ma_dtr(LoadStore ls, Register rn, Imm32 offset, Register rt,
+ AutoRegisterScope& scratch, Index mode = Offset,
+ Condition cc = Always);
+ void ma_dtr(LoadStore ls, Register rt, const Address& addr,
+ AutoRegisterScope& scratch, Index mode, Condition cc);
+
+ void ma_str(Register rt, DTRAddr addr, Index mode = Offset,
+ Condition cc = Always);
+ void ma_str(Register rt, const Address& addr, AutoRegisterScope& scratch,
+ Index mode = Offset, Condition cc = Always);
+
+ void ma_ldr(DTRAddr addr, Register rt, Index mode = Offset,
+ Condition cc = Always);
+ void ma_ldr(const Address& addr, Register rt, AutoRegisterScope& scratch,
+ Index mode = Offset, Condition cc = Always);
+
+ void ma_ldrb(DTRAddr addr, Register rt, Index mode = Offset,
+ Condition cc = Always);
+ void ma_ldrh(EDtrAddr addr, Register rt, Index mode = Offset,
+ Condition cc = Always);
+ void ma_ldrsh(EDtrAddr addr, Register rt, Index mode = Offset,
+ Condition cc = Always);
+ void ma_ldrsb(EDtrAddr addr, Register rt, Index mode = Offset,
+ Condition cc = Always);
+ void ma_ldrd(EDtrAddr addr, Register rt, mozilla::DebugOnly<Register> rt2,
+ Index mode = Offset, Condition cc = Always);
+ void ma_strb(Register rt, DTRAddr addr, Index mode = Offset,
+ Condition cc = Always);
+ void ma_strh(Register rt, EDtrAddr addr, Index mode = Offset,
+ Condition cc = Always);
+ void ma_strd(Register rt, mozilla::DebugOnly<Register> rt2, EDtrAddr addr,
+ Index mode = Offset, Condition cc = Always);
+
+  // Specialty for moving N bits of data, where N == 8, 16, 32, or 64.
+ BufferOffset ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
+ Register rn, Register rm, Register rt,
+ AutoRegisterScope& scratch, Index mode = Offset,
+ Condition cc = Always, Scale scale = TimesOne);
+
+ BufferOffset ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
+ Register rn, Register rm, Register rt,
+ Index mode = Offset, Condition cc = Always);
+
+ BufferOffset ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
+ Register rn, Imm32 offset, Register rt,
+ AutoRegisterScope& scratch, Index mode = Offset,
+ Condition cc = Always);
+
+ void ma_pop(Register r);
+ void ma_popn_pc(Imm32 n, AutoRegisterScope& scratch,
+ AutoRegisterScope& scratch2);
+ void ma_push(Register r);
+ void ma_push_sp(Register r, AutoRegisterScope& scratch);
+
+ void ma_vpop(VFPRegister r);
+ void ma_vpush(VFPRegister r);
+
+ // Barriers.
+ void ma_dmb(BarrierOption option = BarrierSY);
+ void ma_dsb(BarrierOption option = BarrierSY);
+
+ // Branches when done from within arm-specific code.
+ BufferOffset ma_b(Label* dest, Condition c = Always);
+ void ma_b(void* target, Condition c = Always);
+ void ma_bx(Register dest, Condition c = Always);
+
+  // This is almost NEVER necessary; we'll basically never be calling a
+  // label, except possibly in the crazy bailout-table case.
+ void ma_bl(Label* dest, Condition c = Always);
+
+ void ma_blx(Register dest, Condition c = Always);
+
+ // VFP/ALU:
+ void ma_vadd(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+ void ma_vsub(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+
+ void ma_vmul(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+ void ma_vdiv(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+
+ void ma_vneg(FloatRegister src, FloatRegister dest, Condition cc = Always);
+ void ma_vmov(FloatRegister src, FloatRegister dest, Condition cc = Always);
+ void ma_vmov_f32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+ void ma_vabs(FloatRegister src, FloatRegister dest, Condition cc = Always);
+ void ma_vabs_f32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+
+ void ma_vsqrt(FloatRegister src, FloatRegister dest, Condition cc = Always);
+ void ma_vsqrt_f32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+
+ void ma_vimm(double value, FloatRegister dest, Condition cc = Always);
+ void ma_vimm_f32(float value, FloatRegister dest, Condition cc = Always);
+
+ void ma_vcmp(FloatRegister src1, FloatRegister src2, Condition cc = Always);
+ void ma_vcmp_f32(FloatRegister src1, FloatRegister src2,
+ Condition cc = Always);
+ void ma_vcmpz(FloatRegister src1, Condition cc = Always);
+ void ma_vcmpz_f32(FloatRegister src1, Condition cc = Always);
+
+ void ma_vadd_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+ void ma_vsub_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+
+ void ma_vmul_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+ void ma_vdiv_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+
+ void ma_vneg_f32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+
+ // Source is F64, dest is I32:
+ void ma_vcvt_F64_I32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+ void ma_vcvt_F64_U32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+
+ // Source is I32, dest is F64:
+ void ma_vcvt_I32_F64(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+ void ma_vcvt_U32_F64(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+
+ // Source is F32, dest is I32:
+ void ma_vcvt_F32_I32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+ void ma_vcvt_F32_U32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+
+ // Source is I32, dest is F32:
+ void ma_vcvt_I32_F32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+ void ma_vcvt_U32_F32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always);
+
+ // Transfer (do not coerce) a float into a gpr.
+ void ma_vxfer(VFPRegister src, Register dest, Condition cc = Always);
+  // Transfer (do not coerce) a double into a pair of gprs.
+ void ma_vxfer(VFPRegister src, Register dest1, Register dest2,
+ Condition cc = Always);
+
+  // Transfer (do not coerce) a gpr into a float.
+  void ma_vxfer(Register src, FloatRegister dest, Condition cc = Always);
+  // Transfer (do not coerce) a pair of gprs into a double.
+ void ma_vxfer(Register src1, Register src2, FloatRegister dest,
+ Condition cc = Always);
+
+ BufferOffset ma_vdtr(LoadStore ls, const Address& addr, VFPRegister dest,
+ AutoRegisterScope& scratch, Condition cc = Always);
+
+ BufferOffset ma_vldr(VFPAddr addr, VFPRegister dest, Condition cc = Always);
+ BufferOffset ma_vldr(const Address& addr, VFPRegister dest,
+ AutoRegisterScope& scratch, Condition cc = Always);
+ BufferOffset ma_vldr(VFPRegister src, Register base, Register index,
+ AutoRegisterScope& scratch, int32_t shift = defaultShift,
+ Condition cc = Always);
+
+ BufferOffset ma_vstr(VFPRegister src, VFPAddr addr, Condition cc = Always);
+ BufferOffset ma_vstr(VFPRegister src, const Address& addr,
+ AutoRegisterScope& scratch, Condition cc = Always);
+ BufferOffset ma_vstr(VFPRegister src, Register base, Register index,
+ AutoRegisterScope& scratch, AutoRegisterScope& scratch2,
+ int32_t shift, int32_t offset, Condition cc = Always);
+ BufferOffset ma_vstr(VFPRegister src, Register base, Register index,
+ AutoRegisterScope& scratch, int32_t shift,
+ Condition cc = Always);
+
+ void ma_call(ImmPtr dest);
+
+  // Float registers can only be loaded/stored in contiguous runs when using
+  // vstm/vldm. This function breaks the set into contiguous runs and
+  // loads/stores them at [rm]. rm will be modified and left in a state
+  // logically suitable for the next load/store. Returns the offset from
+  // [rm] for the logical next load/store.
+ int32_t transferMultipleByRuns(FloatRegisterSet set, LoadStore ls,
+ Register rm, DTMMode mode) {
+ if (mode == IA) {
+ return transferMultipleByRunsImpl<FloatRegisterForwardIterator>(
+ set, ls, rm, mode, 1);
+ }
+ if (mode == DB) {
+ return transferMultipleByRunsImpl<FloatRegisterBackwardIterator>(
+ set, ls, rm, mode, -1);
+ }
+ MOZ_CRASH("Invalid data transfer addressing mode");
+ }
+
+ // `outAny` is valid if and only if `out64` == Register64::Invalid().
+ void wasmLoadImpl(const wasm::MemoryAccessDesc& access, Register memoryBase,
+ Register ptr, Register ptrScratch, AnyRegister outAny,
+ Register64 out64);
+
+ // `valAny` is valid if and only if `val64` == Register64::Invalid().
+ void wasmStoreImpl(const wasm::MemoryAccessDesc& access, AnyRegister valAny,
+ Register64 val64, Register memoryBase, Register ptr,
+ Register ptrScratch);
+
+ private:
+ // Implementation for transferMultipleByRuns so we can use different
+ // iterators for forward/backward traversals. The sign argument should be 1
+ // if we traverse forwards, -1 if we traverse backwards.
+ template <typename RegisterIterator>
+ int32_t transferMultipleByRunsImpl(FloatRegisterSet set, LoadStore ls,
+ Register rm, DTMMode mode, int32_t sign) {
+ MOZ_ASSERT(sign == 1 || sign == -1);
+
+ int32_t delta = sign * sizeof(float);
+ int32_t offset = 0;
+    // Build up a new set, which is the union of all of the single and
+    // double registers. This set can have up to 48 registers in it in
+    // total: s0-s31 and d16-d31.
+ FloatRegisterSet mod = set.reduceSetForPush();
+
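+    // Each maximal run of consecutively-numbered registers in the set is
+    // emitted as a single vstm/vldm; the inner do-while extends the run
+    // while the register codes remain consecutive.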
+ RegisterIterator iter(mod);
+ while (iter.more()) {
+ startFloatTransferM(ls, rm, mode, WriteBack);
+ int32_t reg = (*iter).code();
+ do {
+ offset += delta;
+ if ((*iter).isDouble()) {
+ offset += delta;
+ }
+ transferFloatReg(*iter);
+ } while ((++iter).more() && int32_t((*iter).code()) == (reg += sign));
+ finishFloatTransfer();
+ }
+ return offset;
+ }
+};
+
+class MacroAssembler;
+
+class MacroAssemblerARMCompat : public MacroAssemblerARM {
+ private:
+ // Perform a downcast. Should be removed by Bug 996602.
+ MacroAssembler& asMasm();
+ const MacroAssembler& asMasm() const;
+
+ public:
+ MacroAssemblerARMCompat() {}
+
+ public:
+ // Jumps + other functions that should be called from non-arm specific
+ // code. Basically, an x86 front end on top of the ARM code.
+ void j(Condition code, Label* dest) { as_b(dest, code); }
+ void j(Label* dest) { as_b(dest, Always); }
+
+ void mov(Register src, Register dest) { ma_mov(src, dest); }
+ void mov(ImmWord imm, Register dest) { ma_mov(Imm32(imm.value), dest); }
+ void mov(ImmPtr imm, Register dest) {
+ mov(ImmWord(uintptr_t(imm.value)), dest);
+ }
+
+ void branch(JitCode* c) {
+ BufferOffset bo = m_buffer.nextOffset();
+ addPendingJump(bo, ImmPtr(c->raw()), RelocationKind::JITCODE);
+ ScratchRegisterScope scratch(asMasm());
+ ma_movPatchable(ImmPtr(c->raw()), scratch, Always);
+ ma_bx(scratch);
+ }
+ void branch(const Register reg) { ma_bx(reg); }
+ void nop() { ma_nop(); }
+ void shortJumpSizedNop() { ma_nop(); }
+ void ret() { ma_pop(pc); }
+ void retn(Imm32 n) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_popn_pc(n, scratch, scratch2);
+ }
+ void push(Imm32 imm) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_mov(imm, scratch);
+ ma_push(scratch);
+ }
+ void push(ImmWord imm) { push(Imm32(imm.value)); }
+ void push(ImmGCPtr imm) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_mov(imm, scratch);
+ ma_push(scratch);
+ }
+ void push(const Address& addr) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(addr, scratch, scratch2);
+ ma_push(scratch);
+ }
+ void push(Register reg) {
+ if (reg == sp) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_push_sp(reg, scratch);
+ } else {
+ ma_push(reg);
+ }
+ }
+ void push(FloatRegister reg) { ma_vpush(VFPRegister(reg)); }
+ void pushWithPadding(Register reg, const Imm32 extraSpace) {
+ ScratchRegisterScope scratch(asMasm());
+ Imm32 totSpace = Imm32(extraSpace.value + 4);
+ ma_dtr(IsStore, sp, totSpace, reg, scratch, PreIndex);
+ }
+ void pushWithPadding(Imm32 imm, const Imm32 extraSpace) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ Imm32 totSpace = Imm32(extraSpace.value + 4);
+ ma_mov(imm, scratch);
+ ma_dtr(IsStore, sp, totSpace, scratch, scratch2, PreIndex);
+ }
+
+ void pop(Register reg) { ma_pop(reg); }
+ void pop(FloatRegister reg) { ma_vpop(VFPRegister(reg)); }
+
+ void popN(Register reg, Imm32 extraSpace) {
+ ScratchRegisterScope scratch(asMasm());
+ Imm32 totSpace = Imm32(extraSpace.value + 4);
+ ma_dtr(IsLoad, sp, totSpace, reg, scratch, PostIndex);
+ }
+
+ CodeOffset toggledJump(Label* label);
+
+ // Emit a BLX or NOP instruction. ToggleCall can be used to patch this
+ // instruction.
+ CodeOffset toggledCall(JitCode* target, bool enabled);
+
+ CodeOffset pushWithPatch(ImmWord imm) {
+ ScratchRegisterScope scratch(asMasm());
+ CodeOffset label = movWithPatch(imm, scratch);
+ ma_push(scratch);
+ return label;
+ }
+
+ CodeOffset movWithPatch(ImmWord imm, Register dest) {
+ CodeOffset label = CodeOffset(currentOffset());
+ ma_movPatchable(Imm32(imm.value), dest, Always);
+ return label;
+ }
+ CodeOffset movWithPatch(ImmPtr imm, Register dest) {
+ return movWithPatch(ImmWord(uintptr_t(imm.value)), dest);
+ }
+
+ void jump(Label* label) { as_b(label); }
+ void jump(JitCode* code) { branch(code); }
+ void jump(ImmPtr ptr) {
+ ScratchRegisterScope scratch(asMasm());
+ movePtr(ptr, scratch);
+ ma_bx(scratch);
+ }
+ void jump(TrampolinePtr code) { jump(ImmPtr(code.value)); }
+ void jump(Register reg) { ma_bx(reg); }
+ void jump(const Address& addr) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(addr, scratch, scratch2);
+ ma_bx(scratch);
+ }
+
+ void negl(Register reg) { ma_neg(reg, reg, SetCC); }
+ void test32(Register lhs, Register rhs) { ma_tst(lhs, rhs); }
+ void test32(Register lhs, Imm32 imm) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_tst(lhs, imm, scratch);
+ }
+ void test32(const Address& addr, Imm32 imm) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+ ma_ldr(addr, scratch, scratch2);
+ ma_tst(scratch, imm, scratch2);
+ }
+ void testPtr(Register lhs, Register rhs) { test32(lhs, rhs); }
+
+ void splitTagForTest(const ValueOperand& value, ScratchTagScope& tag) {
+ MOZ_ASSERT(value.typeReg() == tag);
+ }
+
+ // Higher level tag testing code.
+ Condition testInt32(Condition cond, const ValueOperand& value);
+ Condition testBoolean(Condition cond, const ValueOperand& value);
+ Condition testDouble(Condition cond, const ValueOperand& value);
+ Condition testNull(Condition cond, const ValueOperand& value);
+ Condition testUndefined(Condition cond, const ValueOperand& value);
+ Condition testString(Condition cond, const ValueOperand& value);
+ Condition testSymbol(Condition cond, const ValueOperand& value);
+ Condition testBigInt(Condition cond, const ValueOperand& value);
+ Condition testObject(Condition cond, const ValueOperand& value);
+ Condition testNumber(Condition cond, const ValueOperand& value);
+ Condition testMagic(Condition cond, const ValueOperand& value);
+
+ Condition testPrimitive(Condition cond, const ValueOperand& value);
+ Condition testGCThing(Condition cond, const ValueOperand& value);
+
+ // Register-based tests.
+ Condition testInt32(Condition cond, Register tag);
+ Condition testBoolean(Condition cond, Register tag);
+ Condition testNull(Condition cond, Register tag);
+ Condition testUndefined(Condition cond, Register tag);
+ Condition testString(Condition cond, Register tag);
+ Condition testSymbol(Condition cond, Register tag);
+ Condition testBigInt(Condition cond, Register tag);
+ Condition testObject(Condition cond, Register tag);
+ Condition testDouble(Condition cond, Register tag);
+ Condition testNumber(Condition cond, Register tag);
+ Condition testMagic(Condition cond, Register tag);
+ Condition testPrimitive(Condition cond, Register tag);
+ Condition testGCThing(Condition cond, Register tag);
+
+ Condition testGCThing(Condition cond, const Address& address);
+ Condition testMagic(Condition cond, const Address& address);
+ Condition testInt32(Condition cond, const Address& address);
+ Condition testDouble(Condition cond, const Address& address);
+ Condition testBoolean(Condition cond, const Address& address);
+ Condition testNull(Condition cond, const Address& address);
+ Condition testUndefined(Condition cond, const Address& address);
+ Condition testString(Condition cond, const Address& address);
+ Condition testSymbol(Condition cond, const Address& address);
+ Condition testBigInt(Condition cond, const Address& address);
+ Condition testObject(Condition cond, const Address& address);
+ Condition testNumber(Condition cond, const Address& address);
+
+ Condition testUndefined(Condition cond, const BaseIndex& src);
+ Condition testNull(Condition cond, const BaseIndex& src);
+ Condition testBoolean(Condition cond, const BaseIndex& src);
+ Condition testString(Condition cond, const BaseIndex& src);
+ Condition testSymbol(Condition cond, const BaseIndex& src);
+ Condition testBigInt(Condition cond, const BaseIndex& src);
+ Condition testInt32(Condition cond, const BaseIndex& src);
+ Condition testObject(Condition cond, const BaseIndex& src);
+ Condition testDouble(Condition cond, const BaseIndex& src);
+ Condition testMagic(Condition cond, const BaseIndex& src);
+ Condition testGCThing(Condition cond, const BaseIndex& src);
+
+ // Unboxing code.
+ void unboxNonDouble(const ValueOperand& operand, Register dest,
+ JSValueType type);
+ void unboxNonDouble(const Address& src, Register dest, JSValueType type);
+ void unboxNonDouble(const BaseIndex& src, Register dest, JSValueType type);
+ void unboxInt32(const ValueOperand& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_INT32);
+ }
+ void unboxInt32(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_INT32);
+ }
+ void unboxInt32(const BaseIndex& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_INT32);
+ }
+ void unboxBoolean(const ValueOperand& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_BOOLEAN);
+ }
+ void unboxBoolean(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_BOOLEAN);
+ }
+ void unboxBoolean(const BaseIndex& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_BOOLEAN);
+ }
+ void unboxString(const ValueOperand& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_STRING);
+ }
+ void unboxString(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_STRING);
+ }
+ void unboxSymbol(const ValueOperand& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_SYMBOL);
+ }
+ void unboxSymbol(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_SYMBOL);
+ }
+ void unboxBigInt(const ValueOperand& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_BIGINT);
+ }
+ void unboxBigInt(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_BIGINT);
+ }
+ void unboxObject(const ValueOperand& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObject(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObject(const BaseIndex& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObjectOrNull(const ValueOperand& src, Register dest) {
+ // Due to Spectre mitigation logic (see Value.h), if the value is an Object
+ // then this yields the object; otherwise it yields zero (null), as desired.
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObjectOrNull(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObjectOrNull(const BaseIndex& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxDouble(const ValueOperand& src, FloatRegister dest);
+ void unboxDouble(const Address& src, FloatRegister dest);
+ void unboxDouble(const BaseIndex& src, FloatRegister dest);
+
+ void unboxValue(const ValueOperand& src, AnyRegister dest, JSValueType type);
+
+ // See comment in MacroAssembler-x64.h.
+ void unboxGCThingForGCBarrier(const Address& src, Register dest) {
+ load32(ToPayload(src), dest);
+ }
+
+ void notBoolean(const ValueOperand& val) {
+ as_eor(val.payloadReg(), val.payloadReg(), Imm8(1));
+ }
+
+ template <typename T>
+ void fallibleUnboxPtrImpl(const T& src, Register dest, JSValueType type,
+ Label* fail);
+
+ // Boxing code.
+ void boxDouble(FloatRegister src, const ValueOperand& dest, FloatRegister);
+ void boxNonDouble(JSValueType type, Register src, const ValueOperand& dest);
+
+  // Extended unboxing API. If the payload is already in a register, returns
+  // that register. Otherwise, the payload is moved to the given scratch
+  // register, which is then returned.
+ [[nodiscard]] Register extractObject(const Address& address,
+ Register scratch);
+ [[nodiscard]] Register extractObject(const ValueOperand& value,
+ Register scratch) {
+ unboxNonDouble(value, value.payloadReg(), JSVAL_TYPE_OBJECT);
+ return value.payloadReg();
+ }
+ [[nodiscard]] Register extractSymbol(const ValueOperand& value,
+ Register scratch) {
+ unboxNonDouble(value, value.payloadReg(), JSVAL_TYPE_SYMBOL);
+ return value.payloadReg();
+ }
+ [[nodiscard]] Register extractInt32(const ValueOperand& value,
+ Register scratch) {
+ return value.payloadReg();
+ }
+ [[nodiscard]] Register extractBoolean(const ValueOperand& value,
+ Register scratch) {
+ return value.payloadReg();
+ }
+ [[nodiscard]] Register extractTag(const Address& address, Register scratch);
+ [[nodiscard]] Register extractTag(const BaseIndex& address, Register scratch);
+ [[nodiscard]] Register extractTag(const ValueOperand& value,
+ Register scratch) {
+ return value.typeReg();
+ }
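+  // Illustrative use of the extraction API above (a sketch; |shapeOffset| is
+  // a hypothetical field offset): the caller must use the returned register,
+  // because when the payload already lives in a register, |scratch| is left
+  // untouched:
+  //   Register obj = extractObject(value, scratch);
+  //   loadPtr(Address(obj, shapeOffset), dest);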
+
+ void boolValueToDouble(const ValueOperand& operand, FloatRegister dest);
+ void int32ValueToDouble(const ValueOperand& operand, FloatRegister dest);
+ void loadInt32OrDouble(const Address& src, FloatRegister dest);
+ void loadInt32OrDouble(Register base, Register index, FloatRegister dest,
+ int32_t shift = defaultShift);
+ void loadConstantDouble(double dp, FloatRegister dest);
+
+ // Treat the value as a boolean, and set condition codes accordingly.
+ Condition testInt32Truthy(bool truthy, const ValueOperand& operand);
+ Condition testBooleanTruthy(bool truthy, const ValueOperand& operand);
+ Condition testDoubleTruthy(bool truthy, FloatRegister reg);
+ Condition testStringTruthy(bool truthy, const ValueOperand& value);
+ Condition testBigIntTruthy(bool truthy, const ValueOperand& value);
+
+ void boolValueToFloat32(const ValueOperand& operand, FloatRegister dest);
+ void int32ValueToFloat32(const ValueOperand& operand, FloatRegister dest);
+ void loadConstantFloat32(float f, FloatRegister dest);
+
+ void loadUnboxedValue(Address address, MIRType type, AnyRegister dest) {
+ if (dest.isFloat()) {
+ loadInt32OrDouble(address, dest.fpu());
+ } else {
+ ScratchRegisterScope scratch(asMasm());
+ ma_ldr(address, dest.gpr(), scratch);
+ }
+ }
+
+ void loadUnboxedValue(BaseIndex address, MIRType type, AnyRegister dest) {
+ if (dest.isFloat()) {
+ loadInt32OrDouble(address.base, address.index, dest.fpu(), address.scale);
+ } else {
+ load32(address, dest.gpr());
+ }
+ }
+
+ template <typename T>
+ void storeUnboxedPayload(ValueOperand value, T address, size_t nbytes,
+ JSValueType) {
+ switch (nbytes) {
+ case 4:
+ storePtr(value.payloadReg(), address);
+ return;
+ case 1:
+ store8(value.payloadReg(), address);
+ return;
+ default:
+ MOZ_CRASH("Bad payload width");
+ }
+ }
+
+ void storeValue(ValueOperand val, const Address& dst);
+ void storeValue(ValueOperand val, const BaseIndex& dest);
+ void storeValue(JSValueType type, Register reg, BaseIndex dest) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ int32_t payloadoffset = dest.offset + NUNBOX32_PAYLOAD_OFFSET;
+ int32_t typeoffset = dest.offset + NUNBOX32_TYPE_OFFSET;
+
+ ma_alu(dest.base, lsl(dest.index, dest.scale), scratch, OpAdd);
+
+ // Store the payload.
+ if (payloadoffset < 4096 && payloadoffset > -4096) {
+ ma_str(reg, DTRAddr(scratch, DtrOffImm(payloadoffset)));
+ } else {
+ ma_str(reg, Address(scratch, payloadoffset), scratch2);
+ }
+
+ // Store the type.
+ if (typeoffset < 4096 && typeoffset > -4096) {
+ // Encodable as DTRAddr, so only two instructions needed.
+ ma_mov(ImmTag(JSVAL_TYPE_TO_TAG(type)), scratch2);
+ ma_str(scratch2, DTRAddr(scratch, DtrOffImm(typeoffset)));
+ } else {
+ // Since there are only two scratch registers, the offset must be
+ // applied early using a third instruction to be safe.
+ ma_add(Imm32(typeoffset), scratch, scratch2);
+ ma_mov(ImmTag(JSVAL_TYPE_TO_TAG(type)), scratch2);
+ ma_str(scratch2, DTRAddr(scratch, DtrOffImm(0)));
+ }
+ }
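+  // For illustration, the BaseIndex storeValue above, with both offsets
+  // DTRAddr-encodable and assuming the little-endian nunbox32 layout
+  // (payload at +0, tag at +4), emits a sequence along these lines:
+  //   add scratch, base, index, lsl #shift  ; effective address
+  //   str reg, [scratch, #payloadoffset]    ; payload word
+  //   mov scratch2, #tag
+  //   str scratch2, [scratch, #typeoffset]  ; type tag word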
+ void storeValue(JSValueType type, Register reg, Address dest) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ ma_str(reg, dest, scratch2);
+ ma_mov(ImmTag(JSVAL_TYPE_TO_TAG(type)), scratch);
+ ma_str(scratch, Address(dest.base, dest.offset + NUNBOX32_TYPE_OFFSET),
+ scratch2);
+ }
+ void storeValue(const Value& val, const Address& dest) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ ma_mov(Imm32(val.toNunboxTag()), scratch);
+ ma_str(scratch, ToType(dest), scratch2);
+ if (val.isGCThing()) {
+ ma_mov(ImmGCPtr(val.toGCThing()), scratch);
+ } else {
+ ma_mov(Imm32(val.toNunboxPayload()), scratch);
+ }
+ ma_str(scratch, ToPayload(dest), scratch2);
+ }
+ void storeValue(const Value& val, BaseIndex dest) {
+ ScratchRegisterScope scratch(asMasm());
+ SecondScratchRegisterScope scratch2(asMasm());
+
+ int32_t typeoffset = dest.offset + NUNBOX32_TYPE_OFFSET;
+ int32_t payloadoffset = dest.offset + NUNBOX32_PAYLOAD_OFFSET;
+
+ ma_alu(dest.base, lsl(dest.index, dest.scale), scratch, OpAdd);
+
+ // Store the type.
+ if (typeoffset < 4096 && typeoffset > -4096) {
+ ma_mov(Imm32(val.toNunboxTag()), scratch2);
+ ma_str(scratch2, DTRAddr(scratch, DtrOffImm(typeoffset)));
+ } else {
+ ma_add(Imm32(typeoffset), scratch, scratch2);
+ ma_mov(Imm32(val.toNunboxTag()), scratch2);
+ ma_str(scratch2, DTRAddr(scratch, DtrOffImm(0)));
+ // Restore scratch for the payload store.
+ ma_alu(dest.base, lsl(dest.index, dest.scale), scratch, OpAdd);
+ }
+
+ // Store the payload, marking if necessary.
+ if (payloadoffset < 4096 && payloadoffset > -4096) {
+ if (val.isGCThing()) {
+ ma_mov(ImmGCPtr(val.toGCThing()), scratch2);
+ } else {
+ ma_mov(Imm32(val.toNunboxPayload()), scratch2);
+ }
+ ma_str(scratch2, DTRAddr(scratch, DtrOffImm(payloadoffset)));
+ } else {
+ ma_add(Imm32(payloadoffset), scratch, scratch2);
+ if (val.isGCThing()) {
+ ma_mov(ImmGCPtr(val.toGCThing()), scratch2);
+ } else {
+ ma_mov(Imm32(val.toNunboxPayload()), scratch2);
+ }
+ ma_str(scratch2, DTRAddr(scratch, DtrOffImm(0)));
+ }
+ }
+ void storeValue(const Address& src, const Address& dest, Register temp) {
+ load32(ToType(src), temp);
+ store32(temp, ToType(dest));
+
+ load32(ToPayload(src), temp);
+ store32(temp, ToPayload(dest));
+ }
+
+ void storePrivateValue(Register src, const Address& dest) {
+ store32(Imm32(0), ToType(dest));
+ store32(src, ToPayload(dest));
+ }
+ void storePrivateValue(ImmGCPtr imm, const Address& dest) {
+ store32(Imm32(0), ToType(dest));
+ storePtr(imm, ToPayload(dest));
+ }
+
+ void loadValue(Address src, ValueOperand val);
+ void loadValue(Operand dest, ValueOperand val) {
+ loadValue(dest.toAddress(), val);
+ }
+ void loadValue(const BaseIndex& addr, ValueOperand val);
+
+ // Like loadValue but guaranteed to not use LDRD or LDM instructions (these
+ // don't support unaligned accesses).
+ void loadUnalignedValue(const Address& src, ValueOperand dest);
+
+ void tagValue(JSValueType type, Register payload, ValueOperand dest);
+
+ void pushValue(ValueOperand val);
+ void popValue(ValueOperand val);
+ void pushValue(const Value& val) {
+ push(Imm32(val.toNunboxTag()));
+ if (val.isGCThing()) {
+ push(ImmGCPtr(val.toGCThing()));
+ } else {
+ push(Imm32(val.toNunboxPayload()));
+ }
+ }
+ void pushValue(JSValueType type, Register reg) {
+ push(ImmTag(JSVAL_TYPE_TO_TAG(type)));
+ ma_push(reg);
+ }
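+  // Note on the push order above: the tag is pushed first and so ends up at
+  // the higher address, giving the nunbox32 layout (payload at +0, tag at +4)
+  // when the value is addressed from the new stack pointer.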
+ void pushValue(const Address& addr);
+ void pushValue(const BaseIndex& addr, Register scratch);
+
+ void storePayload(const Value& val, const Address& dest);
+ void storePayload(Register src, const Address& dest);
+ void storePayload(const Value& val, const BaseIndex& dest);
+ void storePayload(Register src, const BaseIndex& dest);
+ void storeTypeTag(ImmTag tag, const Address& dest);
+ void storeTypeTag(ImmTag tag, const BaseIndex& dest);
+
+ void handleFailureWithHandlerTail(Label* profilerExitTail,
+ Label* bailoutTail);
+
+ /////////////////////////////////////////////////////////////////
+ // Common interface.
+ /////////////////////////////////////////////////////////////////
+ public:
+ void not32(Register reg);
+
+ void move32(Imm32 imm, Register dest);
+ void move32(Register src, Register dest);
+
+ void movePtr(Register src, Register dest);
+ void movePtr(ImmWord imm, Register dest);
+ void movePtr(ImmPtr imm, Register dest);
+ void movePtr(wasm::SymbolicAddress imm, Register dest);
+ void movePtr(ImmGCPtr imm, Register dest);
+
+ void load8SignExtend(const Address& address, Register dest);
+ void load8SignExtend(const BaseIndex& src, Register dest);
+
+ void load8ZeroExtend(const Address& address, Register dest);
+ void load8ZeroExtend(const BaseIndex& src, Register dest);
+
+ void load16SignExtend(const Address& address, Register dest);
+ void load16SignExtend(const BaseIndex& src, Register dest);
+
+ template <typename S>
+ void load16UnalignedSignExtend(const S& src, Register dest) {
+ // load16SignExtend uses |ldrsh|, which supports unaligned access.
+ load16SignExtend(src, dest);
+ }
+
+ void load16ZeroExtend(const Address& address, Register dest);
+ void load16ZeroExtend(const BaseIndex& src, Register dest);
+
+ template <typename S>
+ void load16UnalignedZeroExtend(const S& src, Register dest) {
+ // load16ZeroExtend uses |ldrh|, which supports unaligned access.
+ load16ZeroExtend(src, dest);
+ }
+
+ void load32(const Address& address, Register dest);
+ void load32(const BaseIndex& address, Register dest);
+ void load32(AbsoluteAddress address, Register dest);
+
+ template <typename S>
+ void load32Unaligned(const S& src, Register dest) {
+ // load32 uses |ldr|, which supports unaligned access.
+ load32(src, dest);
+ }
+
+ void load64(const Address& address, Register64 dest) {
+ bool highBeforeLow = address.base == dest.low;
+ if (highBeforeLow) {
+ load32(HighWord(address), dest.high);
+ load32(LowWord(address), dest.low);
+ } else {
+ load32(LowWord(address), dest.low);
+ load32(HighWord(address), dest.high);
+ }
+ }
+ void load64(const BaseIndex& address, Register64 dest) {
+ // If you run into this, relax your register allocation constraints.
+ MOZ_RELEASE_ASSERT(
+ !((address.base == dest.low || address.base == dest.high) &&
+ (address.index == dest.low || address.index == dest.high)));
+ bool highBeforeLow = address.base == dest.low || address.index == dest.low;
+ if (highBeforeLow) {
+ load32(HighWord(address), dest.high);
+ load32(LowWord(address), dest.low);
+ } else {
+ load32(LowWord(address), dest.low);
+ load32(HighWord(address), dest.high);
+ }
+ }
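+  // Ordering note for the load64 overloads above: if the address register(s)
+  // alias dest.low, the low word must be loaded last so the base (or index)
+  // still holds the address when the high word is read. E.g. (a sketch):
+  //   load64(Address(r0, 0), Register64(r1, r0));  // high in r1, low in r0
+  // must load the high word first, since loading into r0 first would clobber
+  // the base register.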
+
+ template <typename S>
+ void load64Unaligned(const S& src, Register64 dest) {
+ // load64 calls load32, which supports unaligned accesses.
+ load64(src, dest);
+ }
+
+ void loadPtr(const Address& address, Register dest);
+ void loadPtr(const BaseIndex& src, Register dest);
+ void loadPtr(AbsoluteAddress address, Register dest);
+ void loadPtr(wasm::SymbolicAddress address, Register dest);
+
+ void loadPrivate(const Address& address, Register dest);
+
+ void loadDouble(const Address& addr, FloatRegister dest);
+ void loadDouble(const BaseIndex& src, FloatRegister dest);
+
+ // Load a float value into a register, then expand it to a double.
+ void loadFloatAsDouble(const Address& addr, FloatRegister dest);
+ void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest);
+
+ void loadFloat32(const Address& addr, FloatRegister dest);
+ void loadFloat32(const BaseIndex& src, FloatRegister dest);
+
+ void store8(Register src, const Address& address);
+ void store8(Imm32 imm, const Address& address);
+ void store8(Register src, const BaseIndex& address);
+ void store8(Imm32 imm, const BaseIndex& address);
+
+ void store16(Register src, const Address& address);
+ void store16(Imm32 imm, const Address& address);
+ void store16(Register src, const BaseIndex& address);
+ void store16(Imm32 imm, const BaseIndex& address);
+
+ template <typename S, typename T>
+ void store16Unaligned(const S& src, const T& dest) {
+ // store16 uses |strh|, which supports unaligned access.
+ store16(src, dest);
+ }
+
+ void store32(Register src, AbsoluteAddress address);
+ void store32(Register src, const Address& address);
+ void store32(Register src, const BaseIndex& address);
+ void store32(Imm32 src, const Address& address);
+ void store32(Imm32 src, const BaseIndex& address);
+
+ template <typename S, typename T>
+ void store32Unaligned(const S& src, const T& dest) {
+ // store32 uses |str|, which supports unaligned access.
+ store32(src, dest);
+ }
+
+ void store64(Register64 src, Address address) {
+ store32(src.low, LowWord(address));
+ store32(src.high, HighWord(address));
+ }
+
+ void store64(Register64 src, const BaseIndex& address) {
+ store32(src.low, LowWord(address));
+ store32(src.high, HighWord(address));
+ }
+
+ void store64(Imm64 imm, Address address) {
+ store32(imm.low(), LowWord(address));
+ store32(imm.hi(), HighWord(address));
+ }
+
+ void store64(Imm64 imm, const BaseIndex& address) {
+ store32(imm.low(), LowWord(address));
+ store32(imm.hi(), HighWord(address));
+ }
+
+ template <typename S, typename T>
+ void store64Unaligned(const S& src, const T& dest) {
+ // store64 calls store32, which supports unaligned access.
+ store64(src, dest);
+ }
+
+ void storePtr(ImmWord imm, const Address& address);
+ void storePtr(ImmWord imm, const BaseIndex& address);
+ void storePtr(ImmPtr imm, const Address& address);
+ void storePtr(ImmPtr imm, const BaseIndex& address);
+ void storePtr(ImmGCPtr imm, const Address& address);
+ void storePtr(ImmGCPtr imm, const BaseIndex& address);
+ void storePtr(Register src, const Address& address);
+ void storePtr(Register src, const BaseIndex& address);
+ void storePtr(Register src, AbsoluteAddress dest);
+
+ void moveDouble(FloatRegister src, FloatRegister dest,
+ Condition cc = Always) {
+ ma_vmov(src, dest, cc);
+ }
+
+ inline void incrementInt32Value(const Address& addr);
+
+ void cmp32(Register lhs, Imm32 rhs);
+ void cmp32(Register lhs, Register rhs);
+ void cmp32(const Address& lhs, Imm32 rhs);
+ void cmp32(const Address& lhs, Register rhs);
+
+ void cmpPtr(Register lhs, Register rhs);
+ void cmpPtr(Register lhs, ImmWord rhs);
+ void cmpPtr(Register lhs, ImmPtr rhs);
+ void cmpPtr(Register lhs, ImmGCPtr rhs);
+ void cmpPtr(Register lhs, Imm32 rhs);
+ void cmpPtr(const Address& lhs, Register rhs);
+ void cmpPtr(const Address& lhs, ImmWord rhs);
+ void cmpPtr(const Address& lhs, ImmPtr rhs);
+ void cmpPtr(const Address& lhs, ImmGCPtr rhs);
+ void cmpPtr(const Address& lhs, Imm32 rhs);
+
+ void setStackArg(Register reg, uint32_t arg);
+
+ void breakpoint();
+ // Conditional breakpoint.
+ void breakpoint(Condition cc);
+
+ // Trigger the simulator's interactive read-eval-print loop.
+ // The message will be printed at the stopping point.
+ // (On non-simulator builds, does nothing.)
+ void simulatorStop(const char* msg);
+
+ // Evaluate srcDest = minmax<isMax>{Float32,Double}(srcDest, other).
+ // Checks for NaN if canBeNaN is true.
+ void minMaxDouble(FloatRegister srcDest, FloatRegister other, bool canBeNaN,
+ bool isMax);
+ void minMaxFloat32(FloatRegister srcDest, FloatRegister other, bool canBeNaN,
+ bool isMax);
+
+ void compareDouble(FloatRegister lhs, FloatRegister rhs);
+
+ void compareFloat(FloatRegister lhs, FloatRegister rhs);
+
+ void checkStackAlignment();
+
+ // If source is a double, load it into dest. If source is int32, convert it
+ // to double. Else, branch to failure.
+ void ensureDouble(const ValueOperand& source, FloatRegister dest,
+ Label* failure);
+
+ void emitSet(Assembler::Condition cond, Register dest) {
+ ma_mov(Imm32(0), dest);
+ ma_mov(Imm32(1), dest, cond);
+ }
+
+ void testNullSet(Condition cond, const ValueOperand& value, Register dest) {
+ cond = testNull(cond, value);
+ emitSet(cond, dest);
+ }
+
+ void testObjectSet(Condition cond, const ValueOperand& value, Register dest) {
+ cond = testObject(cond, value);
+ emitSet(cond, dest);
+ }
+
+ void testUndefinedSet(Condition cond, const ValueOperand& value,
+ Register dest) {
+ cond = testUndefined(cond, value);
+ emitSet(cond, dest);
+ }
+
+ protected:
+ bool buildOOLFakeExitFrame(void* fakeReturnAddr);
+
+ public:
+ void computeEffectiveAddress(const Address& address, Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_add(address.base, Imm32(address.offset), dest, scratch, LeaveCC);
+ }
+ void computeEffectiveAddress(const BaseIndex& address, Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_alu(address.base, lsl(address.index, address.scale), dest, OpAdd,
+ LeaveCC);
+ if (address.offset) {
+ ma_add(dest, Imm32(address.offset), dest, scratch, LeaveCC);
+ }
+ }
+ void floor(FloatRegister input, Register output, Label* handleNotAnInt);
+ void floorf(FloatRegister input, Register output, Label* handleNotAnInt);
+ void ceil(FloatRegister input, Register output, Label* handleNotAnInt);
+ void ceilf(FloatRegister input, Register output, Label* handleNotAnInt);
+ void round(FloatRegister input, Register output, Label* handleNotAnInt,
+ FloatRegister tmp);
+ void roundf(FloatRegister input, Register output, Label* handleNotAnInt,
+ FloatRegister tmp);
+ void trunc(FloatRegister input, Register output, Label* handleNotAnInt);
+ void truncf(FloatRegister input, Register output, Label* handleNotAnInt);
+
+ void lea(Operand addr, Register dest) {
+ ScratchRegisterScope scratch(asMasm());
+ ma_add(addr.baseReg(), Imm32(addr.disp()), dest, scratch);
+ }
+
+ void abiret() { as_bx(lr); }
+
+ void moveFloat32(FloatRegister src, FloatRegister dest,
+ Condition cc = Always) {
+ as_vmov(VFPRegister(dest).singleOverlay(), VFPRegister(src).singleOverlay(),
+ cc);
+ }
+
+ // Instrumentation for entering and leaving the profiler.
+ void profilerEnterFrame(Register framePtr, Register scratch);
+ void profilerExitFrame();
+};
+
+typedef MacroAssemblerARMCompat MacroAssemblerSpecific;
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_MacroAssembler_arm_h */
diff --git a/js/src/jit/arm/MoveEmitter-arm.cpp b/js/src/jit/arm/MoveEmitter-arm.cpp
new file mode 100644
index 0000000000..1807c41b50
--- /dev/null
+++ b/js/src/jit/arm/MoveEmitter-arm.cpp
@@ -0,0 +1,413 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm/MoveEmitter-arm.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+MoveEmitterARM::MoveEmitterARM(MacroAssembler& masm)
+ : inCycle_(0),
+ masm(masm),
+ pushedAtCycle_(-1),
+ pushedAtSpill_(-1),
+ spilledReg_(InvalidReg),
+ spilledFloatReg_(InvalidFloatReg) {
+ pushedAtStart_ = masm.framePushed();
+}
+
+void MoveEmitterARM::emit(const MoveResolver& moves) {
+ if (moves.numCycles()) {
+ // Reserve stack for cycle resolution
+ static_assert(SpillSlotSize == 8);
+ masm.reserveStack(moves.numCycles() * SpillSlotSize);
+ pushedAtCycle_ = masm.framePushed();
+ }
+
+ for (size_t i = 0; i < moves.numMoves(); i++) {
+ emit(moves.getMove(i));
+ }
+}
+
+MoveEmitterARM::~MoveEmitterARM() { assertDone(); }
+
+Address MoveEmitterARM::cycleSlot(uint32_t slot, uint32_t subslot) const {
+ int32_t offset = masm.framePushed() - pushedAtCycle_;
+ MOZ_ASSERT(offset < 4096 && offset > -4096);
+ return Address(StackPointer, offset + slot * sizeof(double) + subslot);
+}
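+// Each cycle slot above is one double-sized (8-byte) stack cell, per the
+// SpillSlotSize static_assert in emit(); |subslot| selects a 4-byte half,
+// so e.g. cycleSlot(1, 4) addresses the upper word of slot 1.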
+
+Address MoveEmitterARM::spillSlot() const {
+ int32_t offset = masm.framePushed() - pushedAtSpill_;
+ MOZ_ASSERT(offset < 4096 && offset > -4096);
+ return Address(StackPointer, offset);
+}
+
+Address MoveEmitterARM::toAddress(const MoveOperand& operand) const {
+ MOZ_ASSERT(operand.isMemoryOrEffectiveAddress());
+
+ if (operand.base() != StackPointer) {
+ return Address(operand.base(), operand.disp());
+ }
+
+ MOZ_ASSERT(operand.disp() >= 0);
+
+ // Otherwise, the stack offset may need to be adjusted.
+ return Address(StackPointer,
+ operand.disp() + (masm.framePushed() - pushedAtStart_));
+}
+
+Register MoveEmitterARM::tempReg() {
+ if (spilledReg_ != InvalidReg) {
+ return spilledReg_;
+ }
+
+  // Pick an eviction point. r12/ip would be the obvious choice, but it is
+  // the scratch register, which is frequently used for address computations
+  // (e.g. when accessing values more than 4096 bytes off of the stack
+  // pointer), so use lr, the LinkRegister, instead.
+ spilledReg_ = r14;
+ if (pushedAtSpill_ == -1) {
+ masm.Push(spilledReg_);
+ pushedAtSpill_ = masm.framePushed();
+ } else {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_str(spilledReg_, spillSlot(), scratch);
+ }
+ return spilledReg_;
+}
+
+void MoveEmitterARM::breakCycle(const MoveOperand& from, const MoveOperand& to,
+ MoveOp::Type type, uint32_t slotId) {
+  // A move cycle has a pattern such as:
+ // (A -> B)
+ // (B -> A)
+ //
+ // This case handles (A -> B), which we reach first. We save B, then allow
+ // the original move to continue.
+
+ ScratchRegisterScope scratch(masm);
+
+ switch (type) {
+ case MoveOp::FLOAT32:
+ if (to.isMemory()) {
+ ScratchFloat32Scope scratchFloat32(masm);
+ masm.ma_vldr(toAddress(to), scratchFloat32, scratch);
+        // It is uncertain whether the reload in completeCycle will read the
+        // low or the high word of the slot, so fill both words with the same
+        // value.
+ masm.ma_vstr(scratchFloat32, cycleSlot(slotId, 0), scratch);
+ masm.ma_vstr(scratchFloat32, cycleSlot(slotId, 4), scratch);
+ } else if (to.isGeneralReg()) {
+        // It is uncertain whether the reload in completeCycle will read the
+        // low or the high word of the slot, so fill both words with the same
+        // value.
+ masm.ma_str(to.reg(), cycleSlot(slotId, 0), scratch);
+ masm.ma_str(to.reg(), cycleSlot(slotId, 4), scratch);
+ } else {
+ FloatRegister src = to.floatReg();
+ // Just always store the largest possible size. Currently, this is
+ // a double. When SIMD is added, two doubles will need to be stored.
+ masm.ma_vstr(src.doubleOverlay(), cycleSlot(slotId, 0), scratch);
+ }
+ break;
+ case MoveOp::DOUBLE:
+ if (to.isMemory()) {
+ ScratchDoubleScope scratchDouble(masm);
+ masm.ma_vldr(toAddress(to), scratchDouble, scratch);
+ masm.ma_vstr(scratchDouble, cycleSlot(slotId, 0), scratch);
+ } else if (to.isGeneralRegPair()) {
+ ScratchDoubleScope scratchDouble(masm);
+ masm.ma_vxfer(to.evenReg(), to.oddReg(), scratchDouble);
+ masm.ma_vstr(scratchDouble, cycleSlot(slotId, 0), scratch);
+ } else {
+ masm.ma_vstr(to.floatReg().doubleOverlay(), cycleSlot(slotId, 0),
+ scratch);
+ }
+ break;
+ case MoveOp::INT32:
+ case MoveOp::GENERAL:
+      // A non-VFP value.
+ if (to.isMemory()) {
+ Register temp = tempReg();
+ masm.ma_ldr(toAddress(to), temp, scratch);
+ masm.ma_str(temp, cycleSlot(0, 0), scratch);
+ } else {
+ if (to.reg() == spilledReg_) {
+ // If the destination was spilled, restore it first.
+ masm.ma_ldr(spillSlot(), spilledReg_, scratch);
+ spilledReg_ = InvalidReg;
+ }
+ masm.ma_str(to.reg(), cycleSlot(0, 0), scratch);
+ }
+ break;
+ default:
+ MOZ_CRASH("Unexpected move type");
+ }
+}
+
+void MoveEmitterARM::completeCycle(const MoveOperand& from,
+ const MoveOperand& to, MoveOp::Type type,
+ uint32_t slotId) {
+  // A move cycle has a pattern such as:
+ // (A -> B)
+ // (B -> A)
+ //
+ // This case handles (B -> A), which we reach last. We emit a move from the
+ // saved value of B, to A.
+
+ ScratchRegisterScope scratch(masm);
+
+ switch (type) {
+ case MoveOp::FLOAT32:
+ MOZ_ASSERT(!to.isGeneralRegPair());
+ if (to.isMemory()) {
+ ScratchFloat32Scope scratchFloat32(masm);
+ masm.ma_vldr(cycleSlot(slotId, 0), scratchFloat32, scratch);
+ masm.ma_vstr(scratchFloat32, toAddress(to), scratch);
+ } else if (to.isGeneralReg()) {
+ MOZ_ASSERT(type == MoveOp::FLOAT32);
+ masm.ma_ldr(toAddress(from), to.reg(), scratch);
+ } else {
+ uint32_t offset = 0;
+ if ((!from.isMemory()) && from.floatReg().numAlignedAliased() == 1) {
+ offset = sizeof(float);
+ }
+ masm.ma_vldr(cycleSlot(slotId, offset), to.floatReg(), scratch);
+ }
+ break;
+ case MoveOp::DOUBLE:
+ MOZ_ASSERT(!to.isGeneralReg());
+ if (to.isMemory()) {
+ ScratchDoubleScope scratchDouble(masm);
+ masm.ma_vldr(cycleSlot(slotId, 0), scratchDouble, scratch);
+ masm.ma_vstr(scratchDouble, toAddress(to), scratch);
+ } else if (to.isGeneralRegPair()) {
+ MOZ_ASSERT(type == MoveOp::DOUBLE);
+ ScratchDoubleScope scratchDouble(masm);
+ masm.ma_vldr(toAddress(from), scratchDouble, scratch);
+ masm.ma_vxfer(scratchDouble, to.evenReg(), to.oddReg());
+ } else {
+ uint32_t offset = 0;
+ if ((!from.isMemory()) && from.floatReg().numAlignedAliased() == 1) {
+ offset = sizeof(float);
+ }
+ masm.ma_vldr(cycleSlot(slotId, offset), to.floatReg(), scratch);
+ }
+ break;
+ case MoveOp::INT32:
+ case MoveOp::GENERAL:
+ MOZ_ASSERT(slotId == 0);
+ if (to.isMemory()) {
+ Register temp = tempReg();
+ masm.ma_ldr(cycleSlot(slotId, 0), temp, scratch);
+ masm.ma_str(temp, toAddress(to), scratch);
+ } else {
+ if (to.reg() == spilledReg_) {
+ // Make sure we don't re-clobber the spilled register later.
+ spilledReg_ = InvalidReg;
+ }
+ masm.ma_ldr(cycleSlot(slotId, 0), to.reg(), scratch);
+ }
+ break;
+ default:
+ MOZ_CRASH("Unexpected move type");
+ }
+}
+
+void MoveEmitterARM::emitMove(const MoveOperand& from, const MoveOperand& to) {
+ // Register pairs are used to store Double values during calls.
+ MOZ_ASSERT(!from.isGeneralRegPair());
+ MOZ_ASSERT(!to.isGeneralRegPair());
+
+ ScratchRegisterScope scratch(masm);
+
+ if (to.isGeneralReg() && to.reg() == spilledReg_) {
+ // If the destination is the spilled register, make sure we
+ // don't re-clobber its value.
+ spilledReg_ = InvalidReg;
+ }
+
+ if (from.isGeneralReg()) {
+ if (from.reg() == spilledReg_) {
+ // If the source is a register that has been spilled, make sure
+ // to load the source back into that register.
+ masm.ma_ldr(spillSlot(), spilledReg_, scratch);
+ spilledReg_ = InvalidReg;
+ }
+ if (to.isMemoryOrEffectiveAddress()) {
+ masm.ma_str(from.reg(), toAddress(to), scratch);
+ } else {
+ masm.ma_mov(from.reg(), to.reg());
+ }
+ } else if (to.isGeneralReg()) {
+ MOZ_ASSERT(from.isMemoryOrEffectiveAddress());
+ if (from.isMemory()) {
+ masm.ma_ldr(toAddress(from), to.reg(), scratch);
+ } else {
+ masm.ma_add(from.base(), Imm32(from.disp()), to.reg(), scratch);
+ }
+ } else {
+ // Memory to memory gpr move.
+ Register reg = tempReg();
+
+ MOZ_ASSERT(from.isMemoryOrEffectiveAddress());
+ if (from.isMemory()) {
+ masm.ma_ldr(toAddress(from), reg, scratch);
+ } else {
+ masm.ma_add(from.base(), Imm32(from.disp()), reg, scratch);
+ }
+ MOZ_ASSERT(to.base() != reg);
+ masm.ma_str(reg, toAddress(to), scratch);
+ }
+}
+
+void MoveEmitterARM::emitFloat32Move(const MoveOperand& from,
+ const MoveOperand& to) {
+ // Register pairs are used to store Double values during calls.
+ MOZ_ASSERT(!from.isGeneralRegPair());
+ MOZ_ASSERT(!to.isGeneralRegPair());
+
+ ScratchRegisterScope scratch(masm);
+
+ if (from.isFloatReg()) {
+ if (to.isFloatReg()) {
+ masm.ma_vmov_f32(from.floatReg(), to.floatReg());
+ } else if (to.isGeneralReg()) {
+ masm.ma_vxfer(from.floatReg(), to.reg());
+ } else {
+ masm.ma_vstr(VFPRegister(from.floatReg()).singleOverlay(), toAddress(to),
+ scratch);
+ }
+ } else if (from.isGeneralReg()) {
+ if (to.isFloatReg()) {
+ masm.ma_vxfer(from.reg(), to.floatReg());
+ } else if (to.isGeneralReg()) {
+ masm.ma_mov(from.reg(), to.reg());
+ } else {
+ masm.ma_str(from.reg(), toAddress(to), scratch);
+ }
+ } else if (to.isFloatReg()) {
+ masm.ma_vldr(toAddress(from), VFPRegister(to.floatReg()).singleOverlay(),
+ scratch);
+ } else if (to.isGeneralReg()) {
+ masm.ma_ldr(toAddress(from), to.reg(), scratch);
+ } else {
+ // Memory to memory move.
+ MOZ_ASSERT(from.isMemory());
+ ScratchFloat32Scope scratchFloat32(masm);
+ masm.ma_vldr(toAddress(from), scratchFloat32, scratch);
+ masm.ma_vstr(scratchFloat32, toAddress(to), scratch);
+ }
+}
+
+void MoveEmitterARM::emitDoubleMove(const MoveOperand& from,
+ const MoveOperand& to) {
+ // Registers are used to store pointers / int32 / float32 values.
+ MOZ_ASSERT(!from.isGeneralReg());
+ MOZ_ASSERT(!to.isGeneralReg());
+
+ ScratchRegisterScope scratch(masm);
+
+ if (from.isFloatReg()) {
+ if (to.isFloatReg()) {
+ masm.ma_vmov(from.floatReg(), to.floatReg());
+ } else if (to.isGeneralRegPair()) {
+ masm.ma_vxfer(from.floatReg(), to.evenReg(), to.oddReg());
+ } else {
+ masm.ma_vstr(from.floatReg(), toAddress(to), scratch);
+ }
+ } else if (from.isGeneralRegPair()) {
+ if (to.isFloatReg()) {
+ masm.ma_vxfer(from.evenReg(), from.oddReg(), to.floatReg());
+ } else if (to.isGeneralRegPair()) {
+ MOZ_ASSERT(!from.aliases(to));
+ masm.ma_mov(from.evenReg(), to.evenReg());
+ masm.ma_mov(from.oddReg(), to.oddReg());
+ } else {
+ ScratchDoubleScope scratchDouble(masm);
+ masm.ma_vxfer(from.evenReg(), from.oddReg(), scratchDouble);
+ masm.ma_vstr(scratchDouble, toAddress(to), scratch);
+ }
+ } else if (to.isFloatReg()) {
+ masm.ma_vldr(toAddress(from), to.floatReg(), scratch);
+ } else if (to.isGeneralRegPair()) {
+ MOZ_ASSERT(from.isMemory());
+ Address src = toAddress(from);
+    // Note: We can safely use the MoveOperand's displacement here,
+    // even if the base is SP: MoveEmitterARM::toAddress adjusts
+    // SP-relative operands by the amount the stack has grown since
+    // the emitter started (framePushed() - pushedAtStart_), which
+    // finish() releases again.
+ //
+ // Warning: if the offset isn't within [-255,+255] then this
+ // will assert-fail (or, if non-debug, load the wrong words).
+ // Nothing uses such an offset at the time of this writing.
+ masm.ma_ldrd(EDtrAddr(src.base, EDtrOffImm(src.offset)), to.evenReg(),
+ to.oddReg());
+ } else {
+ // Memory to memory move.
+ MOZ_ASSERT(from.isMemory());
+ ScratchDoubleScope scratchDouble(masm);
+ masm.ma_vldr(toAddress(from), scratchDouble, scratch);
+ masm.ma_vstr(scratchDouble, toAddress(to), scratch);
+ }
+}
+
+void MoveEmitterARM::emit(const MoveOp& move) {
+ const MoveOperand& from = move.from();
+ const MoveOperand& to = move.to();
+
+ if (move.isCycleEnd() && move.isCycleBegin()) {
+    // A fun consequence of aliased registers is that you can have multiple
+    // cycles at once, and one can end exactly where another begins.
+ breakCycle(from, to, move.endCycleType(), move.cycleBeginSlot());
+ completeCycle(from, to, move.type(), move.cycleEndSlot());
+ return;
+ }
+
+ if (move.isCycleEnd()) {
+ MOZ_ASSERT(inCycle_);
+ completeCycle(from, to, move.type(), move.cycleEndSlot());
+ MOZ_ASSERT(inCycle_ > 0);
+ inCycle_--;
+ return;
+ }
+
+ if (move.isCycleBegin()) {
+ breakCycle(from, to, move.endCycleType(), move.cycleBeginSlot());
+ inCycle_++;
+ }
+
+ switch (move.type()) {
+ case MoveOp::FLOAT32:
+ emitFloat32Move(from, to);
+ break;
+ case MoveOp::DOUBLE:
+ emitDoubleMove(from, to);
+ break;
+ case MoveOp::INT32:
+ case MoveOp::GENERAL:
+ emitMove(from, to);
+ break;
+ default:
+ MOZ_CRASH("Unexpected move type");
+ }
+}
+
+void MoveEmitterARM::assertDone() { MOZ_ASSERT(inCycle_ == 0); }
+
+void MoveEmitterARM::finish() {
+ assertDone();
+
+ if (pushedAtSpill_ != -1 && spilledReg_ != InvalidReg) {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_ldr(spillSlot(), spilledReg_, scratch);
+ }
+ masm.freeStack(masm.framePushed() - pushedAtStart_);
+}
diff --git a/js/src/jit/arm/MoveEmitter-arm.h b/js/src/jit/arm/MoveEmitter-arm.h
new file mode 100644
index 0000000000..26a84fdbcc
--- /dev/null
+++ b/js/src/jit/arm/MoveEmitter-arm.h
@@ -0,0 +1,70 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_MoveEmitter_arm_h
+#define jit_arm_MoveEmitter_arm_h
+
+#include <stdint.h>
+
+#include "jit/MoveResolver.h"
+#include "jit/Registers.h"
+
+namespace js {
+namespace jit {
+
+struct Address;
+class MacroAssembler;
+
+class MoveEmitterARM {
+ uint32_t inCycle_;
+ MacroAssembler& masm;
+
+ // Original stack push value.
+ uint32_t pushedAtStart_;
+
+ // These store stack offsets to spill locations, snapshotting
+ // codegen->framePushed_ at the time they were allocated. They are -1 if no
+ // stack space has been allocated for that particular spill.
+ int32_t pushedAtCycle_;
+ int32_t pushedAtSpill_;
+
+ // These are registers that are available for temporary use. They may be
+ // assigned InvalidReg. If no corresponding spill space has been assigned,
+ // then these registers do not need to be spilled.
+ Register spilledReg_;
+ FloatRegister spilledFloatReg_;
+
+ void assertDone();
+ Register tempReg();
+ FloatRegister tempFloatReg();
+ Address cycleSlot(uint32_t slot, uint32_t subslot) const;
+ Address spillSlot() const;
+ Address toAddress(const MoveOperand& operand) const;
+
+ void emitMove(const MoveOperand& from, const MoveOperand& to);
+ void emitFloat32Move(const MoveOperand& from, const MoveOperand& to);
+ void emitDoubleMove(const MoveOperand& from, const MoveOperand& to);
+ void breakCycle(const MoveOperand& from, const MoveOperand& to,
+ MoveOp::Type type, uint32_t slot);
+ void completeCycle(const MoveOperand& from, const MoveOperand& to,
+ MoveOp::Type type, uint32_t slot);
+ void emit(const MoveOp& move);
+
+ public:
+ explicit MoveEmitterARM(MacroAssembler& masm);
+ ~MoveEmitterARM();
+ void emit(const MoveResolver& moves);
+ void finish();
+
+ void setScratchRegister(Register reg) {}
+};
+
+typedef MoveEmitterARM MoveEmitter;
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_MoveEmitter_arm_h */
diff --git a/js/src/jit/arm/SharedICHelpers-arm-inl.h b/js/src/jit/arm/SharedICHelpers-arm-inl.h
new file mode 100644
index 0000000000..2943bafbd8
--- /dev/null
+++ b/js/src/jit/arm/SharedICHelpers-arm-inl.h
@@ -0,0 +1,79 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_SharedICHelpers_arm_inl_h
+#define jit_arm_SharedICHelpers_arm_inl_h
+
+#include "jit/BaselineFrame.h"
+#include "jit/SharedICHelpers.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+namespace js {
+namespace jit {
+
+inline void EmitBaselineTailCallVM(TrampolinePtr target, MacroAssembler& masm,
+ uint32_t argSize) {
+#ifdef DEBUG
+ // We assume during this that R0 and R1 have been pushed, and that R2 is
+ // unused.
+ static_assert(R2 == ValueOperand(r1, r0));
+
+ // Store frame size without VMFunction arguments for debug assertions.
+ masm.movePtr(FramePointer, r0);
+ masm.ma_sub(StackPointer, r0);
+ masm.sub32(Imm32(argSize), r0);
+ Address frameSizeAddr(FramePointer,
+ BaselineFrame::reverseOffsetOfDebugFrameSize());
+ masm.store32(r0, frameSizeAddr);
+#endif
+
+ // Push frame descriptor and perform the tail call.
+ // ICTailCallReg (lr) already contains the return address (as we keep
+ // it there through the stub calls), but the VMWrapper code being called
+ // expects the return address to also be pushed on the stack.
+ static_assert(ICTailCallReg == lr);
+ masm.pushFrameDescriptor(FrameType::BaselineJS);
+ masm.push(lr);
+ masm.jump(target);
+}
+
+inline void EmitBaselineCallVM(TrampolinePtr target, MacroAssembler& masm) {
+ masm.pushFrameDescriptor(FrameType::BaselineStub);
+ masm.call(target);
+}
+
+inline void EmitBaselineEnterStubFrame(MacroAssembler& masm, Register scratch) {
+ MOZ_ASSERT(scratch != ICTailCallReg);
+
+#ifdef DEBUG
+ // Compute frame size.
+ masm.mov(FramePointer, scratch);
+ masm.ma_sub(StackPointer, scratch);
+
+ Address frameSizeAddr(FramePointer,
+ BaselineFrame::reverseOffsetOfDebugFrameSize());
+ masm.store32(scratch, frameSizeAddr);
+#endif
+
+ // Push frame descriptor and return address.
+ masm.PushFrameDescriptor(FrameType::BaselineJS);
+ masm.Push(ICTailCallReg);
+
+ // Save old frame pointer, stack pointer and stub reg.
+ masm.Push(FramePointer);
+ masm.mov(StackPointer, FramePointer);
+
+ masm.Push(ICStubReg);
+
+ // We pushed 4 words, so the stack is still aligned to 8 bytes.
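+  // (4 words * 4 bytes == 16 bytes, a multiple of the 8-byte alignment.)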
+ masm.checkStackAlignment();
+}
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_SharedICHelpers_arm_inl_h */
diff --git a/js/src/jit/arm/SharedICHelpers-arm.h b/js/src/jit/arm/SharedICHelpers-arm.h
new file mode 100644
index 0000000000..93475abc62
--- /dev/null
+++ b/js/src/jit/arm/SharedICHelpers-arm.h
@@ -0,0 +1,80 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_SharedICHelpers_arm_h
+#define jit_arm_SharedICHelpers_arm_h
+
+#include "jit/BaselineIC.h"
+#include "jit/JitFrames.h"
+#include "jit/MacroAssembler.h"
+#include "jit/SharedICRegisters.h"
+
+namespace js {
+namespace jit {
+
+// Distance from sp to the top Value inside an IC stub (no return address on the
+// stack on ARM).
+static const size_t ICStackValueOffset = 0;
+
+inline void EmitRestoreTailCallReg(MacroAssembler& masm) {
+ // No-op on ARM because link register is always holding the return address.
+}
+
+inline void EmitRepushTailCallReg(MacroAssembler& masm) {
+ // No-op on ARM because link register is always holding the return address.
+}
+
+inline void EmitCallIC(MacroAssembler& masm, CodeOffset* callOffset) {
+ // The stub pointer must already be in ICStubReg.
+ // Load stubcode pointer from the ICStub.
+ // R2 won't be active when we call ICs, so we can use r0.
+ static_assert(R2 == ValueOperand(r1, r0));
+ masm.loadPtr(Address(ICStubReg, ICStub::offsetOfStubCode()), r0);
+
+ // Call the stubcode via a direct branch-and-link.
+ masm.ma_blx(r0);
+ *callOffset = CodeOffset(masm.currentOffset());
+}
+
+inline void EmitReturnFromIC(MacroAssembler& masm) { masm.ma_mov(lr, pc); }
+
+inline void EmitBaselineLeaveStubFrame(MacroAssembler& masm) {
+ Address stubAddr(FramePointer, BaselineStubFrameLayout::ICStubOffsetFromFP);
+ masm.loadPtr(stubAddr, ICStubReg);
+
+ masm.mov(FramePointer, StackPointer);
+ masm.Pop(FramePointer);
+
+ // Load the return address.
+ masm.Pop(ICTailCallReg);
+
+ // Discard the frame descriptor.
+ ScratchRegisterScope scratch(masm);
+ masm.Pop(scratch);
+}
+
+template <typename AddrType>
+inline void EmitPreBarrier(MacroAssembler& masm, const AddrType& addr,
+ MIRType type) {
+ // On ARM, lr is clobbered by guardedCallPreBarrier. Save it first.
+ masm.push(lr);
+ masm.guardedCallPreBarrier(addr, type);
+ masm.pop(lr);
+}
+
+inline void EmitStubGuardFailure(MacroAssembler& masm) {
+ // Load next stub into ICStubReg.
+ masm.loadPtr(Address(ICStubReg, ICCacheIRStub::offsetOfNext()), ICStubReg);
+
+ // Return address is already loaded, just jump to the next stubcode.
+ static_assert(ICTailCallReg == lr);
+ masm.jump(Address(ICStubReg, ICStub::offsetOfStubCode()));
+}
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_SharedICHelpers_arm_h */
diff --git a/js/src/jit/arm/SharedICRegisters-arm.h b/js/src/jit/arm/SharedICRegisters-arm.h
new file mode 100644
index 0000000000..16aabbf0b3
--- /dev/null
+++ b/js/src/jit/arm/SharedICRegisters-arm.h
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm_SharedICRegisters_arm_h
+#define jit_arm_SharedICRegisters_arm_h
+
+#include "jit/arm/Assembler-arm.h"
+#include "jit/Registers.h"
+#include "jit/RegisterSets.h"
+
+namespace js {
+namespace jit {
+
+// r15 = program-counter
+// r14 = link-register
+// r13 = stack-pointer
+// r11 = frame-pointer
+
+// ValueOperands R0, R1, and R2.
+// R0 == JSReturnReg, and R2 uses registers that are not preserved across
+// calls. The R1 value should be preserved across calls.
+static constexpr ValueOperand R0(r3, r2);
+static constexpr ValueOperand R1(r5, r4);
+static constexpr ValueOperand R2(r1, r0);
+
+// ICTailCallReg and ICStubReg
+// These use registers that are not preserved across calls.
+static constexpr Register ICTailCallReg = r14;
+static constexpr Register ICStubReg = r9;
+
+// Register used internally by MacroAssemblerARM.
+static constexpr Register BaselineSecondScratchReg = r6;
+
+// R7 - R9 are generally available for use within stubcode.
+
+// Note that ICTailCallReg is actually just the link register. In ARM code
+// emission, we do not clobber ICTailCallReg since we keep the return
+// address for calls there.
+
+// FloatReg0 must be equal to ReturnFloatReg.
+static constexpr FloatRegister FloatReg0 = d0;
+static constexpr FloatRegister FloatReg1 = d1;
+static constexpr FloatRegister FloatReg2 = d2;
+static constexpr FloatRegister FloatReg3 = d3;
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm_SharedICRegisters_arm_h */
diff --git a/js/src/jit/arm/Simulator-arm.cpp b/js/src/jit/arm/Simulator-arm.cpp
new file mode 100644
index 0000000000..2afd6cb0de
--- /dev/null
+++ b/js/src/jit/arm/Simulator-arm.cpp
@@ -0,0 +1,5472 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm/Simulator-arm.h"
+
+#include "mozilla/Casting.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/EndianUtils.h"
+#include "mozilla/FloatingPoint.h"
+#include "mozilla/Likely.h"
+#include "mozilla/MathAlgorithms.h"
+
+#include "jit/arm/Assembler-arm.h"
+#include "jit/arm/disasm/Constants-arm.h"
+#include "jit/AtomicOperations.h"
+#include "js/UniquePtr.h"
+#include "js/Utility.h"
+#include "threading/LockGuard.h"
+#include "vm/JSContext.h"
+#include "vm/Runtime.h"
+#include "vm/SharedMem.h"
+#include "wasm/WasmInstance.h"
+#include "wasm/WasmSignalHandlers.h"
+
+extern "C" {
+
+MOZ_EXPORT int64_t __aeabi_idivmod(int x, int y) {
+ // Run-time ABI for the ARM architecture specifies that for |INT_MIN / -1|
+ // "an implementation is (sic) may return any convenient value, possibly the
+ // original numerator."
+ //
+ // |INT_MIN / -1| traps on x86, which isn't listed as an allowed behavior in
+ // the ARM docs, so instead follow LLVM and return the numerator. (And zero
+ // for the remainder.)
+
+ if (x == INT32_MIN && y == -1) {
+ return uint32_t(x);
+ }
+
+ uint32_t lo = uint32_t(x / y);
+ uint32_t hi = uint32_t(x % y);
+ return (int64_t(hi) << 32) | lo;
+}
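+// For illustration: __aeabi_idivmod(7, 3) yields quotient 2 and remainder 1,
+// returned here as (int64_t(1) << 32) | 2. The ABI returns the quotient in r0
+// and the remainder in r1; on little-endian ARM, the low word of the packed
+// int64_t lands in r0 and the high word in r1, as required.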
+
+MOZ_EXPORT int64_t __aeabi_uidivmod(int x, int y) {
+ uint32_t lo = uint32_t(x) / uint32_t(y);
+ uint32_t hi = uint32_t(x) % uint32_t(y);
+ return (int64_t(hi) << 32) | lo;
+}
+}
+
+namespace js {
+namespace jit {
+
+// For decoding load-exclusive and store-exclusive instructions.
+namespace excl {
+
+// Bit positions.
+enum {
+ ExclusiveOpHi = 24, // Hi bit of opcode field
+ ExclusiveOpLo = 23, // Lo bit of opcode field
+ ExclusiveSizeHi = 22, // Hi bit of operand size field
+ ExclusiveSizeLo = 21, // Lo bit of operand size field
+ ExclusiveLoad = 20 // Bit indicating load
+};
+
+// Opcode bits for exclusive instructions.
+enum { ExclusiveOpcode = 3 };
+
+// Operand size, Bits(ExclusiveSizeHi,ExclusiveSizeLo).
+enum {
+ ExclusiveWord = 0,
+ ExclusiveDouble = 1,
+ ExclusiveByte = 2,
+ ExclusiveHalf = 3
+};
+
+} // namespace excl
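+// For illustration, decoding with the fields above: an ldrexb instruction has
+// bits(ExclusiveOpHi, ExclusiveOpLo) == ExclusiveOpcode, a size field of
+// ExclusiveByte, and bit(ExclusiveLoad) set; strexb differs only in having
+// bit(ExclusiveLoad) clear.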
+
+// Load/store multiple addressing mode.
+enum BlockAddrMode {
+ // Alias modes for comparison when writeback does not matter.
+ da_x = (0 | 0 | 0) << 21, // Decrement after.
+ ia_x = (0 | 4 | 0) << 21, // Increment after.
+ db_x = (8 | 0 | 0) << 21, // Decrement before.
+ ib_x = (8 | 4 | 0) << 21, // Increment before.
+};
+
+// Type of VFP register. Determines register encoding.
+enum VFPRegPrecision { kSinglePrecision = 0, kDoublePrecision = 1 };
+
+enum NeonListType { nlt_1 = 0x7, nlt_2 = 0xA, nlt_3 = 0x6, nlt_4 = 0x2 };
+
+// Supervisor Call (svc) specific support.
+
+// Special Software Interrupt codes when used in the presence of the ARM
+// simulator.
+// svc (formerly swi) provides a 24-bit immediate value. Use bits 22:0 for
+// standard SoftwareInterruptCodes. Bit 23 is reserved for the stop feature.
+enum SoftwareInterruptCodes {
+ kCallRtRedirected = 0x10, // Transition to C code.
+ kBreakpoint = 0x20, // Breakpoint.
+ kStopCode = 1 << 23 // Stop.
+};
+
+const uint32_t kStopCodeMask = kStopCode - 1;
+const uint32_t kMaxStopCode = kStopCode - 1;
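+// For illustration: a stop with code N is encoded as |svc #(kStopCode | N)|,
+// and the simulator recovers N as |svcValue() & kStopCodeMask|.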
+
+// -----------------------------------------------------------------------------
+// Instruction abstraction.
+
+// The class Instruction enables access to individual fields defined in the ARM
+// architecture instruction set encoding as described in figure A3-1.
+// Note that the Assembler uses typedef int32_t Instr.
+//
+// Example: Test whether the instruction at ptr does set the condition code
+// bits.
+//
+// bool InstructionSetsConditionCodes(byte* ptr) {
+// Instruction* instr = Instruction::At(ptr);
+// int type = instr->TypeValue();
+// return ((type == 0) || (type == 1)) && instr->hasS();
+// }
+//
+class SimInstruction {
+ public:
+ enum { kInstrSize = 4, kPCReadOffset = 8 };
+
+ // Get the raw instruction bits.
+ inline Instr instructionBits() const {
+ return *reinterpret_cast<const Instr*>(this);
+ }
+
+ // Set the raw instruction bits to value.
+ inline void setInstructionBits(Instr value) {
+ *reinterpret_cast<Instr*>(this) = value;
+ }
+
+ // Read one particular bit out of the instruction bits.
+ inline int bit(int nr) const { return (instructionBits() >> nr) & 1; }
+
+ // Read a bit field's value out of the instruction bits.
+ inline int bits(int hi, int lo) const {
+ return (instructionBits() >> lo) & ((2 << (hi - lo)) - 1);
+ }
+
+ // Read a bit field out of the instruction bits.
+ inline int bitField(int hi, int lo) const {
+ return instructionBits() & (((2 << (hi - lo)) - 1) << lo);
+ }
+
+ // Accessors for the different named fields used in the ARM encoding.
+  // The naming of these accessors corresponds to figure A3-1.
+  //
+  // Two kinds of accessors are declared:
+  // - <Name>Field() will return the raw field, i.e. the field's bits at their
+  //   original place in the instruction encoding.
+  //   e.g. if instr is the 'addgt r0, r1, r2' instruction, encoded as
+  //   0xC0810002, conditionField(instr) will return 0xC0000000.
+  // - <Name>Value() will return the field value, shifted back to bit 0.
+  //   e.g. for the same instruction, the <Name>Value() form of the condition
+  //   accessor would return 0xC.
+
+ // Generally applicable fields
+ inline Assembler::ARMCondition conditionField() const {
+ return static_cast<Assembler::ARMCondition>(bitField(31, 28));
+ }
+ inline int typeValue() const { return bits(27, 25); }
+ inline int specialValue() const { return bits(27, 23); }
+
+ inline int rnValue() const { return bits(19, 16); }
+ inline int rdValue() const { return bits(15, 12); }
+
+ inline int coprocessorValue() const { return bits(11, 8); }
+
+ // Support for VFP.
+ // Vn(19-16) | Vd(15-12) | Vm(3-0)
+ inline int vnValue() const { return bits(19, 16); }
+ inline int vmValue() const { return bits(3, 0); }
+ inline int vdValue() const { return bits(15, 12); }
+ inline int nValue() const { return bit(7); }
+ inline int mValue() const { return bit(5); }
+ inline int dValue() const { return bit(22); }
+ inline int rtValue() const { return bits(15, 12); }
+ inline int pValue() const { return bit(24); }
+ inline int uValue() const { return bit(23); }
+ inline int opc1Value() const { return (bit(23) << 2) | bits(21, 20); }
+ inline int opc2Value() const { return bits(19, 16); }
+ inline int opc3Value() const { return bits(7, 6); }
+ inline int szValue() const { return bit(8); }
+ inline int VLValue() const { return bit(20); }
+ inline int VCValue() const { return bit(8); }
+ inline int VAValue() const { return bits(23, 21); }
+ inline int VBValue() const { return bits(6, 5); }
+ inline int VFPNRegValue(VFPRegPrecision pre) {
+ return VFPGlueRegValue(pre, 16, 7);
+ }
+ inline int VFPMRegValue(VFPRegPrecision pre) {
+ return VFPGlueRegValue(pre, 0, 5);
+ }
+ inline int VFPDRegValue(VFPRegPrecision pre) {
+ return VFPGlueRegValue(pre, 12, 22);
+ }
+
+ // Fields used in Data processing instructions.
+ inline int opcodeValue() const { return static_cast<ALUOp>(bits(24, 21)); }
+ inline ALUOp opcodeField() const {
+ return static_cast<ALUOp>(bitField(24, 21));
+ }
+ inline int sValue() const { return bit(20); }
+
+ // With register.
+ inline int rmValue() const { return bits(3, 0); }
+ inline ShiftType shifttypeValue() const {
+ return static_cast<ShiftType>(bits(6, 5));
+ }
+ inline int rsValue() const { return bits(11, 8); }
+ inline int shiftAmountValue() const { return bits(11, 7); }
+
+ // With immediate.
+ inline int rotateValue() const { return bits(11, 8); }
+ inline int immed8Value() const { return bits(7, 0); }
+ inline int immed4Value() const { return bits(19, 16); }
+ inline int immedMovwMovtValue() const {
+ return immed4Value() << 12 | offset12Value();
+ }
+
+ // Fields used in Load/Store instructions.
+ inline int PUValue() const { return bits(24, 23); }
+ inline int PUField() const { return bitField(24, 23); }
+ inline int bValue() const { return bit(22); }
+ inline int wValue() const { return bit(21); }
+ inline int lValue() const { return bit(20); }
+
+ // With register uses same fields as Data processing instructions above with
+ // immediate.
+ inline int offset12Value() const { return bits(11, 0); }
+
+ // Multiple.
+ inline int rlistValue() const { return bits(15, 0); }
+
+ // Extra loads and stores.
+ inline int signValue() const { return bit(6); }
+ inline int hValue() const { return bit(5); }
+ inline int immedHValue() const { return bits(11, 8); }
+ inline int immedLValue() const { return bits(3, 0); }
+
+ // Fields used in Branch instructions.
+ inline int linkValue() const { return bit(24); }
+ inline int sImmed24Value() const { return ((instructionBits() << 8) >> 8); }
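+  // The left-then-right shift above sign-extends the 24-bit branch offset,
+  // e.g. an encoded immediate of 0xFFFFFE yields int32_t(-2).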
+
+ // Fields used in Software interrupt instructions.
+ inline SoftwareInterruptCodes svcValue() const {
+ return static_cast<SoftwareInterruptCodes>(bits(23, 0));
+ }
+
+ // Test for special encodings of type 0 instructions (extra loads and
+ // stores, as well as multiplications).
+ inline bool isSpecialType0() const { return (bit(7) == 1) && (bit(4) == 1); }
+
+ // Test for miscellaneous instructions encodings of type 0 instructions.
+ inline bool isMiscType0() const {
+ return bit(24) == 1 && bit(23) == 0 && bit(20) == 0 && (bit(7) == 0);
+ }
+
+ // Test for a nop instruction, which falls under type 1.
+ inline bool isNopType1() const { return bits(24, 0) == 0x0120F000; }
+
+  // Test for a csdb instruction, which falls under type 1.
+ inline bool isCsdbType1() const { return bits(24, 0) == 0x0120F014; }
+
+ // Test for a stop instruction.
+ inline bool isStop() const {
+ return typeValue() == 7 && bit(24) == 1 && svcValue() >= kStopCode;
+ }
+
+ // Test for a udf instruction, which falls under type 3.
+ inline bool isUDF() const {
+ return (instructionBits() & 0xfff000f0) == 0xe7f000f0;
+ }
+
+ // Special accessors that test for existence of a value.
+ inline bool hasS() const { return sValue() == 1; }
+ inline bool hasB() const { return bValue() == 1; }
+ inline bool hasW() const { return wValue() == 1; }
+ inline bool hasL() const { return lValue() == 1; }
+ inline bool hasU() const { return uValue() == 1; }
+ inline bool hasSign() const { return signValue() == 1; }
+ inline bool hasH() const { return hValue() == 1; }
+ inline bool hasLink() const { return linkValue() == 1; }
+
+ // Decoding the double immediate in the vmov instruction.
+ double doubleImmedVmov() const;
+ // Decoding the float32 immediate in the vmov.f32 instruction.
+ float float32ImmedVmov() const;
+
+ private:
+ // Join split register codes, depending on single or double precision.
+ // four_bit is the position of the least-significant bit of the four
+ // bit specifier. one_bit is the position of the additional single bit
+ // specifier.
+ inline int VFPGlueRegValue(VFPRegPrecision pre, int four_bit, int one_bit) {
+ if (pre == kSinglePrecision) {
+ return (bits(four_bit + 3, four_bit) << 1) | bit(one_bit);
+ }
+ return (bit(one_bit) << 4) | bits(four_bit + 3, four_bit);
+ }
+
+ SimInstruction() = delete;
+ SimInstruction(const SimInstruction& other) = delete;
+ void operator=(const SimInstruction& other) = delete;
+};
+
+double SimInstruction::doubleImmedVmov() const {
+ // Reconstruct a double from the immediate encoded in the vmov instruction.
+ //
+ // instruction: [xxxxxxxx,xxxxabcd,xxxxxxxx,xxxxefgh]
+ // double: [aBbbbbbb,bbcdefgh,00000000,00000000,
+ // 00000000,00000000,00000000,00000000]
+ //
+ // where B = ~b. Only the high 16 bits are affected.
+ uint64_t high16;
+ high16 = (bits(17, 16) << 4) | bits(3, 0); // xxxxxxxx,xxcdefgh.
+ high16 |= (0xff * bit(18)) << 6; // xxbbbbbb,bbxxxxxx.
+ high16 |= (bit(18) ^ 1) << 14; // xBxxxxxx,xxxxxxxx.
+ high16 |= bit(19) << 15; // axxxxxxx,xxxxxxxx.
+
+ uint64_t imm = high16 << 48;
+ return mozilla::BitwiseCast<double>(imm);
+}
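+// Worked example for the reconstruction above: the vmov.f64 immediate with
+// abcd = 0b0111 and efgh = 0b0000 gives high16 == 0x3FF0, and
+// mozilla::BitwiseCast<double>(0x3FF0000000000000ULL) == 1.0.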
+
+float SimInstruction::float32ImmedVmov() const {
+ // Reconstruct a float32 from the immediate encoded in the vmov instruction.
+ //
+ // instruction: [xxxxxxxx,xxxxabcd,xxxxxxxx,xxxxefgh]
+ // float32: [aBbbbbbc, defgh000, 00000000, 00000000]
+ //
+ // where B = ~b. Only the high 16 bits are affected.
+ uint32_t imm;
+ imm = (bits(17, 16) << 23) | (bits(3, 0) << 19); // xxxxxxxc,defgh000.0.0
+ imm |= (0x1f * bit(18)) << 25; // xxbbbbbx,xxxxxxxx.0.0
+ imm |= (bit(18) ^ 1) << 30; // xBxxxxxx,xxxxxxxx.0.0
+ imm |= bit(19) << 31; // axxxxxxx,xxxxxxxx.0.0
+
+ return mozilla::BitwiseCast<float>(imm);
+}
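+// Likewise for float32: abcd = 0b0111 and efgh = 0b0000 give
+// imm == 0x3F800000, i.e. 1.0f.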
+
+class CachePage {
+ public:
+ static const int LINE_VALID = 0;
+ static const int LINE_INVALID = 1;
+ static const int kPageShift = 12;
+ static const int kPageSize = 1 << kPageShift;
+ static const int kPageMask = kPageSize - 1;
+ static const int kLineShift = 2; // The cache line is only 4 bytes right now.
+ static const int kLineLength = 1 << kLineShift;
+ static const int kLineMask = kLineLength - 1;
+
+ CachePage() { memset(&validity_map_, LINE_INVALID, sizeof(validity_map_)); }
+ char* validityByte(int offset) {
+ return &validity_map_[offset >> kLineShift];
+ }
+ char* cachedData(int offset) { return &data_[offset]; }
+
+ private:
+ char data_[kPageSize]; // The cached data.
+ static const int kValidityMapSize = kPageSize >> kLineShift;
+ char validity_map_[kValidityMapSize]; // One byte per line.
+};
+
+// Protects the icache() and redirection() properties of the
+// SimulatorProcess.
+class AutoLockSimulatorCache : public LockGuard<Mutex> {
+ using Base = LockGuard<Mutex>;
+
+ public:
+ explicit AutoLockSimulatorCache()
+ : Base(SimulatorProcess::singleton_->cacheLock_) {}
+};
+
+mozilla::Atomic<size_t, mozilla::ReleaseAcquire>
+ SimulatorProcess::ICacheCheckingDisableCount(
+ 1); // Checking is disabled by default.
+SimulatorProcess* SimulatorProcess::singleton_ = nullptr;
+
+int64_t Simulator::StopSimAt = -1L;
+
+Simulator* Simulator::Create() {
+ auto sim = MakeUnique<Simulator>();
+ if (!sim) {
+ return nullptr;
+ }
+
+ if (!sim->init()) {
+ return nullptr;
+ }
+
+ char* stopAtStr = getenv("ARM_SIM_STOP_AT");
+ int64_t stopAt;
+ if (stopAtStr && sscanf(stopAtStr, "%lld", &stopAt) == 1) {
+ fprintf(stderr, "\nStopping simulation at icount %lld\n", stopAt);
+ Simulator::StopSimAt = stopAt;
+ }
+
+ return sim.release();
+}
+
+void Simulator::Destroy(Simulator* sim) { js_delete(sim); }
+
+void Simulator::disassemble(SimInstruction* instr, size_t n) {
+#ifdef JS_DISASM_ARM
+ disasm::NameConverter converter;
+ disasm::Disassembler dasm(converter);
+ disasm::EmbeddedVector<char, disasm::ReasonableBufferSize> buffer;
+ while (n-- > 0) {
+ dasm.InstructionDecode(buffer, reinterpret_cast<uint8_t*>(instr));
+ fprintf(stderr, " 0x%08x %s\n", uint32_t(instr), buffer.start());
+ instr = reinterpret_cast<SimInstruction*>(
+ reinterpret_cast<uint8_t*>(instr) + 4);
+ }
+#endif
+}
+
+void Simulator::disasm(SimInstruction* instr) { disassemble(instr, 1); }
+
+void Simulator::disasm(SimInstruction* instr, size_t n) {
+ disassemble(instr, n);
+}
+
+void Simulator::disasm(SimInstruction* instr, size_t m, size_t n) {
+ disassemble(reinterpret_cast<SimInstruction*>(
+ reinterpret_cast<uint8_t*>(instr) - m * 4),
+ n);
+}
+
+// The ArmDebugger class is used by the simulator while debugging simulated ARM
+// code.
+class ArmDebugger {
+ public:
+ explicit ArmDebugger(Simulator* sim) : sim_(sim) {}
+
+ void stop(SimInstruction* instr);
+ void debug();
+
+ private:
+ static const Instr kBreakpointInstr =
+ (Assembler::AL | (7 * (1 << 25)) | (1 * (1 << 24)) | kBreakpoint);
+ static const Instr kNopInstr = (Assembler::AL | (13 * (1 << 21)));
+
+ Simulator* sim_;
+
+ int32_t getRegisterValue(int regnum);
+ double getRegisterPairDoubleValue(int regnum);
+ void getVFPDoubleRegisterValue(int regnum, double* value);
+ bool getValue(const char* desc, int32_t* value);
+ bool getVFPDoubleValue(const char* desc, double* value);
+
+ // Set or delete a breakpoint. Returns true if successful.
+ bool setBreakpoint(SimInstruction* breakpc);
+ bool deleteBreakpoint(SimInstruction* breakpc);
+
+ // Undo and redo all breakpoints. This is needed to bracket disassembly and
+ // execution to skip past breakpoints when run from the debugger.
+ void undoBreakpoints();
+ void redoBreakpoints();
+};
+
+void ArmDebugger::stop(SimInstruction* instr) {
+ // Get the stop code.
+ uint32_t code = instr->svcValue() & kStopCodeMask;
+ // Retrieve the encoded address, which comes just after this stop.
+ char* msg =
+ *reinterpret_cast<char**>(sim_->get_pc() + SimInstruction::kInstrSize);
+ // Update this stop description.
+ if (sim_->isWatchedStop(code) && !sim_->watched_stops_[code].desc) {
+ sim_->watched_stops_[code].desc = msg;
+ }
+ // Print the stop message and code if it is not the default code.
+ if (code != kMaxStopCode) {
+ printf("Simulator hit stop %u: %s\n", code, msg);
+ } else {
+ printf("Simulator hit %s\n", msg);
+ }
+ sim_->set_pc(sim_->get_pc() + 2 * SimInstruction::kInstrSize);
+ debug();
+}
+
+int32_t ArmDebugger::getRegisterValue(int regnum) {
+ if (regnum == Registers::pc) {
+ return sim_->get_pc();
+ }
+ return sim_->get_register(regnum);
+}
+
+double ArmDebugger::getRegisterPairDoubleValue(int regnum) {
+ return sim_->get_double_from_register_pair(regnum);
+}
+
+void ArmDebugger::getVFPDoubleRegisterValue(int regnum, double* out) {
+ sim_->get_double_from_d_register(regnum, out);
+}
+
+bool ArmDebugger::getValue(const char* desc, int32_t* value) {
+ Register reg = Register::FromName(desc);
+ if (reg != InvalidReg) {
+ *value = getRegisterValue(reg.code());
+ return true;
+ }
+ if (strncmp(desc, "0x", 2) == 0) {
+ return sscanf(desc + 2, "%x", reinterpret_cast<uint32_t*>(value)) == 1;
+ }
+ return sscanf(desc, "%u", reinterpret_cast<uint32_t*>(value)) == 1;
+}
+
+bool ArmDebugger::getVFPDoubleValue(const char* desc, double* value) {
+ FloatRegister reg = FloatRegister::FromCode(FloatRegister::FromName(desc));
+ if (reg.isInvalid()) {
+ return false;
+ }
+
+ if (reg.isSingle()) {
+ float fval;
+ sim_->get_float_from_s_register(reg.id(), &fval);
+ *value = fval;
+ return true;
+ }
+
+ sim_->get_double_from_d_register(reg.id(), value);
+ return true;
+}
+
+bool ArmDebugger::setBreakpoint(SimInstruction* breakpc) {
+ // Check if a breakpoint can be set. If not, return without any side effects.
+ if (sim_->break_pc_) {
+ return false;
+ }
+
+ // Set the breakpoint.
+ sim_->break_pc_ = breakpc;
+ sim_->break_instr_ = breakpc->instructionBits();
+ // The breakpoint instruction is not written into the code here; it will be
+ // written when the debugger shell continues.
+ return true;
+}
+
+bool ArmDebugger::deleteBreakpoint(SimInstruction* breakpc) {
+ if (sim_->break_pc_ != nullptr) {
+ sim_->break_pc_->setInstructionBits(sim_->break_instr_);
+ }
+
+ sim_->break_pc_ = nullptr;
+ sim_->break_instr_ = 0;
+ return true;
+}
+
+void ArmDebugger::undoBreakpoints() {
+ if (sim_->break_pc_) {
+ sim_->break_pc_->setInstructionBits(sim_->break_instr_);
+ }
+}
+
+void ArmDebugger::redoBreakpoints() {
+ if (sim_->break_pc_) {
+ sim_->break_pc_->setInstructionBits(kBreakpointInstr);
+ }
+}
+
+static char* ReadLine(const char* prompt) {
+ UniqueChars result;
+ char line_buf[256];
+ int offset = 0;
+ bool keep_going = true;
+ fprintf(stdout, "%s", prompt);
+ fflush(stdout);
+ while (keep_going) {
+ if (fgets(line_buf, sizeof(line_buf), stdin) == nullptr) {
+ // fgets got an error. Just give up.
+ return nullptr;
+ }
+ int len = strlen(line_buf);
+ if (len > 0 && line_buf[len - 1] == '\n') {
+ // Since we read a newline, we are done reading the line. This will
+ // exit the loop after copying this buffer into the result.
+ keep_going = false;
+ }
+ if (!result) {
+ // Allocate the initial result and make room for the terminating
+ // '\0'.
+ result.reset(js_pod_malloc<char>(len + 1));
+ if (!result) {
+ return nullptr;
+ }
+ } else {
+ // Allocate a new result with enough room for the new addition.
+ int new_len = offset + len + 1;
+ char* new_result = js_pod_malloc<char>(new_len);
+ if (!new_result) {
+ return nullptr;
+ }
+ // Copy the existing input into the new array and set the new
+ // array as the result.
+ memcpy(new_result, result.get(), offset * sizeof(char));
+ result.reset(new_result);
+ }
+ // Copy the newly read line into the result.
+ memcpy(result.get() + offset, line_buf, len * sizeof(char));
+ offset += len;
+ }
+
+ MOZ_ASSERT(result);
+ result[offset] = '\0';
+ return result.release();
+}
+
+void ArmDebugger::debug() {
+ intptr_t last_pc = -1;
+ bool done = false;
+
+#define COMMAND_SIZE 63
+#define ARG_SIZE 255
+
+#define STR(a) #a
+#define XSTR(a) STR(a)
+
+ char cmd[COMMAND_SIZE + 1];
+ char arg1[ARG_SIZE + 1];
+ char arg2[ARG_SIZE + 1];
+ char* argv[3] = {cmd, arg1, arg2};
+
+ // Make sure to have a proper terminating character if reaching the limit.
+ cmd[COMMAND_SIZE] = 0;
+ arg1[ARG_SIZE] = 0;
+ arg2[ARG_SIZE] = 0;
+
+ // Undo all set breakpoints while running in the debugger shell. This will
+ // make them invisible to all commands.
+ undoBreakpoints();
+
+#ifndef JS_DISASM_ARM
+ static bool disasm_warning_printed = false;
+ if (!disasm_warning_printed) {
+ printf(
+ " No ARM disassembler present. Enable JS_DISASM_ARM in "
+ "configure.in.");
+ disasm_warning_printed = true;
+ }
+#endif
+
+ while (!done && !sim_->has_bad_pc()) {
+ if (last_pc != sim_->get_pc()) {
+#ifdef JS_DISASM_ARM
+ disasm::NameConverter converter;
+ disasm::Disassembler dasm(converter);
+ disasm::EmbeddedVector<char, disasm::ReasonableBufferSize> buffer;
+ dasm.InstructionDecode(buffer,
+ reinterpret_cast<uint8_t*>(sim_->get_pc()));
+ printf(" 0x%08x %s\n", sim_->get_pc(), buffer.start());
+#endif
+ last_pc = sim_->get_pc();
+ }
+ char* line = ReadLine("sim> ");
+ if (line == nullptr) {
+ break;
+ } else {
+ char* last_input = sim_->lastDebuggerInput();
+ if (strcmp(line, "\n") == 0 && last_input != nullptr) {
+ line = last_input;
+ } else {
+ // Ownership is transferred to sim_.
+ sim_->setLastDebuggerInput(line);
+ }
+
+ // Use sscanf to parse the individual parts of the command line. At the
+ // moment no command expects more than two parameters.
+ int argc = sscanf(line,
+ "%" XSTR(COMMAND_SIZE) "s "
+ "%" XSTR(ARG_SIZE) "s "
+ "%" XSTR(ARG_SIZE) "s",
+ cmd, arg1, arg2);
+ if (argc < 0) {
+ continue;
+ } else if ((strcmp(cmd, "si") == 0) || (strcmp(cmd, "stepi") == 0)) {
+ sim_->instructionDecode(
+ reinterpret_cast<SimInstruction*>(sim_->get_pc()));
+ sim_->icount_++;
+ } else if ((strcmp(cmd, "skip") == 0)) {
+ sim_->set_pc(sim_->get_pc() + 4);
+ sim_->icount_++;
+ } else if ((strcmp(cmd, "c") == 0) || (strcmp(cmd, "cont") == 0)) {
+ // Execute the one instruction we broke at with breakpoints
+ // disabled.
+ sim_->instructionDecode(
+ reinterpret_cast<SimInstruction*>(sim_->get_pc()));
+ sim_->icount_++;
+ // Leave the debugger shell.
+ done = true;
+ } else if ((strcmp(cmd, "p") == 0) || (strcmp(cmd, "print") == 0)) {
+ if (argc == 2 || (argc == 3 && strcmp(arg2, "fp") == 0)) {
+ int32_t value;
+ double dvalue;
+ if (strcmp(arg1, "all") == 0) {
+ for (uint32_t i = 0; i < Registers::Total; i++) {
+ value = getRegisterValue(i);
+ printf("%3s: 0x%08x %10d", Registers::GetName(i), value, value);
+ if ((argc == 3 && strcmp(arg2, "fp") == 0) && i < 8 &&
+ (i % 2) == 0) {
+ dvalue = getRegisterPairDoubleValue(i);
+ printf(" (%.16g)\n", dvalue);
+ } else {
+ printf("\n");
+ }
+ }
+ for (uint32_t i = 0; i < FloatRegisters::TotalPhys; i++) {
+ getVFPDoubleRegisterValue(i, &dvalue);
+ uint64_t as_words = mozilla::BitwiseCast<uint64_t>(dvalue);
+ printf("%3s: %.16g 0x%08x %08x\n",
+ FloatRegister::FromCode(i).name(), dvalue,
+ static_cast<uint32_t>(as_words >> 32),
+ static_cast<uint32_t>(as_words & 0xffffffff));
+ }
+ } else {
+ if (getValue(arg1, &value)) {
+ printf("%s: 0x%08x %d \n", arg1, value, value);
+ } else if (getVFPDoubleValue(arg1, &dvalue)) {
+ uint64_t as_words = mozilla::BitwiseCast<uint64_t>(dvalue);
+ printf("%s: %.16g 0x%08x %08x\n", arg1, dvalue,
+ static_cast<uint32_t>(as_words >> 32),
+ static_cast<uint32_t>(as_words & 0xffffffff));
+ } else {
+ printf("%s unrecognized\n", arg1);
+ }
+ }
+ } else {
+ printf("print <register>\n");
+ }
+ } else if (strcmp(cmd, "stack") == 0 || strcmp(cmd, "mem") == 0) {
+ int32_t* cur = nullptr;
+ int32_t* end = nullptr;
+ int next_arg = 1;
+
+ if (strcmp(cmd, "stack") == 0) {
+ cur = reinterpret_cast<int32_t*>(sim_->get_register(Simulator::sp));
+ } else { // "mem"
+ int32_t value;
+ if (!getValue(arg1, &value)) {
+ printf("%s unrecognized\n", arg1);
+ continue;
+ }
+ cur = reinterpret_cast<int32_t*>(value);
+ next_arg++;
+ }
+
+ int32_t words;
+ if (argc == next_arg) {
+ words = 10;
+ } else {
+ if (!getValue(argv[next_arg], &words)) {
+ words = 10;
+ }
+ }
+ end = cur + words;
+
+ while (cur < end) {
+ printf(" %p: 0x%08x %10d", cur, *cur, *cur);
+ printf("\n");
+ cur++;
+ }
+ } else if (strcmp(cmd, "disasm") == 0 || strcmp(cmd, "di") == 0) {
+#ifdef JS_DISASM_ARM
+ uint8_t* prev = nullptr;
+ uint8_t* cur = nullptr;
+ uint8_t* end = nullptr;
+
+ if (argc == 1) {
+ cur = reinterpret_cast<uint8_t*>(sim_->get_pc());
+ end = cur + (10 * SimInstruction::kInstrSize);
+ } else if (argc == 2) {
+ Register reg = Register::FromName(arg1);
+ if (reg != InvalidReg || strncmp(arg1, "0x", 2) == 0) {
+ // The argument is an address or a register name.
+ int32_t value;
+ if (getValue(arg1, &value)) {
+ cur = reinterpret_cast<uint8_t*>(value);
+ // Disassemble 10 instructions at <arg1>.
+ end = cur + (10 * SimInstruction::kInstrSize);
+ }
+ } else {
+ // The argument is the number of instructions.
+ int32_t value;
+ if (getValue(arg1, &value)) {
+ cur = reinterpret_cast<uint8_t*>(sim_->get_pc());
+ // Disassemble <arg1> instructions.
+ end = cur + (value * SimInstruction::kInstrSize);
+ }
+ }
+ } else {
+ int32_t value1;
+ int32_t value2;
+ if (getValue(arg1, &value1) && getValue(arg2, &value2)) {
+ cur = reinterpret_cast<uint8_t*>(value1);
+ end = cur + (value2 * SimInstruction::kInstrSize);
+ }
+ }
+ while (cur < end) {
+ disasm::NameConverter converter;
+ disasm::Disassembler dasm(converter);
+ disasm::EmbeddedVector<char, disasm::ReasonableBufferSize> buffer;
+
+ prev = cur;
+ cur += dasm.InstructionDecode(buffer, cur);
+ printf(" 0x%08x %s\n", reinterpret_cast<uint32_t>(prev),
+ buffer.start());
+ }
+#endif
+ } else if (strcmp(cmd, "gdb") == 0) {
+ printf("relinquishing control to gdb\n");
+#ifdef _MSC_VER
+ __debugbreak();
+#else
+ asm("int $3");
+#endif
+ printf("regaining control from gdb\n");
+ } else if (strcmp(cmd, "break") == 0) {
+ if (argc == 2) {
+ int32_t value;
+ if (getValue(arg1, &value)) {
+ if (!setBreakpoint(reinterpret_cast<SimInstruction*>(value))) {
+ printf("setting breakpoint failed\n");
+ }
+ } else {
+ printf("%s unrecognized\n", arg1);
+ }
+ } else {
+ printf("break <address>\n");
+ }
+ } else if (strcmp(cmd, "del") == 0) {
+ if (!deleteBreakpoint(nullptr)) {
+ printf("deleting breakpoint failed\n");
+ }
+ } else if (strcmp(cmd, "flags") == 0) {
+ printf("N flag: %d; ", sim_->n_flag_);
+ printf("Z flag: %d; ", sim_->z_flag_);
+ printf("C flag: %d; ", sim_->c_flag_);
+ printf("V flag: %d\n", sim_->v_flag_);
+ printf("INVALID OP flag: %d; ", sim_->inv_op_vfp_flag_);
+ printf("DIV BY ZERO flag: %d; ", sim_->div_zero_vfp_flag_);
+ printf("OVERFLOW flag: %d; ", sim_->overflow_vfp_flag_);
+ printf("UNDERFLOW flag: %d; ", sim_->underflow_vfp_flag_);
+ printf("INEXACT flag: %d;\n", sim_->inexact_vfp_flag_);
+ } else if (strcmp(cmd, "stop") == 0) {
+ int32_t value;
+ intptr_t stop_pc = sim_->get_pc() - 2 * SimInstruction::kInstrSize;
+ SimInstruction* stop_instr = reinterpret_cast<SimInstruction*>(stop_pc);
+ SimInstruction* msg_address = reinterpret_cast<SimInstruction*>(
+ stop_pc + SimInstruction::kInstrSize);
+ if ((argc == 2) && (strcmp(arg1, "unstop") == 0)) {
+ // Remove the current stop.
+ if (sim_->isStopInstruction(stop_instr)) {
+ stop_instr->setInstructionBits(kNopInstr);
+ msg_address->setInstructionBits(kNopInstr);
+ } else {
+ printf("Not at debugger stop.\n");
+ }
+ } else if (argc == 3) {
+ // Print information about all/the specified breakpoint(s).
+ if (strcmp(arg1, "info") == 0) {
+ if (strcmp(arg2, "all") == 0) {
+ printf("Stop information:\n");
+ for (uint32_t i = 0; i < sim_->kNumOfWatchedStops; i++) {
+ sim_->printStopInfo(i);
+ }
+ } else if (getValue(arg2, &value)) {
+ sim_->printStopInfo(value);
+ } else {
+ printf("Unrecognized argument.\n");
+ }
+ } else if (strcmp(arg1, "enable") == 0) {
+ // Enable all/the specified breakpoint(s).
+ if (strcmp(arg2, "all") == 0) {
+ for (uint32_t i = 0; i < sim_->kNumOfWatchedStops; i++) {
+ sim_->enableStop(i);
+ }
+ } else if (getValue(arg2, &value)) {
+ sim_->enableStop(value);
+ } else {
+ printf("Unrecognized argument.\n");
+ }
+ } else if (strcmp(arg1, "disable") == 0) {
+ // Disable all/the specified breakpoint(s).
+ if (strcmp(arg2, "all") == 0) {
+ for (uint32_t i = 0; i < sim_->kNumOfWatchedStops; i++) {
+ sim_->disableStop(i);
+ }
+ } else if (getValue(arg2, &value)) {
+ sim_->disableStop(value);
+ } else {
+ printf("Unrecognized argument.\n");
+ }
+ }
+ } else {
+ printf("Wrong usage. Use help command for more information.\n");
+ }
+ } else if ((strcmp(cmd, "h") == 0) || (strcmp(cmd, "help") == 0)) {
+ printf("cont\n");
+ printf(" continue execution (alias 'c')\n");
+ printf("skip\n");
+ printf(" skip one instruction (set pc to next instruction)\n");
+ printf("stepi\n");
+ printf(" step one instruction (alias 'si')\n");
+ printf("print <register>\n");
+ printf(" print register content (alias 'p')\n");
+ printf(" use register name 'all' to print all registers\n");
+ printf(" add argument 'fp' to print register pair double values\n");
+ printf("flags\n");
+ printf(" print flags\n");
+ printf("stack [<words>]\n");
+ printf(" dump stack content, default dump 10 words)\n");
+ printf("mem <address> [<words>]\n");
+ printf(" dump memory content, default dump 10 words)\n");
+ printf("disasm [<instructions>]\n");
+ printf("disasm [<address/register>]\n");
+ printf("disasm [[<address/register>] <instructions>]\n");
+ printf(" disassemble code, default is 10 instructions\n");
+ printf(" from pc (alias 'di')\n");
+ printf("gdb\n");
+ printf(" enter gdb\n");
+ printf("break <address>\n");
+ printf(" set a break point on the address\n");
+ printf("del\n");
+ printf(" delete the breakpoint\n");
+ printf("stop feature:\n");
+ printf(" Description:\n");
+ printf(" Stops are debug instructions inserted by\n");
+ printf(" the Assembler::stop() function.\n");
+ printf(" When hitting a stop, the Simulator will\n");
+ printf(" stop and and give control to the ArmDebugger.\n");
+ printf(" The first %d stop codes are watched:\n",
+ Simulator::kNumOfWatchedStops);
+ printf(" - They can be enabled / disabled: the Simulator\n");
+ printf(" will / won't stop when hitting them.\n");
+ printf(" - The Simulator keeps track of how many times they \n");
+ printf(" are met. (See the info command.) Going over a\n");
+ printf(" disabled stop still increases its counter. \n");
+ printf(" Commands:\n");
+ printf(" stop info all/<code> : print infos about number <code>\n");
+ printf(" or all stop(s).\n");
+ printf(" stop enable/disable all/<code> : enables / disables\n");
+ printf(" all or number <code> stop(s)\n");
+ printf(" stop unstop\n");
+ printf(" ignore the stop instruction at the current location\n");
+ printf(" from now on\n");
+ } else {
+ printf("Unknown command: %s\n", cmd);
+ }
+ }
+ }
+
+ // Add all the breakpoints back to stop execution and enter the debugger
+ // shell when hit.
+ redoBreakpoints();
+
+#undef COMMAND_SIZE
+#undef ARG_SIZE
+
+#undef STR
+#undef XSTR
+}
+
+static bool AllOnOnePage(uintptr_t start, int size) {
+ intptr_t start_page = (start & ~CachePage::kPageMask);
+ intptr_t end_page = ((start + size) & ~CachePage::kPageMask);
+ return start_page == end_page;
+}
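+
+// For example, with CachePage's 4 KiB pages, AllOnOnePage(0x1ff0, 0x20) is
+// false because the range crosses the page boundary at 0x2000, while
+// AllOnOnePage(0x1f00, 0x20) is true.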
+
+static CachePage* GetCachePageLocked(SimulatorProcess::ICacheMap& i_cache,
+ void* page) {
+ SimulatorProcess::ICacheMap::AddPtr p = i_cache.lookupForAdd(page);
+ if (p) {
+ return p->value();
+ }
+
+ AutoEnterOOMUnsafeRegion oomUnsafe;
+ CachePage* new_page = js_new<CachePage>();
+ if (!new_page || !i_cache.add(p, page, new_page)) {
+ oomUnsafe.crash("Simulator CachePage");
+ }
+
+ return new_page;
+}
+
+// Flush from start up to and not including start + size.
+static void FlushOnePageLocked(SimulatorProcess::ICacheMap& i_cache,
+ intptr_t start, int size) {
+ MOZ_ASSERT(size <= CachePage::kPageSize);
+ MOZ_ASSERT(AllOnOnePage(start, size - 1));
+ MOZ_ASSERT((start & CachePage::kLineMask) == 0);
+ MOZ_ASSERT((size & CachePage::kLineMask) == 0);
+
+ void* page = reinterpret_cast<void*>(start & (~CachePage::kPageMask));
+ int offset = (start & CachePage::kPageMask);
+ CachePage* cache_page = GetCachePageLocked(i_cache, page);
+ char* valid_bytemap = cache_page->validityByte(offset);
+ memset(valid_bytemap, CachePage::LINE_INVALID, size >> CachePage::kLineShift);
+}
+
+static void FlushICacheLocked(SimulatorProcess::ICacheMap& i_cache,
+ void* start_addr, size_t size) {
+ intptr_t start = reinterpret_cast<intptr_t>(start_addr);
+ int intra_line = (start & CachePage::kLineMask);
+ start -= intra_line;
+ size += intra_line;
+ size = ((size - 1) | CachePage::kLineMask) + 1;
+ int offset = (start & CachePage::kPageMask);
+ while (!AllOnOnePage(start, size - 1)) {
+ int bytes_to_flush = CachePage::kPageSize - offset;
+ FlushOnePageLocked(i_cache, start, bytes_to_flush);
+ start += bytes_to_flush;
+ size -= bytes_to_flush;
+ MOZ_ASSERT((start & CachePage::kPageMask) == 0);
+ offset = 0;
+ }
+ if (size != 0) {
+ FlushOnePageLocked(i_cache, start, size);
+ }
+}
+
+/* static */
+void SimulatorProcess::checkICacheLocked(SimInstruction* instr) {
+ intptr_t address = reinterpret_cast<intptr_t>(instr);
+ void* page = reinterpret_cast<void*>(address & (~CachePage::kPageMask));
+ void* line = reinterpret_cast<void*>(address & (~CachePage::kLineMask));
+ int offset = (address & CachePage::kPageMask);
+ CachePage* cache_page = GetCachePageLocked(icache(), page);
+ char* cache_valid_byte = cache_page->validityByte(offset);
+ bool cache_hit = (*cache_valid_byte == CachePage::LINE_VALID);
+ char* cached_line = cache_page->cachedData(offset & ~CachePage::kLineMask);
+
+ if (cache_hit) {
+ // Check that the data in memory matches the contents of the I-cache.
+ mozilla::DebugOnly<int> cmpret =
+ memcmp(reinterpret_cast<void*>(instr), cache_page->cachedData(offset),
+ SimInstruction::kInstrSize);
+ MOZ_ASSERT(cmpret == 0);
+ } else {
+ // Cache miss. Load memory into the cache.
+ memcpy(cached_line, line, CachePage::kLineLength);
+ *cache_valid_byte = CachePage::LINE_VALID;
+ }
+}
+
+HashNumber SimulatorProcess::ICacheHasher::hash(const Lookup& l) {
+ return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(l)) >> 2;
+}
+
+bool SimulatorProcess::ICacheHasher::match(const Key& k, const Lookup& l) {
+ MOZ_ASSERT((reinterpret_cast<intptr_t>(k) & CachePage::kPageMask) == 0);
+ MOZ_ASSERT((reinterpret_cast<intptr_t>(l) & CachePage::kPageMask) == 0);
+ return k == l;
+}
+
+void Simulator::setLastDebuggerInput(char* input) {
+ js_free(lastDebuggerInput_);
+ lastDebuggerInput_ = input;
+}
+
+/* static */
+void SimulatorProcess::FlushICache(void* start_addr, size_t size) {
+ JitSpewCont(JitSpew_CacheFlush, "[%p %zx]", start_addr, size);
+ if (!ICacheCheckingDisableCount) {
+ AutoLockSimulatorCache als;
+ js::jit::FlushICacheLocked(icache(), start_addr, size);
+ }
+}
+
+Simulator::Simulator() {
+ // Set up simulator support first. Some of this information is needed to
+ // set up the architecture state.
+
+ // Note, allocation and anything that depends on allocated memory is
+ // deferred until init(), in order to handle OOM properly.
+
+ stack_ = nullptr;
+ stackLimit_ = 0;
+ pc_modified_ = false;
+ icount_ = 0L;
+ break_pc_ = nullptr;
+ break_instr_ = 0;
+ single_stepping_ = false;
+ single_step_callback_ = nullptr;
+ single_step_callback_arg_ = nullptr;
+ skipCalleeSavedRegsCheck = false;
+
+ // Set up architecture state.
+ // All registers are initialized to zero to start with.
+ for (int i = 0; i < num_registers; i++) {
+ registers_[i] = 0;
+ }
+
+ n_flag_ = false;
+ z_flag_ = false;
+ c_flag_ = false;
+ v_flag_ = false;
+
+ for (int i = 0; i < num_d_registers * 2; i++) {
+ vfp_registers_[i] = 0;
+ }
+
+ n_flag_FPSCR_ = false;
+ z_flag_FPSCR_ = false;
+ c_flag_FPSCR_ = false;
+ v_flag_FPSCR_ = false;
+ FPSCR_rounding_mode_ = SimRZ;
+ FPSCR_default_NaN_mode_ = true;
+
+ inv_op_vfp_flag_ = false;
+ div_zero_vfp_flag_ = false;
+ overflow_vfp_flag_ = false;
+ underflow_vfp_flag_ = false;
+ inexact_vfp_flag_ = false;
+
+ // The lr and pc are initialized to a known bad value that will cause an
+ // access violation if the simulator ever tries to execute it.
+ registers_[pc] = bad_lr;
+ registers_[lr] = bad_lr;
+
+ lastDebuggerInput_ = nullptr;
+
+ exclusiveMonitorHeld_ = false;
+ exclusiveMonitor_ = 0;
+}
+
+bool Simulator::init() {
+ // Allocate 2MB for the stack. Note that we will only use 1MB, see below.
+ static const size_t stackSize = 2 * 1024 * 1024;
+ stack_ = js_pod_malloc<char>(stackSize);
+ if (!stack_) {
+ return false;
+ }
+
+ // Leave a safety margin of 1MB to prevent overrunning the stack when
+ // pushing values (total stack size is 2MB).
+ stackLimit_ = reinterpret_cast<uintptr_t>(stack_) + 1024 * 1024;
+
+ // The sp is initialized to point to the bottom (high address) of the
+ // allocated stack area. To guard against potential stack underflows we
+ // leave some buffer below.
+ registers_[sp] = reinterpret_cast<int32_t>(stack_) + stackSize - 64;
+
+ return true;
+}
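+
+// With the allocation in init() above, the usable region is
+// [stack_, stack_ + 2MB); the stack limit sits 1MB above the base, and sp
+// starts 64 bytes below the top of the allocation.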
+
+// When the generated code calls a VM function (masm.callWithABI) we need to
+// call that function instead of trying to execute it with the simulator
+// (because it is host-native code, e.g. x86, rather than ARM code). We do
+// that by redirecting the VM call to an svc (Supervisor Call) instruction
+// that is handled by the simulator. We write the original destination of the
+// jump at a known offset from the svc instruction so the simulator knows
+// what to call.
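+//
+// Each Redirection thus acts as a tiny trampoline: generated code jumps to
+// addressOfSwiInstruction(), the simulator traps on the svc, and
+// FromSwiInstruction() recovers the enclosing Redirection (and with it the
+// native function pointer and ABI type) via offsetof.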
+class Redirection {
+ friend class SimulatorProcess;
+
+ // sim's lock must already be held.
+ Redirection(void* nativeFunction, ABIFunctionType type)
+ : nativeFunction_(nativeFunction),
+ swiInstruction_(Assembler::AL | (0xf * (1 << 24)) | kCallRtRedirected),
+ type_(type),
+ next_(nullptr) {
+ next_ = SimulatorProcess::redirection();
+ if (!SimulatorProcess::ICacheCheckingDisableCount) {
+ FlushICacheLocked(SimulatorProcess::icache(), addressOfSwiInstruction(),
+ SimInstruction::kInstrSize);
+ }
+ SimulatorProcess::setRedirection(this);
+ }
+
+ public:
+ void* addressOfSwiInstruction() { return &swiInstruction_; }
+ void* nativeFunction() const { return nativeFunction_; }
+ ABIFunctionType type() const { return type_; }
+
+ static Redirection* Get(void* nativeFunction, ABIFunctionType type) {
+ AutoLockSimulatorCache als;
+
+ Redirection* current = SimulatorProcess::redirection();
+ for (; current != nullptr; current = current->next_) {
+ if (current->nativeFunction_ == nativeFunction) {
+ MOZ_ASSERT(current->type() == type);
+ return current;
+ }
+ }
+
+ // Note: we can't use js_new here because the constructor is private.
+ AutoEnterOOMUnsafeRegion oomUnsafe;
+ Redirection* redir = js_pod_malloc<Redirection>(1);
+ if (!redir) {
+ oomUnsafe.crash("Simulator redirection");
+ }
+ new (redir) Redirection(nativeFunction, type);
+ return redir;
+ }
+
+ static Redirection* FromSwiInstruction(SimInstruction* swiInstruction) {
+ uint8_t* addrOfSwi = reinterpret_cast<uint8_t*>(swiInstruction);
+ uint8_t* addrOfRedirection =
+ addrOfSwi - offsetof(Redirection, swiInstruction_);
+ return reinterpret_cast<Redirection*>(addrOfRedirection);
+ }
+
+ private:
+ void* nativeFunction_;
+ uint32_t swiInstruction_;
+ ABIFunctionType type_;
+ Redirection* next_;
+};
+
+Simulator::~Simulator() { js_free(stack_); }
+
+SimulatorProcess::SimulatorProcess()
+ : cacheLock_(mutexid::SimulatorCacheLock), redirection_(nullptr) {
+ if (getenv("ARM_SIM_ICACHE_CHECKS")) {
+ ICacheCheckingDisableCount = 0;
+ }
+}
+
+SimulatorProcess::~SimulatorProcess() {
+ Redirection* r = redirection_;
+ while (r) {
+ Redirection* next = r->next_;
+ js_delete(r);
+ r = next;
+ }
+}
+
+/* static */
+void* Simulator::RedirectNativeFunction(void* nativeFunction,
+ ABIFunctionType type) {
+ Redirection* redirection = Redirection::Get(nativeFunction, type);
+ return redirection->addressOfSwiInstruction();
+}
+
+// Sets the register in the architecture state. It will also deal with updating
+// Simulator internal state for special registers such as PC.
+void Simulator::set_register(int reg, int32_t value) {
+ MOZ_ASSERT(reg >= 0 && reg < num_registers);
+ if (reg == pc) {
+ pc_modified_ = true;
+ }
+ registers_[reg] = value;
+}
+
+// Get the register from the architecture state. This function does handle the
+// special case of accessing the PC register.
+int32_t Simulator::get_register(int reg) const {
+ MOZ_ASSERT(reg >= 0 && reg < num_registers);
+ // Work around GCC bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43949
+ if (reg >= num_registers) return 0;
+ return registers_[reg] + ((reg == pc) ? SimInstruction::kPCReadOffset : 0);
+}
+
+double Simulator::get_double_from_register_pair(int reg) {
+ MOZ_ASSERT(reg >= 0 && reg < num_registers && (reg % 2) == 0);
+
+ // Read the bits from the integer registers_[] array into the double
+ // precision floating point value and return it.
+ double dm_val = 0.0;
+ char buffer[2 * sizeof(registers_[0])];
+ memcpy(buffer, &registers_[reg], 2 * sizeof(registers_[0]));
+ memcpy(&dm_val, buffer, 2 * sizeof(registers_[0]));
+ return dm_val;
+}
+
+void Simulator::set_register_pair_from_double(int reg, double* value) {
+ MOZ_ASSERT(reg >= 0 && reg < num_registers && (reg % 2) == 0);
+ memcpy(registers_ + reg, value, sizeof(*value));
+}
+
+void Simulator::set_dw_register(int dreg, const int* dbl) {
+ MOZ_ASSERT(dreg >= 0 && dreg < num_d_registers);
+ registers_[dreg] = dbl[0];
+ registers_[dreg + 1] = dbl[1];
+}
+
+void Simulator::get_d_register(int dreg, uint64_t* value) {
+ MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::TotalPhys));
+ memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value));
+}
+
+void Simulator::set_d_register(int dreg, const uint64_t* value) {
+ MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::TotalPhys));
+ memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value));
+}
+
+void Simulator::get_d_register(int dreg, uint32_t* value) {
+ MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::TotalPhys));
+ memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2);
+}
+
+void Simulator::set_d_register(int dreg, const uint32_t* value) {
+ MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::TotalPhys));
+ memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2);
+}
+
+void Simulator::get_q_register(int qreg, uint64_t* value) {
+ MOZ_ASSERT(qreg >= 0 && qreg < num_q_registers);
+ memcpy(value, vfp_registers_ + qreg * 4, sizeof(*value) * 2);
+}
+
+void Simulator::set_q_register(int qreg, const uint64_t* value) {
+ MOZ_ASSERT(qreg >= 0 && qreg < num_q_registers);
+ memcpy(vfp_registers_ + qreg * 4, value, sizeof(*value) * 2);
+}
+
+void Simulator::get_q_register(int qreg, uint32_t* value) {
+ MOZ_ASSERT(qreg >= 0 && qreg < num_q_registers);
+ memcpy(value, vfp_registers_ + qreg * 4, sizeof(*value) * 4);
+}
+
+void Simulator::set_q_register(int qreg, const uint32_t* value) {
+ MOZ_ASSERT((qreg >= 0) && (qreg < num_q_registers));
+ memcpy(vfp_registers_ + qreg * 4, value, sizeof(*value) * 4);
+}
+
+void Simulator::set_pc(int32_t value) {
+ pc_modified_ = true;
+ registers_[pc] = value;
+}
+
+bool Simulator::has_bad_pc() const {
+ return registers_[pc] == bad_lr || registers_[pc] == end_sim_pc;
+}
+
+// Raw access to the PC register without the special adjustment when reading.
+int32_t Simulator::get_pc() const { return registers_[pc]; }
+
+void Simulator::set_s_register(int sreg, unsigned int value) {
+ MOZ_ASSERT(sreg >= 0 && sreg < num_s_registers);
+ vfp_registers_[sreg] = value;
+}
+
+unsigned Simulator::get_s_register(int sreg) const {
+ MOZ_ASSERT(sreg >= 0 && sreg < num_s_registers);
+ return vfp_registers_[sreg];
+}
+
+template <class InputType, int register_size>
+void Simulator::setVFPRegister(int reg_index, const InputType& value) {
+ MOZ_ASSERT(reg_index >= 0);
+ MOZ_ASSERT_IF(register_size == 1, reg_index < num_s_registers);
+ MOZ_ASSERT_IF(register_size == 2, reg_index < int(FloatRegisters::TotalPhys));
+
+ char buffer[register_size * sizeof(vfp_registers_[0])];
+ memcpy(buffer, &value, register_size * sizeof(vfp_registers_[0]));
+ memcpy(&vfp_registers_[reg_index * register_size], buffer,
+ register_size * sizeof(vfp_registers_[0]));
+}
+
+template <class ReturnType, int register_size>
+void Simulator::getFromVFPRegister(int reg_index, ReturnType* out) {
+ MOZ_ASSERT(reg_index >= 0);
+ MOZ_ASSERT_IF(register_size == 1, reg_index < num_s_registers);
+ MOZ_ASSERT_IF(register_size == 2, reg_index < int(FloatRegisters::TotalPhys));
+
+ char buffer[register_size * sizeof(vfp_registers_[0])];
+ memcpy(buffer, &vfp_registers_[register_size * reg_index],
+ register_size * sizeof(vfp_registers_[0]));
+ memcpy(out, buffer, register_size * sizeof(vfp_registers_[0]));
+}
+
+// These forced instantiations are for jsapi-tests; nothing else in the
+// engine requires them to be instantiated.
+template void Simulator::getFromVFPRegister<double, 2>(int reg_index,
+ double* out);
+template void Simulator::getFromVFPRegister<float, 1>(int reg_index,
+ float* out);
+template void Simulator::setVFPRegister<double, 2>(int reg_index,
+ const double& value);
+template void Simulator::setVFPRegister<float, 1>(int reg_index,
+ const float& value);
+
+void Simulator::getFpArgs(double* x, double* y, int32_t* z) {
+ if (UseHardFpABI()) {
+ get_double_from_d_register(0, x);
+ get_double_from_d_register(1, y);
+ *z = get_register(0);
+ } else {
+ *x = get_double_from_register_pair(0);
+ *y = get_double_from_register_pair(2);
+ *z = get_register(2);
+ }
+}
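+
+// Note that getFpArgs() reads more state than any one signature uses: under
+// the soft-FP ABI a (double, int) call passes x in r0:r1 and z in r2, while a
+// (double, double) call passes y in r2:r3; callers use only the values their
+// prototype actually takes.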
+
+void Simulator::getFpFromStack(int32_t* stack, double* x) {
+ MOZ_ASSERT(stack && x);
+ char buffer[2 * sizeof(stack[0])];
+ memcpy(buffer, stack, 2 * sizeof(stack[0]));
+ memcpy(x, buffer, 2 * sizeof(stack[0]));
+}
+
+void Simulator::setCallResultDouble(double result) {
+ // The return value is either in r0/r1 or d0.
+ if (UseHardFpABI()) {
+ char buffer[2 * sizeof(vfp_registers_[0])];
+ memcpy(buffer, &result, sizeof(buffer));
+ // Copy result to d0.
+ memcpy(vfp_registers_, buffer, sizeof(buffer));
+ } else {
+ char buffer[2 * sizeof(registers_[0])];
+ memcpy(buffer, &result, sizeof(buffer));
+ // Copy result to r0 and r1.
+ memcpy(registers_, buffer, sizeof(buffer));
+ }
+}
+
+void Simulator::setCallResultFloat(float result) {
+ if (UseHardFpABI()) {
+ char buffer[sizeof(registers_[0])];
+ memcpy(buffer, &result, sizeof(buffer));
+ // Copy result to s0.
+ memcpy(vfp_registers_, buffer, sizeof(buffer));
+ } else {
+ char buffer[sizeof(registers_[0])];
+ memcpy(buffer, &result, sizeof(buffer));
+ // Copy result to r0.
+ memcpy(registers_, buffer, sizeof(buffer));
+ }
+}
+
+void Simulator::setCallResult(int64_t res) {
+ set_register(r0, static_cast<int32_t>(res));
+ set_register(r1, static_cast<int32_t>(res >> 32));
+}
+
+void Simulator::exclusiveMonitorSet(uint64_t value) {
+ exclusiveMonitor_ = value;
+ exclusiveMonitorHeld_ = true;
+}
+
+uint64_t Simulator::exclusiveMonitorGetAndClear(bool* held) {
+ *held = exclusiveMonitorHeld_;
+ exclusiveMonitorHeld_ = false;
+ return *held ? exclusiveMonitor_ : 0;
+}
+
+void Simulator::exclusiveMonitorClear() { exclusiveMonitorHeld_ = false; }
+
+JS::ProfilingFrameIterator::RegisterState Simulator::registerState() {
+ wasm::RegisterState state;
+ state.pc = (void*)get_pc();
+ state.fp = (void*)get_register(fp);
+ state.sp = (void*)get_register(sp);
+ state.lr = (void*)get_register(lr);
+ return state;
+}
+
+uint64_t Simulator::readQ(int32_t addr, SimInstruction* instr,
+ UnalignedPolicy f) {
+ if (handleWasmSegFault(addr, 8)) {
+ return UINT64_MAX;
+ }
+
+ if ((addr & 3) == 0 || (f == AllowUnaligned && !HasAlignmentFault())) {
+ uint64_t* ptr = reinterpret_cast<uint64_t*>(addr);
+ return *ptr;
+ }
+
+ // See the comments below in readW.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ uint64_t value;
+ memcpy(&value, ptr, sizeof(value));
+ return value;
+ }
+
+ printf("Unaligned read at 0x%08x, pc=%p\n", addr, instr);
+ MOZ_CRASH();
+}
+
+void Simulator::writeQ(int32_t addr, uint64_t value, SimInstruction* instr,
+ UnalignedPolicy f) {
+ if (handleWasmSegFault(addr, 8)) {
+ return;
+ }
+
+ if ((addr & 3) == 0 || (f == AllowUnaligned && !HasAlignmentFault())) {
+ uint64_t* ptr = reinterpret_cast<uint64_t*>(addr);
+ *ptr = value;
+ return;
+ }
+
+ // See the comments below in readW.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ memcpy(ptr, &value, sizeof(value));
+ return;
+ }
+
+ printf("Unaligned write at 0x%08x, pc=%p\n", addr, instr);
+ MOZ_CRASH();
+}
+
+int Simulator::readW(int32_t addr, SimInstruction* instr, UnalignedPolicy f) {
+ if (handleWasmSegFault(addr, 4)) {
+ return -1;
+ }
+
+ if ((addr & 3) == 0 || (f == AllowUnaligned && !HasAlignmentFault())) {
+ intptr_t* ptr = reinterpret_cast<intptr_t*>(addr);
+ return *ptr;
+ }
+
+ // In WebAssembly, we want unaligned accesses to either raise a signal or
+ // do the right thing. Making this simulator properly emulate the behavior
+ // of raising a signal is complex, so as a special case, when in wasm code,
+ // we just do the right thing.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ int value;
+ memcpy(&value, ptr, sizeof(value));
+ return value;
+ }
+
+ printf("Unaligned read at 0x%08x, pc=%p\n", addr, instr);
+ MOZ_CRASH();
+}
+
+void Simulator::writeW(int32_t addr, int value, SimInstruction* instr,
+ UnalignedPolicy f) {
+ if (handleWasmSegFault(addr, 4)) {
+ return;
+ }
+
+ if ((addr & 3) == 0 || (f == AllowUnaligned && !HasAlignmentFault())) {
+ intptr_t* ptr = reinterpret_cast<intptr_t*>(addr);
+ *ptr = value;
+ return;
+ }
+
+ // See the comments above in readW.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ memcpy(ptr, &value, sizeof(value));
+ return;
+ }
+
+ printf("Unaligned write at 0x%08x, pc=%p\n", addr, instr);
+ MOZ_CRASH();
+}
+
+// For the time being, define Relaxed operations in terms of SeqCst
+// operations - we don't yet need Relaxed operations anywhere else in
+// the system, and the distinction is not important to the simulation
+// at the level where we're operating.
+
+template <typename T>
+static T loadRelaxed(SharedMem<T*> addr) {
+ return AtomicOperations::loadSeqCst(addr);
+}
+
+template <typename T>
+static T compareExchangeRelaxed(SharedMem<T*> addr, T oldval, T newval) {
+ return AtomicOperations::compareExchangeSeqCst(addr, oldval, newval);
+}
+
+int Simulator::readExW(int32_t addr, SimInstruction* instr) {
+ if (addr & 3) {
+ MOZ_CRASH("Unaligned exclusive read");
+ }
+
+ if (handleWasmSegFault(addr, 4)) {
+ return -1;
+ }
+
+ SharedMem<int32_t*> ptr =
+ SharedMem<int32_t*>::shared(reinterpret_cast<int32_t*>(addr));
+ int32_t value = loadRelaxed(ptr);
+ exclusiveMonitorSet(value);
+ return value;
+}
+
+int32_t Simulator::writeExW(int32_t addr, int value, SimInstruction* instr) {
+ if (addr & 3) {
+ MOZ_CRASH("Unaligned exclusive write");
+ }
+
+ if (handleWasmSegFault(addr, 4)) {
+ return -1;
+ }
+
+ SharedMem<int32_t*> ptr =
+ SharedMem<int32_t*>::shared(reinterpret_cast<int32_t*>(addr));
+ bool held;
+ int32_t expected = int32_t(exclusiveMonitorGetAndClear(&held));
+ if (!held) {
+ return 1;
+ }
+ int32_t old = compareExchangeRelaxed(ptr, expected, int32_t(value));
+ return old != expected;
+}
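+
+// Note on the exclusive (ldrex/strex) helpers: readEx* loads a value and arms
+// the local monitor; writeEx* returns 0 on success and 1 on failure, matching
+// the strex result-register convention. A store-exclusive therefore succeeds
+// only if the monitor is still held and memory still contains the value that
+// the matching load-exclusive observed.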
+
+uint16_t Simulator::readHU(int32_t addr, SimInstruction* instr) {
+ if (handleWasmSegFault(addr, 2)) {
+ return UINT16_MAX;
+ }
+
+ // The regexp engine emits unaligned loads, so we don't check for them here
+ // like most of the other methods do.
+ if ((addr & 1) == 0 || !HasAlignmentFault()) {
+ uint16_t* ptr = reinterpret_cast<uint16_t*>(addr);
+ return *ptr;
+ }
+
+ // See comments above in readW.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ uint16_t value;
+ memcpy(&value, ptr, sizeof(value));
+ return value;
+ }
+
+ printf("Unaligned unsigned halfword read at 0x%08x, pc=%p\n", addr, instr);
+ MOZ_CRASH();
+ return 0;
+}
+
+int16_t Simulator::readH(int32_t addr, SimInstruction* instr) {
+ if (handleWasmSegFault(addr, 2)) {
+ return -1;
+ }
+
+ if ((addr & 1) == 0 || !HasAlignmentFault()) {
+ int16_t* ptr = reinterpret_cast<int16_t*>(addr);
+ return *ptr;
+ }
+
+ // See comments above in readW.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ int16_t value;
+ memcpy(&value, ptr, sizeof(value));
+ return value;
+ }
+
+ printf("Unaligned signed halfword read at 0x%08x\n", addr);
+ MOZ_CRASH();
+ return 0;
+}
+
+void Simulator::writeH(int32_t addr, uint16_t value, SimInstruction* instr) {
+ if (handleWasmSegFault(addr, 2)) {
+ return;
+ }
+
+ if ((addr & 1) == 0 || !HasAlignmentFault()) {
+ uint16_t* ptr = reinterpret_cast<uint16_t*>(addr);
+ *ptr = value;
+ return;
+ }
+
+ // See the comments above in readW.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ memcpy(ptr, &value, sizeof(value));
+ return;
+ }
+
+ printf("Unaligned unsigned halfword write at 0x%08x, pc=%p\n", addr, instr);
+ MOZ_CRASH();
+}
+
+void Simulator::writeH(int32_t addr, int16_t value, SimInstruction* instr) {
+ if (handleWasmSegFault(addr, 2)) {
+ return;
+ }
+
+ if ((addr & 1) == 0 || !HasAlignmentFault()) {
+ int16_t* ptr = reinterpret_cast<int16_t*>(addr);
+ *ptr = value;
+ return;
+ }
+
+ // See the comments above in readW.
+ if (FixupFault() && wasm::InCompiledCode(reinterpret_cast<void*>(get_pc()))) {
+ char* ptr = reinterpret_cast<char*>(addr);
+ memcpy(ptr, &value, sizeof(value));
+ return;
+ }
+
+ printf("Unaligned halfword write at 0x%08x, pc=%p\n", addr, instr);
+ MOZ_CRASH();
+}
+
+uint16_t Simulator::readExHU(int32_t addr, SimInstruction* instr) {
+ if (addr & 1) {
+ MOZ_CRASH("Unaligned exclusive read");
+ }
+
+ if (handleWasmSegFault(addr, 2)) {
+ return UINT16_MAX;
+ }
+
+ SharedMem<uint16_t*> ptr =
+ SharedMem<uint16_t*>::shared(reinterpret_cast<uint16_t*>(addr));
+ uint16_t value = loadRelaxed(ptr);
+ exclusiveMonitorSet(value);
+ return value;
+}
+
+int32_t Simulator::writeExH(int32_t addr, uint16_t value,
+ SimInstruction* instr) {
+ if (addr & 1) {
+ MOZ_CRASH("Unaligned exclusive write");
+ }
+
+ if (handleWasmSegFault(addr, 2)) {
+ return -1;
+ }
+
+ SharedMem<uint16_t*> ptr =
+ SharedMem<uint16_t*>::shared(reinterpret_cast<uint16_t*>(addr));
+ bool held;
+ uint16_t expected = uint16_t(exclusiveMonitorGetAndClear(&held));
+ if (!held) {
+ return 1;
+ }
+ uint16_t old = compareExchangeRelaxed(ptr, expected, value);
+ return old != expected;
+}
+
+uint8_t Simulator::readBU(int32_t addr) {
+ if (handleWasmSegFault(addr, 1)) {
+ return UINT8_MAX;
+ }
+
+ uint8_t* ptr = reinterpret_cast<uint8_t*>(addr);
+ return *ptr;
+}
+
+uint8_t Simulator::readExBU(int32_t addr) {
+ if (handleWasmSegFault(addr, 1)) {
+ return UINT8_MAX;
+ }
+
+ SharedMem<uint8_t*> ptr =
+ SharedMem<uint8_t*>::shared(reinterpret_cast<uint8_t*>(addr));
+ uint8_t value = loadRelaxed(ptr);
+ exclusiveMonitorSet(value);
+ return value;
+}
+
+int32_t Simulator::writeExB(int32_t addr, uint8_t value) {
+ if (handleWasmSegFault(addr, 1)) {
+ return -1;
+ }
+
+ SharedMem<uint8_t*> ptr =
+ SharedMem<uint8_t*>::shared(reinterpret_cast<uint8_t*>(addr));
+ bool held;
+ uint8_t expected = uint8_t(exclusiveMonitorGetAndClear(&held));
+ if (!held) {
+ return 1;
+ }
+ uint8_t old = compareExchangeRelaxed(ptr, expected, value);
+ return old != expected;
+}
+
+int8_t Simulator::readB(int32_t addr) {
+ if (handleWasmSegFault(addr, 1)) {
+ return -1;
+ }
+
+ int8_t* ptr = reinterpret_cast<int8_t*>(addr);
+ return *ptr;
+}
+
+void Simulator::writeB(int32_t addr, uint8_t value) {
+ if (handleWasmSegFault(addr, 1)) {
+ return;
+ }
+
+ uint8_t* ptr = reinterpret_cast<uint8_t*>(addr);
+ *ptr = value;
+}
+
+void Simulator::writeB(int32_t addr, int8_t value) {
+ if (handleWasmSegFault(addr, 1)) {
+ return;
+ }
+
+ int8_t* ptr = reinterpret_cast<int8_t*>(addr);
+ *ptr = value;
+}
+
+int32_t* Simulator::readDW(int32_t addr) {
+ if (handleWasmSegFault(addr, 8)) {
+ return nullptr;
+ }
+
+ if ((addr & 3) == 0) {
+ int32_t* ptr = reinterpret_cast<int32_t*>(addr);
+ return ptr;
+ }
+
+ printf("Unaligned read at 0x%08x\n", addr);
+ MOZ_CRASH();
+}
+
+void Simulator::writeDW(int32_t addr, int32_t value1, int32_t value2) {
+ if (handleWasmSegFault(addr, 8)) {
+ return;
+ }
+
+ if ((addr & 3) == 0) {
+ int32_t* ptr = reinterpret_cast<int32_t*>(addr);
+ *ptr++ = value1;
+ *ptr = value2;
+ return;
+ }
+
+ printf("Unaligned write at 0x%08x\n", addr);
+ MOZ_CRASH();
+}
+
+int32_t Simulator::readExDW(int32_t addr, int32_t* hibits) {
+ if (addr & 3) {
+ MOZ_CRASH("Unaligned exclusive read");
+ }
+
+ if (handleWasmSegFault(addr, 8)) {
+ return -1;
+ }
+
+ SharedMem<uint64_t*> ptr =
+ SharedMem<uint64_t*>::shared(reinterpret_cast<uint64_t*>(addr));
+ // The spec says that the low part of value shall be read from addr and
+ // the high part shall be read from addr+4. On a little-endian system
+ // where we read a 64-bit quadword the low part of the value will be in
+ // the low part of the quadword, and the high part of the value in the
+ // high part of the quadword.
+ uint64_t value = loadRelaxed(ptr);
+ exclusiveMonitorSet(value);
+ *hibits = int32_t(value >> 32);
+ return int32_t(value);
+}
+
+int32_t Simulator::writeExDW(int32_t addr, int32_t value1, int32_t value2) {
+ if (addr & 3) {
+ MOZ_CRASH("Unaligned exclusive write");
+ }
+
+ if (handleWasmSegFault(addr, 8)) {
+ return -1;
+ }
+
+ SharedMem<uint64_t*> ptr =
+ SharedMem<uint64_t*>::shared(reinterpret_cast<uint64_t*>(addr));
+ // The spec says that value1 shall be stored at addr and value2 at
+ // addr+4. On a little-endian system that means constructing a 64-bit
+ // value where value1 is in the low half of a 64-bit quadword and value2
+ // is in the high half of the quadword.
+ uint64_t value = (uint64_t(value2) << 32) | uint32_t(value1);
+ bool held;
+ uint64_t expected = exclusiveMonitorGetAndClear(&held);
+ if (!held) {
+ return 1;
+ }
+ uint64_t old = compareExchangeRelaxed(ptr, expected, value);
+ return old != expected;
+}
+
+uintptr_t Simulator::stackLimit() const { return stackLimit_; }
+
+uintptr_t* Simulator::addressOfStackLimit() { return &stackLimit_; }
+
+bool Simulator::overRecursed(uintptr_t newsp) const {
+ if (newsp == 0) {
+ newsp = get_register(sp);
+ }
+ return newsp <= stackLimit();
+}
+
+bool Simulator::overRecursedWithExtra(uint32_t extra) const {
+ uintptr_t newsp = get_register(sp) - extra;
+ return newsp <= stackLimit();
+}
+
+// Checks if the current instruction should be executed based on its condition
+// bits.
+bool Simulator::conditionallyExecute(SimInstruction* instr) {
+ switch (instr->conditionField()) {
+ case Assembler::EQ:
+ return z_flag_;
+ case Assembler::NE:
+ return !z_flag_;
+ case Assembler::CS:
+ return c_flag_;
+ case Assembler::CC:
+ return !c_flag_;
+ case Assembler::MI:
+ return n_flag_;
+ case Assembler::PL:
+ return !n_flag_;
+ case Assembler::VS:
+ return v_flag_;
+ case Assembler::VC:
+ return !v_flag_;
+ case Assembler::HI:
+ return c_flag_ && !z_flag_;
+ case Assembler::LS:
+ return !c_flag_ || z_flag_;
+ case Assembler::GE:
+ return n_flag_ == v_flag_;
+ case Assembler::LT:
+ return n_flag_ != v_flag_;
+ case Assembler::GT:
+ return !z_flag_ && (n_flag_ == v_flag_);
+ case Assembler::LE:
+ return z_flag_ || (n_flag_ != v_flag_);
+ case Assembler::AL:
+ return true;
+ default:
+ MOZ_CRASH();
+ }
+ return false;
+}
+
+// Calculate and set the Negative and Zero flags.
+void Simulator::setNZFlags(int32_t val) {
+ n_flag_ = (val < 0);
+ z_flag_ = (val == 0);
+}
+
+// Set the Carry flag.
+void Simulator::setCFlag(bool val) { c_flag_ = val; }
+
+// Set the oVerflow flag.
+void Simulator::setVFlag(bool val) { v_flag_ = val; }
+
+// Calculate C flag value for additions.
+bool Simulator::carryFrom(int32_t left, int32_t right, int32_t carry) {
+ uint32_t uleft = static_cast<uint32_t>(left);
+ uint32_t uright = static_cast<uint32_t>(right);
+ uint32_t urest = 0xffffffffU - uleft;
+ return (uright > urest) ||
+ (carry && (((uright + 1) > urest) || (uright > (urest - 1))));
+}
+
+// Calculate C flag value for subtractions.
+bool Simulator::borrowFrom(int32_t left, int32_t right) {
+ uint32_t uleft = static_cast<uint32_t>(left);
+ uint32_t uright = static_cast<uint32_t>(right);
+ return (uright > uleft);
+}
+
+// Calculate V flag value for additions and subtractions.
+bool Simulator::overflowFrom(int32_t alu_out, int32_t left, int32_t right,
+ bool addition) {
+ bool overflow;
+ if (addition) {
+ // Operands have the same sign.
+ overflow = ((left >= 0 && right >= 0) || (left < 0 && right < 0))
+ // And operands and result have different sign.
+ && ((left < 0 && alu_out >= 0) || (left >= 0 && alu_out < 0));
+ } else {
+ // Operands have different signs.
+ overflow = ((left < 0 && right >= 0) || (left >= 0 && right < 0))
+ // And first operand and result have different signs.
+ && ((left < 0 && alu_out >= 0) || (left >= 0 && alu_out < 0));
+ }
+ return overflow;
+}
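+
+// For example, for an addition with left = 0x7fffffff and right = 1, alu_out
+// is 0x80000000: both operands are non-negative but the result is negative,
+// so overflowFrom() returns true.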
+
+// Support for VFP comparisons.
+void Simulator::compute_FPSCR_Flags(double val1, double val2) {
+ if (std::isnan(val1) || std::isnan(val2)) {
+ n_flag_FPSCR_ = false;
+ z_flag_FPSCR_ = false;
+ c_flag_FPSCR_ = true;
+ v_flag_FPSCR_ = true;
+ // All non-NaN cases.
+ } else if (val1 == val2) {
+ n_flag_FPSCR_ = false;
+ z_flag_FPSCR_ = true;
+ c_flag_FPSCR_ = true;
+ v_flag_FPSCR_ = false;
+ } else if (val1 < val2) {
+ n_flag_FPSCR_ = true;
+ z_flag_FPSCR_ = false;
+ c_flag_FPSCR_ = false;
+ v_flag_FPSCR_ = false;
+ } else {
+ // Case when (val1 > val2).
+ n_flag_FPSCR_ = false;
+ z_flag_FPSCR_ = false;
+ c_flag_FPSCR_ = true;
+ v_flag_FPSCR_ = false;
+ }
+}
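+
+// The flag settings above match the ARM VFP comparison results: equal sets Z
+// and C, less-than sets only N, greater-than sets only C, and an unordered
+// comparison (either operand NaN) sets C and V.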
+
+void Simulator::copy_FPSCR_to_APSR() {
+ n_flag_ = n_flag_FPSCR_;
+ z_flag_ = z_flag_FPSCR_;
+ c_flag_ = c_flag_FPSCR_;
+ v_flag_ = v_flag_FPSCR_;
+}
+
+// Addressing Mode 1 - Data-processing operands:
+// Get the value based on the shifter_operand with register.
+int32_t Simulator::getShiftRm(SimInstruction* instr, bool* carry_out) {
+ ShiftType shift = instr->shifttypeValue();
+ int shift_amount = instr->shiftAmountValue();
+ int32_t result = get_register(instr->rmValue());
+ if (instr->bit(4) == 0) {
+ // By immediate.
+ if (shift == ROR && shift_amount == 0) {
+ MOZ_CRASH("NYI");
+ return result;
+ }
+ if ((shift == LSR || shift == ASR) && shift_amount == 0) {
+ shift_amount = 32;
+ }
+ switch (shift) {
+ case ASR: {
+ if (shift_amount == 0) {
+ if (result < 0) {
+ result = 0xffffffff;
+ *carry_out = true;
+ } else {
+ result = 0;
+ *carry_out = false;
+ }
+ } else {
+ result >>= (shift_amount - 1);
+ *carry_out = (result & 1) == 1;
+ result >>= 1;
+ }
+ break;
+ }
+
+ case LSL: {
+ if (shift_amount == 0) {
+ *carry_out = c_flag_;
+ } else {
+ result <<= (shift_amount - 1);
+ *carry_out = (result < 0);
+ result <<= 1;
+ }
+ break;
+ }
+
+ case LSR: {
+ if (shift_amount == 0) {
+ result = 0;
+ *carry_out = c_flag_;
+ } else {
+ uint32_t uresult = static_cast<uint32_t>(result);
+ uresult >>= (shift_amount - 1);
+ *carry_out = (uresult & 1) == 1;
+ uresult >>= 1;
+ result = static_cast<int32_t>(uresult);
+ }
+ break;
+ }
+
+ case ROR: {
+ if (shift_amount == 0) {
+ *carry_out = c_flag_;
+ } else {
+ uint32_t left = static_cast<uint32_t>(result) >> shift_amount;
+ uint32_t right = static_cast<uint32_t>(result) << (32 - shift_amount);
+ result = right | left;
+ *carry_out = (static_cast<uint32_t>(result) >> 31) != 0;
+ }
+ break;
+ }
+
+ default:
+ MOZ_CRASH();
+ }
+ } else {
+ // By register.
+ int rs = instr->rsValue();
+ shift_amount = get_register(rs) & 0xff;
+ switch (shift) {
+ case ASR: {
+ if (shift_amount == 0) {
+ *carry_out = c_flag_;
+ } else if (shift_amount < 32) {
+ result >>= (shift_amount - 1);
+ *carry_out = (result & 1) == 1;
+ result >>= 1;
+ } else {
+ MOZ_ASSERT(shift_amount >= 32);
+ if (result < 0) {
+ *carry_out = true;
+ result = 0xffffffff;
+ } else {
+ *carry_out = false;
+ result = 0;
+ }
+ }
+ break;
+ }
+
+ case LSL: {
+ if (shift_amount == 0) {
+ *carry_out = c_flag_;
+ } else if (shift_amount < 32) {
+ result <<= (shift_amount - 1);
+ *carry_out = (result < 0);
+ result <<= 1;
+ } else if (shift_amount == 32) {
+ *carry_out = (result & 1) == 1;
+ result = 0;
+ } else {
+ MOZ_ASSERT(shift_amount > 32);
+ *carry_out = false;
+ result = 0;
+ }
+ break;
+ }
+
+ case LSR: {
+ if (shift_amount == 0) {
+ *carry_out = c_flag_;
+ } else if (shift_amount < 32) {
+ uint32_t uresult = static_cast<uint32_t>(result);
+ uresult >>= (shift_amount - 1);
+ *carry_out = (uresult & 1) == 1;
+ uresult >>= 1;
+ result = static_cast<int32_t>(uresult);
+ } else if (shift_amount == 32) {
+ *carry_out = (result < 0);
+ result = 0;
+ } else {
+ *carry_out = false;
+ result = 0;
+ }
+ break;
+ }
+
+ case ROR: {
+ if (shift_amount == 0) {
+ *carry_out = c_flag_;
+ } else {
+ uint32_t left = static_cast<uint32_t>(result) >> shift_amount;
+ uint32_t right = static_cast<uint32_t>(result) << (32 - shift_amount);
+ result = right | left;
+ *carry_out = (static_cast<uint32_t>(result) >> 31) != 0;
+ }
+ break;
+ }
+
+ default:
+ MOZ_CRASH();
+ }
+ }
+ return result;
+}
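+
+// For example, in getShiftRm() an immediate LSL #1 applied to rm = 0x80000001
+// first shifts by shift_amount - 1 = 0 (leaving 0x80000001), records the sign
+// bit as the carry-out (true), then shifts once more, giving
+// result = 0x00000002.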
+
+// Addressing Mode 1 - Data-processing operands:
+// Get the value based on the shifter_operand with immediate.
+int32_t Simulator::getImm(SimInstruction* instr, bool* carry_out) {
+ int rotate = instr->rotateValue() * 2;
+ int immed8 = instr->immed8Value();
+ int imm = (immed8 >> rotate) | (immed8 << (32 - rotate));
+ *carry_out = (rotate == 0) ? c_flag_ : (imm < 0);
+ return imm;
+}
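+
+// For example, rotateValue() == 1 and immed8Value() == 0xff encode
+// ror(0xff, 2) = 0xc000003f; the rotation is non-zero and the result is
+// negative, so the shifter carry-out is true (bit 31 of the immediate).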
+
+int32_t Simulator::processPU(SimInstruction* instr, int num_regs, int reg_size,
+ intptr_t* start_address, intptr_t* end_address) {
+ int rn = instr->rnValue();
+ int32_t rn_val = get_register(rn);
+ switch (instr->PUField()) {
+ case da_x:
+ MOZ_CRASH();
+ break;
+ case ia_x:
+ *start_address = rn_val;
+ *end_address = rn_val + (num_regs * reg_size) - reg_size;
+ rn_val = rn_val + (num_regs * reg_size);
+ break;
+ case db_x:
+ *start_address = rn_val - (num_regs * reg_size);
+ *end_address = rn_val - reg_size;
+ rn_val = *start_address;
+ break;
+ case ib_x:
+ *start_address = rn_val + reg_size;
+ *end_address = rn_val + (num_regs * reg_size);
+ rn_val = *end_address;
+ break;
+ default:
+ MOZ_CRASH();
+ }
+ return rn_val;
+}
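+
+// For example, an ia_x (increment-after) transfer of three 4-byte registers
+// with rn_val = 0x1000 uses start_address = 0x1000 and end_address = 0x1008,
+// and processPU() returns the writeback value 0x100c.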
+
+// Addressing Mode 4 - Load and Store Multiple
+void Simulator::handleRList(SimInstruction* instr, bool load) {
+ int rlist = instr->rlistValue();
+ int num_regs = mozilla::CountPopulation32(rlist);
+
+ intptr_t start_address = 0;
+ intptr_t end_address = 0;
+ int32_t rn_val =
+ processPU(instr, num_regs, sizeof(void*), &start_address, &end_address);
+ intptr_t* address = reinterpret_cast<intptr_t*>(start_address);
+
+ // Catch null pointers a little earlier.
+ MOZ_ASSERT(start_address > 8191 || start_address < 0);
+
+ int reg = 0;
+ while (rlist != 0) {
+ if ((rlist & 1) != 0) {
+ if (load) {
+ set_register(reg, *address);
+ } else {
+ *address = get_register(reg);
+ }
+ address += 1;
+ }
+ reg++;
+ rlist >>= 1;
+ }
+ MOZ_ASSERT(end_address == ((intptr_t)address) - 4);
+ if (instr->hasW()) {
+ set_register(instr->rnValue(), rn_val);
+ }
+}
+
+// Addressing Mode 6 - Load and Store Multiple Coprocessor registers.
+void Simulator::handleVList(SimInstruction* instr) {
+ VFPRegPrecision precision =
+ (instr->szValue() == 0) ? kSinglePrecision : kDoublePrecision;
+ int operand_size = (precision == kSinglePrecision) ? 4 : 8;
+ bool load = (instr->VLValue() == 0x1);
+
+ int vd;
+ int num_regs;
+ vd = instr->VFPDRegValue(precision);
+ if (precision == kSinglePrecision) {
+ num_regs = instr->immed8Value();
+ } else {
+ num_regs = instr->immed8Value() / 2;
+ }
+
+ intptr_t start_address = 0;
+ intptr_t end_address = 0;
+ int32_t rn_val =
+ processPU(instr, num_regs, operand_size, &start_address, &end_address);
+
+ intptr_t* address = reinterpret_cast<intptr_t*>(start_address);
+ for (int reg = vd; reg < vd + num_regs; reg++) {
+ if (precision == kSinglePrecision) {
+ if (load) {
+ set_s_register_from_sinteger(
+ reg, readW(reinterpret_cast<int32_t>(address), instr));
+ } else {
+ writeW(reinterpret_cast<int32_t>(address),
+ get_sinteger_from_s_register(reg), instr);
+ }
+ address += 1;
+ } else {
+ if (load) {
+ int32_t data[] = {readW(reinterpret_cast<int32_t>(address), instr),
+ readW(reinterpret_cast<int32_t>(address + 1), instr)};
+ double d;
+ memcpy(&d, data, 8);
+ set_d_register_from_double(reg, d);
+ } else {
+ int32_t data[2];
+ double d;
+ get_double_from_d_register(reg, &d);
+ memcpy(data, &d, 8);
+ writeW(reinterpret_cast<int32_t>(address), data[0], instr);
+ writeW(reinterpret_cast<int32_t>(address + 1), data[1], instr);
+ }
+ address += 2;
+ }
+ }
+ MOZ_ASSERT(reinterpret_cast<intptr_t>(address) - operand_size == end_address);
+ if (instr->hasW()) {
+ set_register(instr->rnValue(), rn_val);
+ }
+}
+
+// Note: With the code below we assume that all runtime calls return a 64-bit
+// result. If they don't, the r1 result register contains a bogus value, which
+// is fine because it is caller-saved.
+typedef int64_t (*Prototype_General0)();
+typedef int64_t (*Prototype_General1)(int32_t arg0);
+typedef int64_t (*Prototype_General2)(int32_t arg0, int32_t arg1);
+typedef int64_t (*Prototype_General3)(int32_t arg0, int32_t arg1, int32_t arg2);
+typedef int64_t (*Prototype_General4)(int32_t arg0, int32_t arg1, int32_t arg2,
+ int32_t arg3);
+typedef int64_t (*Prototype_General5)(int32_t arg0, int32_t arg1, int32_t arg2,
+ int32_t arg3, int32_t arg4);
+typedef int64_t (*Prototype_General6)(int32_t arg0, int32_t arg1, int32_t arg2,
+ int32_t arg3, int32_t arg4, int32_t arg5);
+typedef int64_t (*Prototype_General7)(int32_t arg0, int32_t arg1, int32_t arg2,
+ int32_t arg3, int32_t arg4, int32_t arg5,
+ int32_t arg6);
+typedef int64_t (*Prototype_General8)(int32_t arg0, int32_t arg1, int32_t arg2,
+ int32_t arg3, int32_t arg4, int32_t arg5,
+ int32_t arg6, int32_t arg7);
+typedef int64_t (*Prototype_GeneralGeneralGeneralInt64)(int32_t arg0,
+ int32_t arg1,
+ int32_t arg2,
+ int64_t arg3);
+typedef int64_t (*Prototype_GeneralGeneralInt64Int64)(int32_t arg0,
+ int32_t arg1,
+ int64_t arg2,
+ int64_t arg3);
+
+typedef double (*Prototype_Double_None)();
+typedef double (*Prototype_Double_Double)(double arg0);
+typedef double (*Prototype_Double_Int)(int32_t arg0);
+typedef double (*Prototype_Double_IntInt)(int32_t arg0, int32_t arg1);
+typedef int32_t (*Prototype_Int_Double)(double arg0);
+typedef int64_t (*Prototype_Int64_Double)(double arg0);
+typedef int32_t (*Prototype_Int_DoubleIntInt)(double arg0, int32_t arg1,
+ int32_t arg2);
+typedef int32_t (*Prototype_Int_IntDoubleIntInt)(int32_t arg0, double arg1,
+ int32_t arg2, int32_t arg3);
+
+typedef int32_t (*Prototype_Int_Float32)(float arg0);
+typedef float (*Prototype_Float32_Float32)(float arg0);
+typedef float (*Prototype_Float32_Float32Float32)(float arg0, float arg1);
+typedef float (*Prototype_Float32_IntInt)(int arg0, int arg1);
+
+typedef double (*Prototype_Double_DoubleInt)(double arg0, int32_t arg1);
+typedef double (*Prototype_Double_IntDouble)(int32_t arg0, double arg1);
+typedef double (*Prototype_Double_DoubleDouble)(double arg0, double arg1);
+typedef int32_t (*Prototype_Int_IntDouble)(int32_t arg0, double arg1);
+typedef int32_t (*Prototype_Int_DoubleInt)(double arg0, int32_t arg1);
+
+typedef double (*Prototype_Double_DoubleDoubleDouble)(double arg0, double arg1,
+ double arg2);
+typedef double (*Prototype_Double_DoubleDoubleDoubleDouble)(double arg0,
+ double arg1,
+ double arg2,
+ double arg3);
+
+typedef int32_t (*Prototype_Int32_General)(int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32)(int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32)(int32_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32)(int32_t, int32_t,
+ int32_t, int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32Int32)(
+ int32_t, int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32General)(
+ int32_t, int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32Int32Int32General)(
+ int32_t, int32_t, int32_t, int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (
+ *Prototype_Int32_GeneralInt32Float32Float32Int32Int32Int32General)(
+ int32_t, int32_t, float, float, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (
+ *Prototype_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General)(
+ int32_t, int32_t, float, float, float, float, int32_t, int32_t, int32_t,
+ int32_t, int32_t);
+typedef int32_t (
+ *Prototype_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General)(
+ int32_t, int32_t, float, float, int32_t, float, float, int32_t, float,
+ int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32General)(
+ int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int64)(int32_t, int32_t,
+ int32_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32General)(int32_t, int32_t,
+ int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int64Int64)(int32_t, int32_t,
+ int64_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32GeneralInt32)(int32_t, int32_t,
+ int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32GeneralInt32Int32)(
+ int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneral)(int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralGeneral)(int32_t, int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralInt32Int32)(int32_t, int32_t,
+ int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32Int32Int32)(int32_t, int64_t,
+ int32_t, int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32)(int32_t, int64_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32Int64)(int32_t, int64_t,
+ int32_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32Int64General)(
+ int32_t, int64_t, int32_t, int64_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int64Int64)(int32_t, int64_t,
+ int64_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int64General)(int32_t, int64_t,
+ int64_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int64Int64General)(
+ int32_t, int64_t, int64_t, int64_t, int32_t);
+typedef int32_t (*Prototype_General_GeneralInt32)(int32_t, int32_t);
+typedef int32_t (*Prototype_General_GeneralInt32Int32)(int32_t, int32_t,
+ int32_t);
+typedef int32_t (*Prototype_General_GeneralInt32General)(int32_t, int32_t,
+ int32_t);
+typedef int32_t (*Prototype_General_GeneralInt32Int32GeneralInt32)(
+ int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralInt32General)(int32_t, int32_t,
+ int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralInt32GeneralInt32Int32Int32)(
+ int32_t, int32_t, int32_t, int32_t, int32_t, int32_t, int32_t);
+typedef int64_t (*Prototype_Int64_General)(int32_t);
+typedef int64_t (*Prototype_Int64_GeneralInt64)(int32_t, int64_t);
+
+// Fill the volatile registers with scratch values.
+//
+// Some of the ABI calls assume that the float registers are not scratched,
+// even though the ABI defines them as volatile; this is a performance
+// optimization. These are all calls that pass their operands in integer
+// registers, so for now the simulator does not scratch any float registers
+// for such calls. We should try to narrow this set further in the future.
+//
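+// Under the AAPCS (assumed here), the caller-saved set is r0-r3, r12 (ip),
+// r14 (lr), d0-d7, and d16-d31 where present, which matches the registers
+// scratched below.
+//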
+void Simulator::scratchVolatileRegisters(bool scratchFloat) {
+ int32_t scratch_value = 0xa5a5a5a5 ^ uint32_t(icount_);
+ set_register(r0, scratch_value);
+ set_register(r1, scratch_value);
+ set_register(r2, scratch_value);
+ set_register(r3, scratch_value);
+ set_register(r12, scratch_value); // Intra-Procedure-call scratch register.
+ set_register(r14, scratch_value); // Link register.
+
+ if (scratchFloat) {
+ uint64_t scratch_value_d =
+ 0x5a5a5a5a5a5a5a5aLU ^ uint64_t(icount_) ^ (uint64_t(icount_) << 30);
+ for (uint32_t i = d0; i < d8; i++) {
+ set_d_register(i, &scratch_value_d);
+ }
+ for (uint32_t i = d16; i < FloatRegisters::TotalPhys; i++) {
+ set_d_register(i, &scratch_value_d);
+ }
+ }
+}
+
+static int64_t MakeInt64(int32_t first, int32_t second) {
+ // Little-endian order.
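+  // For example (illustrative):
+  //   MakeInt64(0xDDCCBBAA, 0x11223344) == 0x11223344DDCCBBAA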
+ return ((int64_t)second << 32) | (uint32_t)first;
+}
+
+// Software interrupt instructions are used by the simulator to call into C++.
+void Simulator::softwareInterrupt(SimInstruction* instr) {
+ int svc = instr->svcValue();
+ switch (svc) {
+ case kCallRtRedirected: {
+ Redirection* redirection = Redirection::FromSwiInstruction(instr);
+ int32_t arg0 = get_register(r0);
+ int32_t arg1 = get_register(r1);
+ int32_t arg2 = get_register(r2);
+ int32_t arg3 = get_register(r3);
+ int32_t* stack_pointer = reinterpret_cast<int32_t*>(get_register(sp));
+ int32_t arg4 = stack_pointer[0];
+ int32_t arg5 = stack_pointer[1];
+ int32_t arg6 = stack_pointer[2];
+ int32_t arg7 = stack_pointer[3];
+ int32_t arg8 = stack_pointer[4];
+ int32_t arg9 = stack_pointer[5];
+ int32_t arg10 = stack_pointer[6];
+ int32_t arg11 = stack_pointer[7];
+ int32_t arg12 = stack_pointer[8];
+ int32_t arg13 = stack_pointer[9];
+
+ int32_t saved_lr = get_register(lr);
+ intptr_t external =
+ reinterpret_cast<intptr_t>(redirection->nativeFunction());
+
+ bool stack_aligned = (get_register(sp) & (ABIStackAlignment - 1)) == 0;
+ if (!stack_aligned) {
+ fprintf(stderr, "Runtime call with unaligned stack!\n");
+ MOZ_CRASH();
+ }
+
+ if (single_stepping_) {
+ single_step_callback_(single_step_callback_arg_, this, nullptr);
+ }
+
+ switch (redirection->type()) {
+ case Args_General0: {
+ Prototype_General0 target =
+ reinterpret_cast<Prototype_General0>(external);
+ int64_t result = target();
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General1: {
+ Prototype_General1 target =
+ reinterpret_cast<Prototype_General1>(external);
+ int64_t result = target(arg0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General2: {
+ Prototype_General2 target =
+ reinterpret_cast<Prototype_General2>(external);
+ int64_t result = target(arg0, arg1);
+ // The ARM backend makes calls to __aeabi_idivmod and
+ // __aeabi_uidivmod assuming that the float registers are
+ // non-volatile as a performance optimization, so the float
+ // registers must not be scratch when calling these.
+ bool scratchFloat =
+ target != __aeabi_idivmod && target != __aeabi_uidivmod;
+ scratchVolatileRegisters(/* scratchFloat = */ scratchFloat);
+ setCallResult(result);
+ break;
+ }
+ case Args_General3: {
+ Prototype_General3 target =
+ reinterpret_cast<Prototype_General3>(external);
+ int64_t result = target(arg0, arg1, arg2);
+          scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General4: {
+ Prototype_General4 target =
+ reinterpret_cast<Prototype_General4>(external);
+ int64_t result = target(arg0, arg1, arg2, arg3);
+          scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General5: {
+ Prototype_General5 target =
+ reinterpret_cast<Prototype_General5>(external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General6: {
+ Prototype_General6 target =
+ reinterpret_cast<Prototype_General6>(external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4, arg5);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General7: {
+ Prototype_General7 target =
+ reinterpret_cast<Prototype_General7>(external);
+ int32_t arg6 = stack_pointer[2];
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4, arg5, arg6);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General8: {
+ Prototype_General8 target =
+ reinterpret_cast<Prototype_General8>(external);
+ int32_t arg6 = stack_pointer[2];
+ int32_t arg7 = stack_pointer[3];
+ int64_t result =
+ target(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int_GeneralGeneralGeneralInt64: {
+ Prototype_GeneralGeneralGeneralInt64 target =
+ reinterpret_cast<Prototype_GeneralGeneralGeneralInt64>(external);
+          // Per the AAPCS, the int64 arg must start at an even-numbered
+          // register; r0-r2 are taken, so it is not split across r3 and
+          // the stack but passed entirely on the stack (arg4, arg5).
+ int64_t result = target(arg0, arg1, arg2, MakeInt64(arg4, arg5));
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int_GeneralGeneralInt64Int64: {
+ Prototype_GeneralGeneralInt64Int64 target =
+ reinterpret_cast<Prototype_GeneralGeneralInt64Int64>(external);
+ int64_t result =
+ target(arg0, arg1, MakeInt64(arg2, arg3), MakeInt64(arg4, arg5));
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int64_Double: {
+ double dval0, dval1;
+ int32_t ival;
+ getFpArgs(&dval0, &dval1, &ival);
+ Prototype_Int64_Double target =
+ reinterpret_cast<Prototype_Int64_Double>(external);
+ int64_t result = target(dval0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Double_None: {
+ Prototype_Double_None target =
+ reinterpret_cast<Prototype_Double_None>(external);
+ double dresult = target();
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Int_Double: {
+ double dval0, dval1;
+ int32_t ival;
+ getFpArgs(&dval0, &dval1, &ival);
+ Prototype_Int_Double target =
+ reinterpret_cast<Prototype_Int_Double>(external);
+ int32_t res = target(dval0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ set_register(r0, res);
+ break;
+ }
+ case Args_Int_Float32: {
+ float fval0;
+ if (UseHardFpABI()) {
+ get_float_from_s_register(0, &fval0);
+ } else {
+ fval0 = mozilla::BitwiseCast<float>(arg0);
+ }
+ auto target = reinterpret_cast<Prototype_Int_Float32>(external);
+ int32_t res = target(fval0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ set_register(r0, res);
+ break;
+ }
+ case Args_Double_Double: {
+ double dval0, dval1;
+ int32_t ival;
+ getFpArgs(&dval0, &dval1, &ival);
+ Prototype_Double_Double target =
+ reinterpret_cast<Prototype_Double_Double>(external);
+ double dresult = target(dval0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Float32_Float32: {
+ float fval0;
+ if (UseHardFpABI()) {
+ get_float_from_s_register(0, &fval0);
+ } else {
+ fval0 = mozilla::BitwiseCast<float>(arg0);
+ }
+ Prototype_Float32_Float32 target =
+ reinterpret_cast<Prototype_Float32_Float32>(external);
+ float fresult = target(fval0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultFloat(fresult);
+ break;
+ }
+ case Args_Float32_Float32Float32: {
+ float fval0, fval1;
+ if (UseHardFpABI()) {
+ get_float_from_s_register(0, &fval0);
+ get_float_from_s_register(1, &fval1);
+ } else {
+ fval0 = mozilla::BitwiseCast<float>(arg0);
+ fval1 = mozilla::BitwiseCast<float>(arg1);
+ }
+ Prototype_Float32_Float32Float32 target =
+ reinterpret_cast<Prototype_Float32_Float32Float32>(external);
+ float fresult = target(fval0, fval1);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultFloat(fresult);
+ break;
+ }
+ case Args_Float32_IntInt: {
+ Prototype_Float32_IntInt target =
+ reinterpret_cast<Prototype_Float32_IntInt>(external);
+ float fresult = target(arg0, arg1);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultFloat(fresult);
+ break;
+ }
+ case Args_Double_Int: {
+ Prototype_Double_Int target =
+ reinterpret_cast<Prototype_Double_Int>(external);
+ double dresult = target(arg0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Double_IntInt: {
+ Prototype_Double_IntInt target =
+ reinterpret_cast<Prototype_Double_IntInt>(external);
+ double dresult = target(arg0, arg1);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Double_DoubleInt: {
+ double dval0, dval1;
+ int32_t ival;
+ getFpArgs(&dval0, &dval1, &ival);
+ Prototype_Double_DoubleInt target =
+ reinterpret_cast<Prototype_Double_DoubleInt>(external);
+ double dresult = target(dval0, ival);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Double_DoubleDouble: {
+ double dval0, dval1;
+ int32_t ival;
+ getFpArgs(&dval0, &dval1, &ival);
+ Prototype_Double_DoubleDouble target =
+ reinterpret_cast<Prototype_Double_DoubleDouble>(external);
+ double dresult = target(dval0, dval1);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Double_IntDouble: {
+ int32_t ival = get_register(0);
+ double dval0;
+ if (UseHardFpABI()) {
+ get_double_from_d_register(0, &dval0);
+ } else {
+ dval0 = get_double_from_register_pair(2);
+ }
+ Prototype_Double_IntDouble target =
+ reinterpret_cast<Prototype_Double_IntDouble>(external);
+ double dresult = target(ival, dval0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Int_IntDouble: {
+ int32_t ival = get_register(0);
+ double dval0;
+ if (UseHardFpABI()) {
+ get_double_from_d_register(0, &dval0);
+ } else {
+ dval0 = get_double_from_register_pair(2);
+ }
+ Prototype_Int_IntDouble target =
+ reinterpret_cast<Prototype_Int_IntDouble>(external);
+ int32_t result = target(ival, dval0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ set_register(r0, result);
+ break;
+ }
+ case Args_Int_DoubleInt: {
+ double dval;
+ int32_t result;
+ Prototype_Int_DoubleInt target =
+ reinterpret_cast<Prototype_Int_DoubleInt>(external);
+ if (UseHardFpABI()) {
+ get_double_from_d_register(0, &dval);
+ result = target(dval, arg0);
+ } else {
+ dval = get_double_from_register_pair(0);
+ result = target(dval, arg2);
+ }
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ set_register(r0, result);
+ break;
+ }
+ case Args_Int_DoubleIntInt: {
+ double dval;
+ int32_t result;
+ Prototype_Int_DoubleIntInt target =
+ reinterpret_cast<Prototype_Int_DoubleIntInt>(external);
+ if (UseHardFpABI()) {
+ get_double_from_d_register(0, &dval);
+ result = target(dval, arg0, arg1);
+ } else {
+ dval = get_double_from_register_pair(0);
+ result = target(dval, arg2, arg3);
+ }
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ set_register(r0, result);
+ break;
+ }
+ case Args_Int_IntDoubleIntInt: {
+ double dval;
+ int32_t result;
+ Prototype_Int_IntDoubleIntInt target =
+ reinterpret_cast<Prototype_Int_IntDoubleIntInt>(external);
+ if (UseHardFpABI()) {
+ get_double_from_d_register(0, &dval);
+ result = target(arg0, dval, arg1, arg2);
+ } else {
+ dval = get_double_from_register_pair(2);
+ result = target(arg0, dval, arg4, arg5);
+ }
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ set_register(r0, result);
+ break;
+ }
+ case Args_Double_DoubleDoubleDouble: {
+ double dval0, dval1, dval2;
+ int32_t ival;
+ getFpArgs(&dval0, &dval1, &ival);
+        // The last argument is passed on the stack.
+ getFpFromStack(stack_pointer, &dval2);
+ Prototype_Double_DoubleDoubleDouble target =
+ reinterpret_cast<Prototype_Double_DoubleDoubleDouble>(external);
+ double dresult = target(dval0, dval1, dval2);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+ case Args_Double_DoubleDoubleDoubleDouble: {
+ double dval0, dval1, dval2, dval3;
+ int32_t ival;
+ getFpArgs(&dval0, &dval1, &ival);
+        // The last two arguments are passed on the stack.
+ getFpFromStack(stack_pointer, &dval2);
+ getFpFromStack(stack_pointer + 2, &dval3);
+ Prototype_Double_DoubleDoubleDoubleDouble target =
+ reinterpret_cast<Prototype_Double_DoubleDoubleDoubleDouble>(
+ external);
+ double dresult = target(dval0, dval1, dval2, dval3);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResultDouble(dresult);
+ break;
+ }
+
+ case Args_Int32_General: {
+ Prototype_Int32_General target =
+ reinterpret_cast<Prototype_Int32_General>(external);
+ int64_t result = target(arg0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32: {
+ Prototype_Int32_GeneralInt32 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32>(external);
+ int64_t result = target(arg0, arg1);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32: {
+ Prototype_Int32_GeneralInt32Int32 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32>(external);
+ int64_t result = target(arg0, arg1, arg2);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32Int32Int32: {
+ Prototype_Int32_GeneralInt32Int32Int32Int32 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32Int32>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32Int32Int32Int32: {
+ Prototype_Int32_GeneralInt32Int32Int32Int32Int32 target =
+ reinterpret_cast<
+ Prototype_Int32_GeneralInt32Int32Int32Int32Int32>(external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4, arg5);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32Int32Int32General: {
+ Prototype_Int32_GeneralInt32Int32Int32Int32General target =
+ reinterpret_cast<
+ Prototype_Int32_GeneralInt32Int32Int32Int32General>(external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4, arg5);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32Int32Int32Int32Int32General: {
+ Prototype_Int32_GeneralInt32Int32Int32Int32Int32Int32General target =
+ reinterpret_cast<
+ Prototype_Int32_GeneralInt32Int32Int32Int32Int32Int32General>(
+ external);
+ int64_t result =
+ target(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Float32Float32Int32Int32Int32General: {
+ float fval0, fval1;
+ if (UseHardFpABI()) {
+ get_float_from_s_register(2, &fval0);
+ get_float_from_s_register(3, &fval1);
+ } else {
+ fval0 = mozilla::BitwiseCast<float>(arg2);
+ fval1 = mozilla::BitwiseCast<float>(arg3);
+ }
+ Prototype_Int32_GeneralInt32Float32Float32Int32Int32Int32General
+ target = reinterpret_cast<
+ Prototype_Int32_GeneralInt32Float32Float32Int32Int32Int32General>(
+ external);
+ int64_t result =
+ target(arg0, arg1, fval0, fval1, arg4, arg5, arg6, arg7);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General: {
+ float fval0, fval1, fval2, fval3;
+ if (UseHardFpABI()) {
+ get_float_from_s_register(2, &fval0);
+ get_float_from_s_register(3, &fval1);
+ get_float_from_s_register(4, &fval2);
+ get_float_from_s_register(5, &fval3);
+ } else {
+ fval0 = mozilla::BitwiseCast<float>(arg2);
+ fval1 = mozilla::BitwiseCast<float>(arg3);
+ fval2 = mozilla::BitwiseCast<float>(arg4);
+ fval3 = mozilla::BitwiseCast<float>(arg5);
+ }
+ Prototype_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General
+ target = reinterpret_cast<
+ Prototype_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General>(
+ external);
+ int64_t result = target(arg0, arg1, fval0, fval1, fval2, fval3, arg6,
+ arg7, arg8, arg9, arg10);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General: {
+ float fval0, fval1, fval2, fval3, fval4;
+ if (UseHardFpABI()) {
+ get_float_from_s_register(2, &fval0);
+ get_float_from_s_register(3, &fval1);
+ get_float_from_s_register(5, &fval2);
+ get_float_from_s_register(6, &fval3);
+ get_float_from_s_register(8, &fval4);
+ } else {
+ fval0 = mozilla::BitwiseCast<float>(arg2);
+ fval1 = mozilla::BitwiseCast<float>(arg3);
+ fval2 = mozilla::BitwiseCast<float>(arg5);
+ fval3 = mozilla::BitwiseCast<float>(arg6);
+ fval4 = mozilla::BitwiseCast<float>(arg8);
+ }
+ Prototype_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General
+ target = reinterpret_cast<
+ Prototype_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General>(
+ external);
+ int64_t result =
+ target(arg0, arg1, fval0, fval1, arg4, fval2, fval3, arg7, fval4,
+ arg9, arg10, arg11, arg12, arg13);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32Int32General: {
+ Prototype_Int32_GeneralInt32Int32Int32General target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32General>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32Int64: {
+ Prototype_Int32_GeneralInt32Int32Int64 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int64>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, MakeInt64(arg3, arg4));
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int32General: {
+ Prototype_Int32_GeneralInt32Int32General target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32General>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32Int64Int64: {
+ Prototype_Int32_GeneralInt32Int64Int64 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int64Int64>(
+ external);
+ int64_t result =
+ target(arg0, arg1, MakeInt64(arg2, arg3), MakeInt64(arg4, arg5));
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32GeneralInt32: {
+ Prototype_Int32_GeneralInt32GeneralInt32 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32GeneralInt32>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt32GeneralInt32Int32: {
+ Prototype_Int32_GeneralInt32GeneralInt32Int32 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt32GeneralInt32Int32>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralGeneral: {
+ Prototype_Int32_GeneralGeneral target =
+ reinterpret_cast<Prototype_Int32_GeneralGeneral>(external);
+ int64_t result = target(arg0, arg1);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralGeneralGeneral: {
+ Prototype_Int32_GeneralGeneralGeneral target =
+ reinterpret_cast<Prototype_Int32_GeneralGeneralGeneral>(external);
+ int64_t result = target(arg0, arg1, arg2);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralGeneralInt32Int32: {
+ Prototype_Int32_GeneralGeneralInt32Int32 target =
+ reinterpret_cast<Prototype_Int32_GeneralGeneralInt32Int32>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt64Int32Int32Int32: {
+ Prototype_Int32_GeneralInt64Int32Int32Int32 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt64Int32Int32Int32>(
+ external);
+ int64_t result =
+ target(arg0, MakeInt64(arg2, arg3), arg4, arg5, arg6);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt64Int32: {
+ Prototype_Int32_GeneralInt64Int32 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt64Int32>(external);
+ int64_t result = target(arg0, MakeInt64(arg2, arg3), arg4);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt64Int32Int64: {
+ Prototype_Int32_GeneralInt64Int32Int64 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt64Int32Int64>(
+ external);
+ int64_t result =
+ target(arg0, MakeInt64(arg2, arg3), arg4, MakeInt64(arg6, arg7));
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt64Int32Int64General: {
+ Prototype_Int32_GeneralInt64Int32Int64General target =
+ reinterpret_cast<Prototype_Int32_GeneralInt64Int32Int64General>(
+ external);
+ int64_t result = target(arg0, MakeInt64(arg2, arg3), arg4,
+ MakeInt64(arg6, arg7), arg8);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt64Int64Int64: {
+ Prototype_Int32_GeneralInt64Int64Int64 target =
+ reinterpret_cast<Prototype_Int32_GeneralInt64Int64Int64>(
+ external);
+ int64_t result = target(arg0, MakeInt64(arg2, arg3),
+ MakeInt64(arg4, arg5), MakeInt64(arg6, arg7));
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt64Int64General: {
+ Prototype_Int32_GeneralInt64Int64General target =
+ reinterpret_cast<Prototype_Int32_GeneralInt64Int64General>(
+ external);
+ int64_t result =
+ target(arg0, MakeInt64(arg2, arg3), MakeInt64(arg4, arg5), arg6);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int32_GeneralInt64Int64Int64General: {
+ Prototype_Int32_GeneralInt64Int64Int64General target =
+ reinterpret_cast<Prototype_Int32_GeneralInt64Int64Int64General>(
+ external);
+ int64_t result =
+ target(arg0, MakeInt64(arg2, arg3), MakeInt64(arg4, arg5),
+ MakeInt64(arg6, arg7), arg8);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General_GeneralInt32: {
+ Prototype_General_GeneralInt32 target =
+ reinterpret_cast<Prototype_General_GeneralInt32>(external);
+ int64_t result = target(arg0, arg1);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General_GeneralInt32Int32: {
+ Prototype_General_GeneralInt32Int32 target =
+ reinterpret_cast<Prototype_General_GeneralInt32Int32>(external);
+ int64_t result = target(arg0, arg1, arg2);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_General_GeneralInt32General: {
+ Prototype_General_GeneralInt32General target =
+ reinterpret_cast<Prototype_General_GeneralInt32General>(external);
+ int64_t result = target(arg0, arg1, arg2);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case js::jit::Args_General_GeneralInt32Int32GeneralInt32: {
+ Prototype_General_GeneralInt32Int32GeneralInt32 target =
+ reinterpret_cast<Prototype_General_GeneralInt32Int32GeneralInt32>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralGeneralInt32General: {
+ Prototype_Int32_GeneralGeneralInt32General target =
+ reinterpret_cast<Prototype_Int32_GeneralGeneralInt32General>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralGeneralInt32GeneralInt32Int32Int32: {
+ Prototype_Int32_GeneralGeneralInt32GeneralInt32Int32Int32 target =
+ reinterpret_cast<
+ Prototype_Int32_GeneralGeneralInt32GeneralInt32Int32Int32>(
+ external);
+ int64_t result = target(arg0, arg1, arg2, arg3, arg4, arg5, arg6);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int64_General: {
+ Prototype_Int64_General target =
+ reinterpret_cast<Prototype_Int64_General>(external);
+ int64_t result = target(arg0);
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+ case Args_Int64_GeneralInt64: {
+ Prototype_Int64_GeneralInt64 target =
+ reinterpret_cast<Prototype_Int64_GeneralInt64>(external);
+ int64_t result = target(arg0, MakeInt64(arg2, arg3));
+ scratchVolatileRegisters(/* scratchFloat = true */);
+ setCallResult(result);
+ break;
+ }
+
+ default:
+ MOZ_CRASH("call");
+ }
+
+ if (single_stepping_) {
+ single_step_callback_(single_step_callback_arg_, this, nullptr);
+ }
+
+ set_register(lr, saved_lr);
+ set_pc(get_register(lr));
+ break;
+ }
+ case kBreakpoint: {
+ ArmDebugger dbg(this);
+ dbg.debug();
+ break;
+ }
+    default: { // Stop uses all codes at or above 1 << 23.
+ if (svc >= (1 << 23)) {
+ uint32_t code = svc & kStopCodeMask;
+ if (isWatchedStop(code)) {
+ increaseStopCounter(code);
+ }
+
+        // Stop if it is enabled; otherwise skip over the stop instruction
+        // and the embedded message address.
+ if (isEnabledStop(code)) {
+ ArmDebugger dbg(this);
+ dbg.stop(instr);
+ } else {
+ set_pc(get_pc() + 2 * SimInstruction::kInstrSize);
+ }
+ } else {
+ // This is not a valid svc code.
+ MOZ_CRASH();
+ break;
+ }
+ }
+ }
+}
+
+void Simulator::canonicalizeNaN(double* value) {
+ if (!wasm::CodeExists && !wasm::LookupCodeSegment(get_pc_as<void*>()) &&
+ FPSCR_default_NaN_mode_) {
+ *value = JS::CanonicalizeNaN(*value);
+ }
+}
+
+void Simulator::canonicalizeNaN(float* value) {
+ if (!wasm::CodeExists && !wasm::LookupCodeSegment(get_pc_as<void*>()) &&
+ FPSCR_default_NaN_mode_) {
+ *value = JS::CanonicalizeNaN(*value);
+ }
+}
+
+// Stop helper functions.
+bool Simulator::isStopInstruction(SimInstruction* instr) {
+ return (instr->bits(27, 24) == 0xF) && (instr->svcValue() >= kStopCode);
+}
+
+bool Simulator::isWatchedStop(uint32_t code) {
+ MOZ_ASSERT(code <= kMaxStopCode);
+ return code < kNumOfWatchedStops;
+}
+
+bool Simulator::isEnabledStop(uint32_t code) {
+ MOZ_ASSERT(code <= kMaxStopCode);
+ // Unwatched stops are always enabled.
+ return !isWatchedStop(code) ||
+ !(watched_stops_[code].count & kStopDisabledBit);
+}
+
+void Simulator::enableStop(uint32_t code) {
+ MOZ_ASSERT(isWatchedStop(code));
+ if (!isEnabledStop(code)) {
+ watched_stops_[code].count &= ~kStopDisabledBit;
+ }
+}
+
+void Simulator::disableStop(uint32_t code) {
+ MOZ_ASSERT(isWatchedStop(code));
+ if (isEnabledStop(code)) {
+ watched_stops_[code].count |= kStopDisabledBit;
+ }
+}
+
+void Simulator::increaseStopCounter(uint32_t code) {
+ MOZ_ASSERT(code <= kMaxStopCode);
+ MOZ_ASSERT(isWatchedStop(code));
+ if ((watched_stops_[code].count & ~(1 << 31)) == 0x7fffffff) {
+ printf(
+ "Stop counter for code %i has overflowed.\n"
+ "Enabling this code and reseting the counter to 0.\n",
+ code);
+ watched_stops_[code].count = 0;
+ enableStop(code);
+ } else {
+ watched_stops_[code].count++;
+ }
+}
+
+// Print a stop status.
+void Simulator::printStopInfo(uint32_t code) {
+ MOZ_ASSERT(code <= kMaxStopCode);
+ if (!isWatchedStop(code)) {
+ printf("Stop not watched.");
+ } else {
+ const char* state = isEnabledStop(code) ? "Enabled" : "Disabled";
+ int32_t count = watched_stops_[code].count & ~kStopDisabledBit;
+ // Don't print the state of unused breakpoints.
+ if (count != 0) {
+ if (watched_stops_[code].desc) {
+ printf("stop %i - 0x%x: \t%s, \tcounter = %i, \t%s\n", code, code,
+ state, count, watched_stops_[code].desc);
+ } else {
+ printf("stop %i - 0x%x: \t%s, \tcounter = %i\n", code, code, state,
+ count);
+ }
+ }
+ }
+}
+
+// Instruction types 0 and 1 are both rolled into one function because they only
+// differ in the handling of the shifter_operand.
+void Simulator::decodeType01(SimInstruction* instr) {
+ int type = instr->typeValue();
+ if (type == 0 && instr->isSpecialType0()) {
+ // Multiply instruction or extra loads and stores.
+ if (instr->bits(7, 4) == 9) {
+ if (instr->bit(24) == 0) {
+ // Raw field decoding here. Multiply instructions have their Rd
+ // in funny places.
+ int rn = instr->rnValue();
+ int rm = instr->rmValue();
+ int rs = instr->rsValue();
+ int32_t rs_val = get_register(rs);
+ int32_t rm_val = get_register(rm);
+ if (instr->bit(23) == 0) {
+ if (instr->bit(21) == 0) {
+ // The MUL instruction description (A 4.1.33) refers to
+ // Rd as being the destination for the operation, but it
+ // confusingly uses the Rn field to encode it.
+ int rd = rn; // Remap the rn field to the Rd register.
+ int32_t alu_out = rm_val * rs_val;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ }
+ } else {
+ int rd = instr->rdValue();
+ int32_t acc_value = get_register(rd);
+ if (instr->bit(22) == 0) {
+ // The MLA instruction description (A 4.1.28) refers
+ // to the order of registers as "Rd, Rm, Rs,
+ // Rn". But confusingly it uses the Rn field to
+ // encode the Rd register and the Rd field to encode
+ // the Rn register.
+ int32_t mul_out = rm_val * rs_val;
+ int32_t result = acc_value + mul_out;
+ set_register(rn, result);
+ } else {
+ int32_t mul_out = rm_val * rs_val;
+ int32_t result = acc_value - mul_out;
+ set_register(rn, result);
+ }
+ }
+ } else {
+          // The signed/long multiply instructions use the terms RdHi
+          // and RdLo when referring to the target registers. They are
+          // mapped to the Rn and Rd fields as follows:
+          //   RdLo == Rd
+          //   RdHi == Rn (confusingly, the Rn field encodes the RdHi
+          //   register, just as the mul instruction above uses the Rn
+          //   field to encode its Rd register; see the ARM instruction
+          //   manual for the full encoding details).
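+          // For example (illustrative): "umull r1, r0, r2, r3" encodes
+          // RdLo (r1) in the Rd field and RdHi (r0) in the Rn field.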
+ int rd_hi = rn; // Remap the rn field to the RdHi register.
+ int rd_lo = instr->rdValue();
+ int32_t hi_res = 0;
+ int32_t lo_res = 0;
+ if (instr->bit(22) == 1) {
+ int64_t left_op = static_cast<int32_t>(rm_val);
+ int64_t right_op = static_cast<int32_t>(rs_val);
+ uint64_t result = left_op * right_op;
+ hi_res = static_cast<int32_t>(result >> 32);
+ lo_res = static_cast<int32_t>(result & 0xffffffff);
+ } else {
+ // Unsigned multiply.
+ uint64_t left_op = static_cast<uint32_t>(rm_val);
+ uint64_t right_op = static_cast<uint32_t>(rs_val);
+ uint64_t result = left_op * right_op;
+ hi_res = static_cast<int32_t>(result >> 32);
+ lo_res = static_cast<int32_t>(result & 0xffffffff);
+ }
+ set_register(rd_lo, lo_res);
+ set_register(rd_hi, hi_res);
+ if (instr->hasS()) {
+ MOZ_CRASH();
+ }
+ }
+ } else {
+ if (instr->bits(excl::ExclusiveOpHi, excl::ExclusiveOpLo) ==
+ excl::ExclusiveOpcode) {
+ // Load-exclusive / store-exclusive.
+ if (instr->bit(excl::ExclusiveLoad)) {
+ int rn = instr->rnValue();
+ int rt = instr->rtValue();
+ int32_t address = get_register(rn);
+ switch (instr->bits(excl::ExclusiveSizeHi, excl::ExclusiveSizeLo)) {
+ case excl::ExclusiveWord:
+ set_register(rt, readExW(address, instr));
+ break;
+ case excl::ExclusiveDouble: {
+ MOZ_ASSERT((rt % 2) == 0);
+ int32_t hibits;
+ int32_t lobits = readExDW(address, &hibits);
+ set_register(rt, lobits);
+ set_register(rt + 1, hibits);
+ break;
+ }
+ case excl::ExclusiveByte:
+ set_register(rt, readExBU(address));
+ break;
+ case excl::ExclusiveHalf:
+ set_register(rt, readExHU(address, instr));
+ break;
+ }
+ } else {
+ int rn = instr->rnValue();
+ int rd = instr->rdValue();
+ int rt = instr->bits(3, 0);
+ int32_t address = get_register(rn);
+ int32_t value = get_register(rt);
+ int32_t result = 0;
+ switch (instr->bits(excl::ExclusiveSizeHi, excl::ExclusiveSizeLo)) {
+ case excl::ExclusiveWord:
+ result = writeExW(address, value, instr);
+ break;
+ case excl::ExclusiveDouble: {
+ MOZ_ASSERT((rt % 2) == 0);
+ int32_t value2 = get_register(rt + 1);
+ result = writeExDW(address, value, value2);
+ break;
+ }
+ case excl::ExclusiveByte:
+ result = writeExB(address, (uint8_t)value);
+ break;
+ case excl::ExclusiveHalf:
+ result = writeExH(address, (uint16_t)value, instr);
+ break;
+ }
+ set_register(rd, result);
+ }
+ } else {
+        MOZ_CRASH(); // Not used at the moment.
+ }
+ }
+ } else {
+ // Extra load/store instructions.
+ int rd = instr->rdValue();
+ int rn = instr->rnValue();
+ int32_t rn_val = get_register(rn);
+ int32_t addr = 0;
+ if (instr->bit(22) == 0) {
+ int rm = instr->rmValue();
+ int32_t rm_val = get_register(rm);
+ switch (instr->PUField()) {
+ case da_x:
+ MOZ_ASSERT(!instr->hasW());
+ addr = rn_val;
+ rn_val -= rm_val;
+ set_register(rn, rn_val);
+ break;
+ case ia_x:
+ MOZ_ASSERT(!instr->hasW());
+ addr = rn_val;
+ rn_val += rm_val;
+ set_register(rn, rn_val);
+ break;
+ case db_x:
+ rn_val -= rm_val;
+ addr = rn_val;
+ if (instr->hasW()) {
+ set_register(rn, rn_val);
+ }
+ break;
+ case ib_x:
+ rn_val += rm_val;
+ addr = rn_val;
+ if (instr->hasW()) {
+ set_register(rn, rn_val);
+ }
+ break;
+ default:
+ // The PU field is a 2-bit field.
+ MOZ_CRASH();
+ break;
+ }
+ } else {
+ int32_t imm_val = (instr->immedHValue() << 4) | instr->immedLValue();
+ switch (instr->PUField()) {
+ case da_x:
+ MOZ_ASSERT(!instr->hasW());
+ addr = rn_val;
+ rn_val -= imm_val;
+ set_register(rn, rn_val);
+ break;
+ case ia_x:
+ MOZ_ASSERT(!instr->hasW());
+ addr = rn_val;
+ rn_val += imm_val;
+ set_register(rn, rn_val);
+ break;
+ case db_x:
+ rn_val -= imm_val;
+ addr = rn_val;
+ if (instr->hasW()) {
+ set_register(rn, rn_val);
+ }
+ break;
+ case ib_x:
+ rn_val += imm_val;
+ addr = rn_val;
+ if (instr->hasW()) {
+ set_register(rn, rn_val);
+ }
+ break;
+ default:
+ // The PU field is a 2-bit field.
+ MOZ_CRASH();
+ break;
+ }
+ }
+ if ((instr->bits(7, 4) & 0xd) == 0xd && instr->bit(20) == 0) {
+ MOZ_ASSERT((rd % 2) == 0);
+ if (instr->hasH()) {
+ // The strd instruction.
+ int32_t value1 = get_register(rd);
+ int32_t value2 = get_register(rd + 1);
+ writeDW(addr, value1, value2);
+ } else {
+ // The ldrd instruction.
+ int* rn_data = readDW(addr);
+ if (rn_data) {
+ set_dw_register(rd, rn_data);
+ }
+ }
+ } else if (instr->hasH()) {
+ if (instr->hasSign()) {
+ if (instr->hasL()) {
+ int16_t val = readH(addr, instr);
+ set_register(rd, val);
+ } else {
+ int16_t val = get_register(rd);
+ writeH(addr, val, instr);
+ }
+ } else {
+ if (instr->hasL()) {
+ uint16_t val = readHU(addr, instr);
+ set_register(rd, val);
+ } else {
+ uint16_t val = get_register(rd);
+ writeH(addr, val, instr);
+ }
+ }
+ } else {
+ // Signed byte loads.
+ MOZ_ASSERT(instr->hasSign());
+ MOZ_ASSERT(instr->hasL());
+ int8_t val = readB(addr);
+ set_register(rd, val);
+ }
+ return;
+ }
+ } else if ((type == 0) && instr->isMiscType0()) {
+ if (instr->bits(7, 4) == 0) {
+ if (instr->bit(21) == 0) {
+ // mrs
+ int rd = instr->rdValue();
+ uint32_t flags;
+ if (instr->bit(22) == 0) {
+ // CPSR. Note: The Q flag is not yet implemented!
+ flags = (n_flag_ << 31) | (z_flag_ << 30) | (c_flag_ << 29) |
+ (v_flag_ << 28);
+ } else {
+ // SPSR
+ MOZ_CRASH();
+ }
+ set_register(rd, flags);
+ } else {
+ // msr
+ if (instr->bits(27, 23) == 2) {
+ // Register operand. For now we only emit mask 0b1100.
+ int rm = instr->rmValue();
+ mozilla::DebugOnly<uint32_t> mask = instr->bits(19, 16);
+ MOZ_ASSERT(mask == (3 << 2));
+
+ uint32_t flags = get_register(rm);
+ n_flag_ = (flags >> 31) & 1;
+ z_flag_ = (flags >> 30) & 1;
+ c_flag_ = (flags >> 29) & 1;
+ v_flag_ = (flags >> 28) & 1;
+ } else {
+ MOZ_CRASH();
+ }
+ }
+ } else if (instr->bits(22, 21) == 1) {
+ int rm = instr->rmValue();
+ switch (instr->bits(7, 4)) {
+ case 1: // BX
+ set_pc(get_register(rm));
+ break;
+ case 3: { // BLX
+ uint32_t old_pc = get_pc();
+ set_pc(get_register(rm));
+ set_register(lr, old_pc + SimInstruction::kInstrSize);
+ break;
+ }
+ case 7: { // BKPT
+ fprintf(stderr, "Simulator hit BKPT.\n");
+ if (getenv("ARM_SIM_DEBUGGER")) {
+ ArmDebugger dbg(this);
+ dbg.debug();
+ } else {
+ fprintf(stderr,
+ "Use ARM_SIM_DEBUGGER=1 to enter the builtin debugger.\n");
+ MOZ_CRASH("ARM simulator breakpoint");
+ }
+ break;
+ }
+ default:
+ MOZ_CRASH();
+ }
+ } else if (instr->bits(22, 21) == 3) {
+ int rm = instr->rmValue();
+ int rd = instr->rdValue();
+ switch (instr->bits(7, 4)) {
+ case 1: { // CLZ
+ uint32_t bits = get_register(rm);
+ int leading_zeros = 0;
+ if (bits == 0) {
+ leading_zeros = 32;
+ } else {
+ leading_zeros = mozilla::CountLeadingZeroes32(bits);
+ }
+ set_register(rd, leading_zeros);
+ break;
+ }
+ default:
+ MOZ_CRASH();
+ break;
+ }
+ } else {
+ printf("%08x\n", instr->instructionBits());
+ MOZ_CRASH();
+ }
+ } else if ((type == 1) && instr->isNopType1()) {
+ // NOP.
+ } else if ((type == 1) && instr->isCsdbType1()) {
+ // Speculation barrier. (No-op for the simulator)
+ } else {
+ int rd = instr->rdValue();
+ int rn = instr->rnValue();
+ int32_t rn_val = get_register(rn);
+ int32_t shifter_operand = 0;
+    bool shifter_carry_out = false;
+ if (type == 0) {
+ shifter_operand = getShiftRm(instr, &shifter_carry_out);
+ } else {
+ MOZ_ASSERT(instr->typeValue() == 1);
+ shifter_operand = getImm(instr, &shifter_carry_out);
+ }
+ int32_t alu_out;
+ switch (instr->opcodeField()) {
+ case OpAnd:
+ alu_out = rn_val & shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ }
+ break;
+ case OpEor:
+ alu_out = rn_val ^ shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ }
+ break;
+ case OpSub:
+ alu_out = rn_val - shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(!borrowFrom(rn_val, shifter_operand));
+ setVFlag(overflowFrom(alu_out, rn_val, shifter_operand, false));
+ }
+ break;
+ case OpRsb:
+ alu_out = shifter_operand - rn_val;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(!borrowFrom(shifter_operand, rn_val));
+ setVFlag(overflowFrom(alu_out, shifter_operand, rn_val, false));
+ }
+ break;
+ case OpAdd:
+ alu_out = rn_val + shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(carryFrom(rn_val, shifter_operand));
+ setVFlag(overflowFrom(alu_out, rn_val, shifter_operand, true));
+ }
+ break;
+ case OpAdc:
+ alu_out = rn_val + shifter_operand + getCarry();
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(carryFrom(rn_val, shifter_operand, getCarry()));
+ setVFlag(overflowFrom(alu_out, rn_val, shifter_operand, true));
+ }
+ break;
+ case OpSbc:
+ alu_out = rn_val - shifter_operand - (getCarry() == 0 ? 1 : 0);
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ MOZ_CRASH();
+ }
+ break;
+ case OpRsc:
+ alu_out = shifter_operand - rn_val - (getCarry() == 0 ? 1 : 0);
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ MOZ_CRASH();
+ }
+ break;
+ case OpTst:
+ if (instr->hasS()) {
+ alu_out = rn_val & shifter_operand;
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ } else {
+ alu_out = instr->immedMovwMovtValue();
+ set_register(rd, alu_out);
+ }
+ break;
+ case OpTeq:
+ if (instr->hasS()) {
+ alu_out = rn_val ^ shifter_operand;
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ } else {
+ // Other instructions matching this pattern are handled in the
+ // miscellaneous instructions part above.
+ MOZ_CRASH();
+ }
+ break;
+ case OpCmp:
+ if (instr->hasS()) {
+ alu_out = rn_val - shifter_operand;
+ setNZFlags(alu_out);
+ setCFlag(!borrowFrom(rn_val, shifter_operand));
+ setVFlag(overflowFrom(alu_out, rn_val, shifter_operand, false));
+ } else {
+ alu_out =
+ (get_register(rd) & 0xffff) | (instr->immedMovwMovtValue() << 16);
+ set_register(rd, alu_out);
+ }
+ break;
+ case OpCmn:
+ if (instr->hasS()) {
+ alu_out = rn_val + shifter_operand;
+ setNZFlags(alu_out);
+ setCFlag(carryFrom(rn_val, shifter_operand));
+ setVFlag(overflowFrom(alu_out, rn_val, shifter_operand, true));
+ } else {
+ // Other instructions matching this pattern are handled in the
+ // miscellaneous instructions part above.
+ MOZ_CRASH();
+ }
+ break;
+ case OpOrr:
+ alu_out = rn_val | shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ }
+ break;
+ case OpMov:
+ alu_out = shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ }
+ break;
+ case OpBic:
+ alu_out = rn_val & ~shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ }
+ break;
+ case OpMvn:
+ alu_out = ~shifter_operand;
+ set_register(rd, alu_out);
+ if (instr->hasS()) {
+ setNZFlags(alu_out);
+ setCFlag(shifter_carry_out);
+ }
+ break;
+ default:
+ MOZ_CRASH();
+ break;
+ }
+ }
+}
+
+void Simulator::decodeType2(SimInstruction* instr) {
+ int rd = instr->rdValue();
+ int rn = instr->rnValue();
+ int32_t rn_val = get_register(rn);
+ int32_t im_val = instr->offset12Value();
+ int32_t addr = 0;
+ switch (instr->PUField()) {
+ case da_x:
+ MOZ_ASSERT(!instr->hasW());
+ addr = rn_val;
+ rn_val -= im_val;
+ set_register(rn, rn_val);
+ break;
+ case ia_x:
+ MOZ_ASSERT(!instr->hasW());
+ addr = rn_val;
+ rn_val += im_val;
+ set_register(rn, rn_val);
+ break;
+ case db_x:
+ rn_val -= im_val;
+ addr = rn_val;
+ if (instr->hasW()) {
+ set_register(rn, rn_val);
+ }
+ break;
+ case ib_x:
+ rn_val += im_val;
+ addr = rn_val;
+ if (instr->hasW()) {
+ set_register(rn, rn_val);
+ }
+ break;
+ default:
+ MOZ_CRASH();
+ break;
+ }
+ if (instr->hasB()) {
+ if (instr->hasL()) {
+ uint8_t val = readBU(addr);
+ set_register(rd, val);
+ } else {
+ uint8_t val = get_register(rd);
+ writeB(addr, val);
+ }
+ } else {
+ if (instr->hasL()) {
+ set_register(rd, readW(addr, instr, AllowUnaligned));
+ } else {
+ writeW(addr, get_register(rd), instr, AllowUnaligned);
+ }
+ }
+}
+
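+// Rotate the four bytes of a 32-bit value right by 8 * rotate bits.
+// For example (illustrative): rotateBytes(0x11223344, 1) == 0x44112233.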
+static uint32_t rotateBytes(uint32_t val, int32_t rotate) {
+ switch (rotate) {
+ default:
+ return val;
+ case 1:
+ return (val >> 8) | (val << 24);
+ case 2:
+ return (val >> 16) | (val << 16);
+ case 3:
+ return (val >> 24) | (val << 8);
+ }
+}
+
+void Simulator::decodeType3(SimInstruction* instr) {
+ if (MOZ_UNLIKELY(instr->isUDF())) {
+ uint8_t* newPC;
+ if (wasm::HandleIllegalInstruction(registerState(), &newPC)) {
+ set_pc((int32_t)newPC);
+ return;
+ }
+ MOZ_CRASH("illegal instruction encountered");
+ }
+
+ int rd = instr->rdValue();
+ int rn = instr->rnValue();
+ int32_t rn_val = get_register(rn);
+  bool shifter_carry_out = false;
+ int32_t shifter_operand = getShiftRm(instr, &shifter_carry_out);
+ int32_t addr = 0;
+ switch (instr->PUField()) {
+ case da_x:
+ MOZ_ASSERT(!instr->hasW());
+ MOZ_CRASH();
+ break;
+ case ia_x: {
+ if (instr->bit(4) == 0) {
+ // Memop.
+ } else {
+ if (instr->bit(5) == 0) {
+ switch (instr->bits(22, 21)) {
+ case 0:
+ if (instr->bit(20) == 0) {
+ if (instr->bit(6) == 0) {
+ // Pkhbt.
+ uint32_t rn_val = get_register(rn);
+ uint32_t rm_val = get_register(instr->rmValue());
+ int32_t shift = instr->bits(11, 7);
+ rm_val <<= shift;
+ set_register(rd, (rn_val & 0xFFFF) | (rm_val & 0xFFFF0000U));
+ } else {
+ // Pkhtb.
+ uint32_t rn_val = get_register(rn);
+ int32_t rm_val = get_register(instr->rmValue());
+ int32_t shift = instr->bits(11, 7);
+ if (shift == 0) {
+ shift = 32;
+ }
+ rm_val >>= shift;
+ set_register(rd, (rn_val & 0xFFFF0000U) | (rm_val & 0xFFFF));
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 1:
+ MOZ_CRASH();
+ break;
+ case 2:
+ MOZ_CRASH();
+ break;
+ case 3: {
+ // Usat.
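+            // For example (illustrative): "usat rd, #8, rm" gives
+            // sat_pos = 8, so sat_val = 255 and the result is clamped
+            // to [0, 255].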
+ int32_t sat_pos = instr->bits(20, 16);
+ int32_t sat_val = (1 << sat_pos) - 1;
+ int32_t shift = instr->bits(11, 7);
+ int32_t shift_type = instr->bit(6);
+ int32_t rm_val = get_register(instr->rmValue());
+ if (shift_type == 0) { // LSL
+ rm_val <<= shift;
+ } else { // ASR
+ rm_val >>= shift;
+ }
+
+            // If saturation occurs, the Q flag should be set in the
+            // CPSR. The simulator does not model the Q flag yet, so it
+            // cannot be observed through an MRS read of the CPSR.
+ if (rm_val > sat_val) {
+ rm_val = sat_val;
+ } else if (rm_val < 0) {
+ rm_val = 0;
+ }
+ set_register(rd, rm_val);
+ break;
+ }
+ }
+ } else {
+ switch (instr->bits(22, 21)) {
+ case 0:
+ MOZ_CRASH();
+ break;
+ case 1:
+ if (instr->bits(7, 4) == 7 && instr->bits(19, 16) == 15) {
+ uint32_t rm_val = rotateBytes(get_register(instr->rmValue()),
+ instr->bits(11, 10));
+ if (instr->bit(20)) {
+ // Sxth.
+ set_register(rd, (int32_t)(int16_t)(rm_val & 0xFFFF));
+ } else {
+ // Sxtb.
+ set_register(rd, (int32_t)(int8_t)(rm_val & 0xFF));
+ }
+ } else if (instr->bits(20, 16) == 0b1'1111 &&
+ instr->bits(11, 4) == 0b1111'0011) {
+ // Rev
+ uint32_t rm_val = get_register(instr->rmValue());
+
+ static_assert(MOZ_LITTLE_ENDIAN());
+ set_register(rd,
+ mozilla::NativeEndian::swapToBigEndian(rm_val));
+ } else if (instr->bits(20, 16) == 0b1'1111 &&
+ instr->bits(11, 4) == 0b1111'1011) {
+ // Rev16
+ uint32_t rm_val = get_register(instr->rmValue());
+
+ static_assert(MOZ_LITTLE_ENDIAN());
+ uint32_t hi = mozilla::NativeEndian::swapToBigEndian(
+ uint16_t(rm_val >> 16));
+ uint32_t lo =
+ mozilla::NativeEndian::swapToBigEndian(uint16_t(rm_val));
+ set_register(rd, (hi << 16) | lo);
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 2:
+ if ((instr->bit(20) == 0) && (instr->bits(9, 6) == 1)) {
+ if (instr->bits(19, 16) == 0xF) {
+ // Uxtb16.
+ uint32_t rm_val = rotateBytes(get_register(instr->rmValue()),
+ instr->bits(11, 10));
+ set_register(rd, (rm_val & 0xFF) | (rm_val & 0xFF0000));
+ } else {
+ MOZ_CRASH();
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 3:
+ if ((instr->bit(20) == 0) && (instr->bits(9, 6) == 1)) {
+ if (instr->bits(19, 16) == 0xF) {
+ // Uxtb.
+ uint32_t rm_val = rotateBytes(get_register(instr->rmValue()),
+ instr->bits(11, 10));
+ set_register(rd, (rm_val & 0xFF));
+ } else {
+ // Uxtab.
+ uint32_t rn_val = get_register(rn);
+ uint32_t rm_val = rotateBytes(get_register(instr->rmValue()),
+ instr->bits(11, 10));
+ set_register(rd, rn_val + (rm_val & 0xFF));
+ }
+ } else if ((instr->bit(20) == 1) && (instr->bits(9, 6) == 1)) {
+ if (instr->bits(19, 16) == 0xF) {
+ // Uxth.
+ uint32_t rm_val = rotateBytes(get_register(instr->rmValue()),
+ instr->bits(11, 10));
+ set_register(rd, (rm_val & 0xFFFF));
+ } else {
+ // Uxtah.
+ uint32_t rn_val = get_register(rn);
+ uint32_t rm_val = rotateBytes(get_register(instr->rmValue()),
+ instr->bits(11, 10));
+ set_register(rd, rn_val + (rm_val & 0xFFFF));
+ }
+ } else if (instr->bits(20, 16) == 0b1'1111 &&
+ instr->bits(11, 4) == 0b1111'1011) {
+ // Revsh
+ uint32_t rm_val = get_register(instr->rmValue());
+
+ static_assert(MOZ_LITTLE_ENDIAN());
+ set_register(
+ rd, int32_t(int16_t(mozilla::NativeEndian::swapToBigEndian(
+ uint16_t(rm_val)))));
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ }
+ }
+ return;
+ }
+ break;
+ }
+    case db_x: { // sdiv/udiv.
+ if (instr->bit(22) == 0x0 && instr->bit(20) == 0x1 &&
+ instr->bits(15, 12) == 0x0f && instr->bits(7, 4) == 0x1) {
+ if (!instr->hasW()) {
+ // sdiv (in V8 notation matching ARM ISA format) rn = rm/rs.
+ int rm = instr->rmValue();
+ int32_t rm_val = get_register(rm);
+ int rs = instr->rsValue();
+ int32_t rs_val = get_register(rs);
+ int32_t ret_val = 0;
+ MOZ_ASSERT(rs_val != 0);
+ if ((rm_val == INT32_MIN) && (rs_val == -1)) {
+ ret_val = INT32_MIN;
+ } else {
+ ret_val = rm_val / rs_val;
+ }
+ set_register(rn, ret_val);
+ return;
+ } else {
+ // udiv (in V8 notation matching ARM ISA format) rn = rm/rs.
+ int rm = instr->rmValue();
+ uint32_t rm_val = get_register(rm);
+ int rs = instr->rsValue();
+ uint32_t rs_val = get_register(rs);
+ uint32_t ret_val = 0;
+ MOZ_ASSERT(rs_val != 0);
+ ret_val = rm_val / rs_val;
+ set_register(rn, ret_val);
+ return;
+ }
+ }
+
+ addr = rn_val - shifter_operand;
+ if (instr->hasW()) {
+ set_register(rn, addr);
+ }
+ break;
+ }
+ case ib_x: {
+ if (instr->hasW() && (instr->bits(6, 4) == 0x5)) {
+ uint32_t widthminus1 = static_cast<uint32_t>(instr->bits(20, 16));
+ uint32_t lsbit = static_cast<uint32_t>(instr->bits(11, 7));
+ uint32_t msbit = widthminus1 + lsbit;
+ if (msbit <= 31) {
+ if (instr->bit(22)) {
+ // ubfx - unsigned bitfield extract.
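+          // For example (illustrative): "ubfx rd, rm, #4, #8" has
+          // lsbit = 4 and widthminus1 = 7; the shift pair below
+          // computes (rm_val >> 4) & 0xff.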
+ uint32_t rm_val =
+ static_cast<uint32_t>(get_register(instr->rmValue()));
+ uint32_t extr_val = rm_val << (31 - msbit);
+ extr_val = extr_val >> (31 - widthminus1);
+ set_register(instr->rdValue(), extr_val);
+ } else {
+ // sbfx - signed bitfield extract.
+ int32_t rm_val = get_register(instr->rmValue());
+ int32_t extr_val = rm_val << (31 - msbit);
+ extr_val = extr_val >> (31 - widthminus1);
+ set_register(instr->rdValue(), extr_val);
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ return;
+ } else if (!instr->hasW() && (instr->bits(6, 4) == 0x1)) {
+ uint32_t lsbit = static_cast<uint32_t>(instr->bits(11, 7));
+ uint32_t msbit = static_cast<uint32_t>(instr->bits(20, 16));
+ if (msbit >= lsbit) {
+ // bfc or bfi - bitfield clear/insert.
+ uint32_t rd_val =
+ static_cast<uint32_t>(get_register(instr->rdValue()));
+ uint32_t bitcount = msbit - lsbit + 1;
+ uint32_t mask = (1 << bitcount) - 1;
+ rd_val &= ~(mask << lsbit);
+ if (instr->rmValue() != 15) {
+ // bfi - bitfield insert.
+ uint32_t rm_val =
+ static_cast<uint32_t>(get_register(instr->rmValue()));
+ rm_val &= mask;
+ rd_val |= rm_val << lsbit;
+ }
+ set_register(instr->rdValue(), rd_val);
+ } else {
+ MOZ_CRASH();
+ }
+ return;
+ } else {
+ addr = rn_val + shifter_operand;
+ if (instr->hasW()) {
+ set_register(rn, addr);
+ }
+ }
+ break;
+ }
+ default:
+ MOZ_CRASH();
+ break;
+ }
+ if (instr->hasB()) {
+ if (instr->hasL()) {
+ uint8_t byte = readB(addr);
+ set_register(rd, byte);
+ } else {
+ uint8_t byte = get_register(rd);
+ writeB(addr, byte);
+ }
+ } else {
+ if (instr->hasL()) {
+ set_register(rd, readW(addr, instr, AllowUnaligned));
+ } else {
+ writeW(addr, get_register(rd), instr, AllowUnaligned);
+ }
+ }
+}
+
+void Simulator::decodeType4(SimInstruction* instr) {
+ // Only allowed to be set in privileged mode.
+ MOZ_ASSERT(instr->bit(22) == 0);
+ bool load = instr->hasL();
+ handleRList(instr, load);
+}
+
+void Simulator::decodeType5(SimInstruction* instr) {
+ int off = instr->sImmed24Value() << 2;
+ intptr_t pc_address = get_pc();
+ if (instr->hasLink()) {
+ set_register(lr, pc_address + SimInstruction::kInstrSize);
+ }
+ int pc_reg = get_register(pc);
+ set_pc(pc_reg + off);
+}
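+
+// Example (illustrative sketch): with the usual +8 PC read offset applied by
+// get_register(pc), a branch at address A with imm24 == 2 transfers to
+//
+//   A + 8 + (2 << 2) == A + 16,
+//
+// i.e. `b .+16`, while get_pc() (the raw PC) is what the link register
+// computation above is based on.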
+
+void Simulator::decodeType6(SimInstruction* instr) {
+ decodeType6CoprocessorIns(instr);
+}
+
+void Simulator::decodeType7(SimInstruction* instr) {
+ if (instr->bit(24) == 1) {
+ softwareInterrupt(instr);
+ } else if (instr->bit(4) == 1 && instr->bits(11, 9) != 5) {
+ decodeType7CoprocessorIns(instr);
+ } else {
+ decodeTypeVFP(instr);
+ }
+}
+
+void Simulator::decodeType7CoprocessorIns(SimInstruction* instr) {
+ if (instr->bit(20) == 0) {
+ // MCR, MCR2
+ if (instr->coprocessorValue() == 15) {
+ int opc1 = instr->bits(23, 21);
+ int opc2 = instr->bits(7, 5);
+ int CRn = instr->bits(19, 16);
+ int CRm = instr->bits(3, 0);
+ if (opc1 == 0 && opc2 == 4 && CRn == 7 && CRm == 10) {
+ // ARMv6 DSB instruction. We do not use DSB.
+ MOZ_CRASH("DSB not implemented");
+ } else if (opc1 == 0 && opc2 == 5 && CRn == 7 && CRm == 10) {
+ // ARMv6 DMB instruction.
+ AtomicOperations::fenceSeqCst();
+ } else if (opc1 == 0 && opc2 == 4 && CRn == 7 && CRm == 5) {
+ // ARMv6 ISB instruction. We do not use ISB.
+ MOZ_CRASH("ISB not implemented");
+ } else {
+ MOZ_CRASH();
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ } else {
+ // MRC, MRC2
+ MOZ_CRASH();
+ }
+}
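+
+// Example (illustrative, assuming the standard CP15 encodings): the DMB case
+// above (opc1 == 0, opc2 == 5, CRn == 7, CRm == 10) corresponds to the ARMv6
+// barrier idiom
+//
+//   mcr p15, 0, r0, c7, c10, 5  ; data memory barrier
+//
+// which the simulator implements as AtomicOperations::fenceSeqCst().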
+
+void Simulator::decodeTypeVFP(SimInstruction* instr) {
+ MOZ_ASSERT(instr->typeValue() == 7 && instr->bit(24) == 0);
+ MOZ_ASSERT(instr->bits(11, 9) == 0x5);
+
+ // Obtain double precision register codes.
+ VFPRegPrecision precision =
+ (instr->szValue() == 1) ? kDoublePrecision : kSinglePrecision;
+ int vm = instr->VFPMRegValue(precision);
+ int vd = instr->VFPDRegValue(precision);
+ int vn = instr->VFPNRegValue(precision);
+
+ if (instr->bit(4) == 0) {
+ if (instr->opc1Value() == 0x7) {
+ // Other data processing instructions.
+ if ((instr->opc2Value() == 0x0) && (instr->opc3Value() == 0x1)) {
+ // vmov register to register.
+ if (instr->szValue() == 0x1) {
+ int m = instr->VFPMRegValue(kDoublePrecision);
+ int d = instr->VFPDRegValue(kDoublePrecision);
+ double temp;
+ get_double_from_d_register(m, &temp);
+ set_d_register_from_double(d, temp);
+ } else {
+ int m = instr->VFPMRegValue(kSinglePrecision);
+ int d = instr->VFPDRegValue(kSinglePrecision);
+ float temp;
+ get_float_from_s_register(m, &temp);
+ set_s_register_from_float(d, temp);
+ }
+ } else if ((instr->opc2Value() == 0x0) && (instr->opc3Value() == 0x3)) {
+ // vabs
+ if (instr->szValue() == 0x1) {
+ union {
+ double f64;
+ uint64_t u64;
+ } u;
+ get_double_from_d_register(vm, &u.f64);
+ u.u64 &= 0x7fffffffffffffffu;
+ double dd_value = u.f64;
+ canonicalizeNaN(&dd_value);
+ set_d_register_from_double(vd, dd_value);
+ } else {
+ union {
+ float f32;
+ uint32_t u32;
+ } u;
+ get_float_from_s_register(vm, &u.f32);
+ u.u32 &= 0x7fffffffu;
+ float fd_value = u.f32;
+ canonicalizeNaN(&fd_value);
+ set_s_register_from_float(vd, fd_value);
+ }
+ } else if ((instr->opc2Value() == 0x1) && (instr->opc3Value() == 0x1)) {
+ // vneg
+ if (instr->szValue() == 0x1) {
+ double dm_value;
+ get_double_from_d_register(vm, &dm_value);
+ double dd_value = -dm_value;
+ canonicalizeNaN(&dd_value);
+ set_d_register_from_double(vd, dd_value);
+ } else {
+ float fm_value;
+ get_float_from_s_register(vm, &fm_value);
+ float fd_value = -fm_value;
+ canonicalizeNaN(&fd_value);
+ set_s_register_from_float(vd, fd_value);
+ }
+ } else if ((instr->opc2Value() == 0x7) && (instr->opc3Value() == 0x3)) {
+ decodeVCVTBetweenDoubleAndSingle(instr);
+ } else if ((instr->opc2Value() == 0x8) && (instr->opc3Value() & 0x1)) {
+ decodeVCVTBetweenFloatingPointAndInteger(instr);
+ } else if ((instr->opc2Value() == 0xA) && (instr->opc3Value() == 0x3) &&
+ (instr->bit(8) == 1)) {
+ // vcvt.f64.s32 Dd, Dd, #<fbits>.
+ int fraction_bits = 32 - ((instr->bits(3, 0) << 1) | instr->bit(5));
+ int fixed_value = get_sinteger_from_s_register(vd * 2);
+ double divide = 1 << fraction_bits;
+ set_d_register_from_double(vd, fixed_value / divide);
+ } else if (((instr->opc2Value() >> 1) == 0x6) &&
+ (instr->opc3Value() & 0x1)) {
+ decodeVCVTBetweenFloatingPointAndInteger(instr);
+ } else if (((instr->opc2Value() == 0x4) || (instr->opc2Value() == 0x5)) &&
+ (instr->opc3Value() & 0x1)) {
+ decodeVCMP(instr);
+ } else if (((instr->opc2Value() == 0x1)) && (instr->opc3Value() == 0x3)) {
+ // vsqrt
+ if (instr->szValue() == 0x1) {
+ double dm_value;
+ get_double_from_d_register(vm, &dm_value);
+ double dd_value = std::sqrt(dm_value);
+ canonicalizeNaN(&dd_value);
+ set_d_register_from_double(vd, dd_value);
+ } else {
+ float fm_value;
+ get_float_from_s_register(vm, &fm_value);
+ float fd_value = std::sqrt(fm_value);
+ canonicalizeNaN(&fd_value);
+ set_s_register_from_float(vd, fd_value);
+ }
+ } else if (instr->opc3Value() == 0x0) {
+ // vmov immediate.
+ if (instr->szValue() == 0x1) {
+ set_d_register_from_double(vd, instr->doubleImmedVmov());
+ } else {
+ // vmov.f32 immediate.
+ set_s_register_from_float(vd, instr->float32ImmedVmov());
+ }
+ } else {
+ decodeVCVTBetweenFloatingPointAndIntegerFrac(instr);
+ }
+ } else if (instr->opc1Value() == 0x3) {
+ if (instr->szValue() != 0x1) {
+ if (instr->opc3Value() & 0x1) {
+ // vsub
+ float fn_value;
+ get_float_from_s_register(vn, &fn_value);
+ float fm_value;
+ get_float_from_s_register(vm, &fm_value);
+ float fd_value = fn_value - fm_value;
+ canonicalizeNaN(&fd_value);
+ set_s_register_from_float(vd, fd_value);
+ } else {
+ // vadd
+ float fn_value;
+ get_float_from_s_register(vn, &fn_value);
+ float fm_value;
+ get_float_from_s_register(vm, &fm_value);
+ float fd_value = fn_value + fm_value;
+ canonicalizeNaN(&fd_value);
+ set_s_register_from_float(vd, fd_value);
+ }
+ } else {
+ if (instr->opc3Value() & 0x1) {
+ // vsub
+ double dn_value;
+ get_double_from_d_register(vn, &dn_value);
+ double dm_value;
+ get_double_from_d_register(vm, &dm_value);
+ double dd_value = dn_value - dm_value;
+ canonicalizeNaN(&dd_value);
+ set_d_register_from_double(vd, dd_value);
+ } else {
+ // vadd
+ double dn_value;
+ get_double_from_d_register(vn, &dn_value);
+ double dm_value;
+ get_double_from_d_register(vm, &dm_value);
+ double dd_value = dn_value + dm_value;
+ canonicalizeNaN(&dd_value);
+ set_d_register_from_double(vd, dd_value);
+ }
+ }
+ } else if ((instr->opc1Value() == 0x2) && !(instr->opc3Value() & 0x1)) {
+ // vmul
+ if (instr->szValue() != 0x1) {
+ float fn_value;
+ get_float_from_s_register(vn, &fn_value);
+ float fm_value;
+ get_float_from_s_register(vm, &fm_value);
+ float fd_value = fn_value * fm_value;
+ canonicalizeNaN(&fd_value);
+ set_s_register_from_float(vd, fd_value);
+ } else {
+ double dn_value;
+ get_double_from_d_register(vn, &dn_value);
+ double dm_value;
+ get_double_from_d_register(vm, &dm_value);
+ double dd_value = dn_value * dm_value;
+ canonicalizeNaN(&dd_value);
+ set_d_register_from_double(vd, dd_value);
+ }
+ } else if ((instr->opc1Value() == 0x0)) {
+ // vmla, vmls
+ const bool is_vmls = (instr->opc3Value() & 0x1);
+
+ if (instr->szValue() != 0x1) {
+ MOZ_CRASH("Not used by V8.");
+ }
+
+ double dd_val;
+ get_double_from_d_register(vd, &dd_val);
+ double dn_val;
+ get_double_from_d_register(vn, &dn_val);
+ double dm_val;
+ get_double_from_d_register(vm, &dm_val);
+
+ // Note: we do the mul and add/sub in separate steps to avoid
+ // getting a result with too high precision.
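+ // (Illustration: if this were written as dd_val + dn_val * dm_val in
+ // one expression, the compiler could contract it into a fused
+ // multiply-add, skipping the rounding of the product to double and
+ // diverging from ARM's chained VMLA/VMLS by an ulp.)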
+ set_d_register_from_double(vd, dn_val * dm_val);
+ double temp;
+ get_double_from_d_register(vd, &temp);
+ if (is_vmls) {
+ temp = dd_val - temp;
+ } else {
+ temp = dd_val + temp;
+ }
+ canonicalizeNaN(&temp);
+ set_d_register_from_double(vd, temp);
+ } else if ((instr->opc1Value() == 0x4) && !(instr->opc3Value() & 0x1)) {
+ // vdiv
+ if (instr->szValue() != 0x1) {
+ float fn_value;
+ get_float_from_s_register(vn, &fn_value);
+ float fm_value;
+ get_float_from_s_register(vm, &fm_value);
+ float fd_value = fn_value / fm_value;
+ div_zero_vfp_flag_ = (fm_value == 0);
+ canonicalizeNaN(&fd_value);
+ set_s_register_from_float(vd, fd_value);
+ } else {
+ double dn_value;
+ get_double_from_d_register(vn, &dn_value);
+ double dm_value;
+ get_double_from_d_register(vm, &dm_value);
+ double dd_value = dn_value / dm_value;
+ div_zero_vfp_flag_ = (dm_value == 0);
+ canonicalizeNaN(&dd_value);
+ set_d_register_from_double(vd, dd_value);
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ } else {
+ if (instr->VCValue() == 0x0 && instr->VAValue() == 0x0) {
+ decodeVMOVBetweenCoreAndSinglePrecisionRegisters(instr);
+ } else if ((instr->VLValue() == 0x0) && (instr->VCValue() == 0x1) &&
+ (instr->bit(23) == 0x0)) {
+ // vmov (ARM core register to scalar).
+ int vd = instr->bits(19, 16) | (instr->bit(7) << 4);
+ double dd_value;
+ get_double_from_d_register(vd, &dd_value);
+ int32_t data[2];
+ memcpy(data, &dd_value, 8);
+ data[instr->bit(21)] = get_register(instr->rtValue());
+ memcpy(&dd_value, data, 8);
+ set_d_register_from_double(vd, dd_value);
+ } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1) &&
+ (instr->bit(23) == 0x0)) {
+ // vmov (scalar to ARM core register).
+ int vn = instr->bits(19, 16) | (instr->bit(7) << 4);
+ double dn_value;
+ get_double_from_d_register(vn, &dn_value);
+ int32_t data[2];
+ memcpy(data, &dn_value, 8);
+ set_register(instr->rtValue(), data[instr->bit(21)]);
+ } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x0) &&
+ (instr->VAValue() == 0x7) && (instr->bits(19, 16) == 0x1)) {
+ // vmrs
+ uint32_t rt = instr->rtValue();
+ if (rt == 0xF) {
+ copy_FPSCR_to_APSR();
+ } else {
+ // Emulate FPSCR from the Simulator flags.
+ uint32_t fpscr = (n_flag_FPSCR_ << 31) | (z_flag_FPSCR_ << 30) |
+ (c_flag_FPSCR_ << 29) | (v_flag_FPSCR_ << 28) |
+ (FPSCR_default_NaN_mode_ << 25) |
+ (inexact_vfp_flag_ << 4) | (underflow_vfp_flag_ << 3) |
+ (overflow_vfp_flag_ << 2) | (div_zero_vfp_flag_ << 1) |
+ (inv_op_vfp_flag_ << 0) | (FPSCR_rounding_mode_);
+ set_register(rt, fpscr);
+ }
+ } else if ((instr->VLValue() == 0x0) && (instr->VCValue() == 0x0) &&
+ (instr->VAValue() == 0x7) && (instr->bits(19, 16) == 0x1)) {
+ // vmsr
+ uint32_t rt = instr->rtValue();
+ if (rt == pc) {
+ MOZ_CRASH();
+ } else {
+ uint32_t rt_value = get_register(rt);
+ n_flag_FPSCR_ = (rt_value >> 31) & 1;
+ z_flag_FPSCR_ = (rt_value >> 30) & 1;
+ c_flag_FPSCR_ = (rt_value >> 29) & 1;
+ v_flag_FPSCR_ = (rt_value >> 28) & 1;
+ FPSCR_default_NaN_mode_ = (rt_value >> 25) & 1;
+ inexact_vfp_flag_ = (rt_value >> 4) & 1;
+ underflow_vfp_flag_ = (rt_value >> 3) & 1;
+ overflow_vfp_flag_ = (rt_value >> 2) & 1;
+ div_zero_vfp_flag_ = (rt_value >> 1) & 1;
+ inv_op_vfp_flag_ = (rt_value >> 0) & 1;
+ FPSCR_rounding_mode_ =
+ static_cast<VFPRoundingMode>((rt_value)&kVFPRoundingModeMask);
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ }
+}
+
+void Simulator::decodeVMOVBetweenCoreAndSinglePrecisionRegisters(
+ SimInstruction* instr) {
+ MOZ_ASSERT(instr->bit(4) == 1 && instr->VCValue() == 0x0 &&
+ instr->VAValue() == 0x0);
+
+ int t = instr->rtValue();
+ int n = instr->VFPNRegValue(kSinglePrecision);
+ bool to_arm_register = (instr->VLValue() == 0x1);
+ if (to_arm_register) {
+ int32_t int_value = get_sinteger_from_s_register(n);
+ set_register(t, int_value);
+ } else {
+ int32_t rs_val = get_register(t);
+ set_s_register_from_sinteger(n, rs_val);
+ }
+}
+
+void Simulator::decodeVCMP(SimInstruction* instr) {
+ MOZ_ASSERT((instr->bit(4) == 0) && (instr->opc1Value() == 0x7));
+ MOZ_ASSERT(((instr->opc2Value() == 0x4) || (instr->opc2Value() == 0x5)) &&
+ (instr->opc3Value() & 0x1));
+ // Comparison.
+
+ VFPRegPrecision precision = kSinglePrecision;
+ if (instr->szValue() == 1) {
+ precision = kDoublePrecision;
+ }
+
+ int d = instr->VFPDRegValue(precision);
+ int m = 0;
+ if (instr->opc2Value() == 0x4) {
+ m = instr->VFPMRegValue(precision);
+ }
+
+ if (precision == kDoublePrecision) {
+ double dd_value;
+ get_double_from_d_register(d, &dd_value);
+ double dm_value = 0.0;
+ if (instr->opc2Value() == 0x4) {
+ get_double_from_d_register(m, &dm_value);
+ }
+
+ // Raise exceptions for quiet NaNs if necessary.
+ if (instr->bit(7) == 1) {
+ if (std::isnan(dd_value)) {
+ inv_op_vfp_flag_ = true;
+ }
+ }
+ compute_FPSCR_Flags(dd_value, dm_value);
+ } else {
+ float fd_value;
+ get_float_from_s_register(d, &fd_value);
+ float fm_value = 0.0;
+ if (instr->opc2Value() == 0x4) {
+ get_float_from_s_register(m, &fm_value);
+ }
+
+ // Raise exceptions for quiet NaNs if necessary.
+ if (instr->bit(7) == 1) {
+ if (std::isnan(fd_value)) {
+ inv_op_vfp_flag_ = true;
+ }
+ }
+ compute_FPSCR_Flags(fd_value, fm_value);
+ }
+}
+
+void Simulator::decodeVCVTBetweenDoubleAndSingle(SimInstruction* instr) {
+ MOZ_ASSERT(instr->bit(4) == 0 && instr->opc1Value() == 0x7);
+ MOZ_ASSERT(instr->opc2Value() == 0x7 && instr->opc3Value() == 0x3);
+
+ VFPRegPrecision dst_precision = kDoublePrecision;
+ VFPRegPrecision src_precision = kSinglePrecision;
+ if (instr->szValue() == 1) {
+ dst_precision = kSinglePrecision;
+ src_precision = kDoublePrecision;
+ }
+
+ int dst = instr->VFPDRegValue(dst_precision);
+ int src = instr->VFPMRegValue(src_precision);
+
+ if (dst_precision == kSinglePrecision) {
+ double val;
+ get_double_from_d_register(src, &val);
+ set_s_register_from_float(dst, static_cast<float>(val));
+ } else {
+ float val;
+ get_float_from_s_register(src, &val);
+ set_d_register_from_double(dst, static_cast<double>(val));
+ }
+}
+
+static bool get_inv_op_vfp_flag(VFPRoundingMode mode, double val,
+ bool unsigned_) {
+ MOZ_ASSERT(mode == SimRN || mode == SimRM || mode == SimRZ);
+ double max_uint = static_cast<double>(0xffffffffu);
+ double max_int = static_cast<double>(INT32_MAX);
+ double min_int = static_cast<double>(INT32_MIN);
+
+ // Check for NaN.
+ if (val != val) {
+ return true;
+ }
+
+ // Check for overflow. This code works because 32-bit integers can be
+ // exactly represented by IEEE-754 64-bit floating-point values.
+ switch (mode) {
+ case SimRN:
+ return unsigned_ ? (val >= (max_uint + 0.5)) || (val < -0.5)
+ : (val >= (max_int + 0.5)) || (val < (min_int - 0.5));
+ case SimRM:
+ return unsigned_ ? (val >= (max_uint + 1.0)) || (val < 0)
+ : (val >= (max_int + 1.0)) || (val < min_int);
+ case SimRZ:
+ return unsigned_ ? (val >= (max_uint + 1.0)) || (val <= -1)
+ : (val >= (max_int + 1.0)) || (val <= (min_int - 1.0));
+ default:
+ MOZ_CRASH();
+ return true;
+ }
+}
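+
+// Example (illustrative): boundary cases for SimRZ with unsigned_ == true:
+//
+//   get_inv_op_vfp_flag(SimRZ, 4294967295.0, true)  // false: fits in u32
+//   get_inv_op_vfp_flag(SimRZ, 4294967296.0, true)  // true: >= max_uint + 1
+//   get_inv_op_vfp_flag(SimRZ, -0.5, true)          // false: truncates to 0
+//   get_inv_op_vfp_flag(SimRZ, -1.0, true)          // true: <= -1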
+
+// We call this function only if we had a vfp invalid exception.
+// It returns the correct saturated value.
+static int VFPConversionSaturate(double val, bool unsigned_res) {
+ if (val != val) { // NaN.
+ return 0;
+ }
+ if (unsigned_res) {
+ return (val < 0) ? 0 : 0xffffffffu;
+ }
+ return (val < 0) ? INT32_MIN : INT32_MAX;
+}
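+
+// For instance, converting 1e12 to a signed 32-bit integer raises the
+// invalid-op flag and saturates to INT32_MAX here, while a NaN input
+// converts to 0, matching the architected VCVT behavior.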
+
+void Simulator::decodeVCVTBetweenFloatingPointAndInteger(
+ SimInstruction* instr) {
+ MOZ_ASSERT((instr->bit(4) == 0) && (instr->opc1Value() == 0x7) &&
+ (instr->bits(27, 23) == 0x1D));
+ MOZ_ASSERT(
+ ((instr->opc2Value() == 0x8) && (instr->opc3Value() & 0x1)) ||
+ (((instr->opc2Value() >> 1) == 0x6) && (instr->opc3Value() & 0x1)));
+
+ // Conversion between floating-point and integer.
+ bool to_integer = (instr->bit(18) == 1);
+
+ VFPRegPrecision src_precision =
+ (instr->szValue() == 1) ? kDoublePrecision : kSinglePrecision;
+
+ if (to_integer) {
+ // We are playing with code close to the C++ standard's limits below,
+ // hence the very simple code and heavy checks.
+ //
+ // Note: C++ defines default type casting from floating point to integer
+ // as (close to) rounding toward zero ("fractional part discarded").
+
+ int dst = instr->VFPDRegValue(kSinglePrecision);
+ int src = instr->VFPMRegValue(src_precision);
+
+ // Bit 7 in vcvt instructions indicates if we should use the FPSCR
+ // rounding mode or the default Round to Zero mode.
+ VFPRoundingMode mode = (instr->bit(7) != 1) ? FPSCR_rounding_mode_ : SimRZ;
+ MOZ_ASSERT(mode == SimRM || mode == SimRZ || mode == SimRN);
+
+ bool unsigned_integer = (instr->bit(16) == 0);
+ bool double_precision = (src_precision == kDoublePrecision);
+
+ double val;
+ if (double_precision) {
+ get_double_from_d_register(src, &val);
+ } else {
+ float fval;
+ get_float_from_s_register(src, &fval);
+ val = double(fval);
+ }
+
+ int temp = unsigned_integer ? static_cast<uint32_t>(val)
+ : static_cast<int32_t>(val);
+
+ inv_op_vfp_flag_ = get_inv_op_vfp_flag(mode, val, unsigned_integer);
+
+ double abs_diff = unsigned_integer
+ ? std::fabs(val - static_cast<uint32_t>(temp))
+ : std::fabs(val - temp);
+
+ inexact_vfp_flag_ = (abs_diff != 0);
+
+ if (inv_op_vfp_flag_) {
+ temp = VFPConversionSaturate(val, unsigned_integer);
+ } else {
+ switch (mode) {
+ case SimRN: {
+ int val_sign = (val > 0) ? 1 : -1;
+ if (abs_diff > 0.5) {
+ temp += val_sign;
+ } else if (abs_diff == 0.5) {
+ // Round to even if exactly halfway.
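+ // (Illustration: 2.5 rounds to 2, but 3.5 rounds to 4.)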
+ temp = ((temp % 2) == 0) ? temp : temp + val_sign;
+ }
+ break;
+ }
+
+ case SimRM:
+ temp = temp > val ? temp - 1 : temp;
+ break;
+
+ case SimRZ:
+ // Nothing to do.
+ break;
+
+ default:
+ MOZ_CRASH();
+ }
+ }
+
+ // Update the destination register.
+ set_s_register_from_sinteger(dst, temp);
+ } else {
+ bool unsigned_integer = (instr->bit(7) == 0);
+ int dst = instr->VFPDRegValue(src_precision);
+ int src = instr->VFPMRegValue(kSinglePrecision);
+
+ int val = get_sinteger_from_s_register(src);
+
+ if (src_precision == kDoublePrecision) {
+ if (unsigned_integer) {
+ set_d_register_from_double(
+ dst, static_cast<double>(static_cast<uint32_t>(val)));
+ } else {
+ set_d_register_from_double(dst, static_cast<double>(val));
+ }
+ } else {
+ if (unsigned_integer) {
+ set_s_register_from_float(
+ dst, static_cast<float>(static_cast<uint32_t>(val)));
+ } else {
+ set_s_register_from_float(dst, static_cast<float>(val));
+ }
+ }
+ }
+}
+
+// A VFPv3 specific instruction.
+void Simulator::decodeVCVTBetweenFloatingPointAndIntegerFrac(
+ SimInstruction* instr) {
+ MOZ_ASSERT(instr->bits(27, 24) == 0xE && instr->opc1Value() == 0x7 &&
+ instr->bit(19) == 1 && instr->bit(17) == 1 &&
+ instr->bits(11, 9) == 0x5 && instr->bit(6) == 1 &&
+ instr->bit(4) == 0);
+
+ int size = (instr->bit(7) == 1) ? 32 : 16;
+
+ int fraction_bits = size - ((instr->bits(3, 0) << 1) | instr->bit(5));
+ double mult = 1 << fraction_bits;
+
+ MOZ_ASSERT(size == 32); // Only handling size == 32 for now.
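+ // Example (illustrative): `vcvt.s32.f64 d0, d0, #16` encodes
+ // fraction_bits == 16, so mult == 65536 and a register value of 1.5
+ // becomes the fixed-point integer 98304 (0x18000).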
+
+ // Conversion between floating-point and integer.
+ bool to_fixed = (instr->bit(18) == 1);
+
+ VFPRegPrecision precision =
+ (instr->szValue() == 1) ? kDoublePrecision : kSinglePrecision;
+
+ if (to_fixed) {
+ // We are playing with code close to the C++ standard's limits below,
+ // hence the very simple code and heavy checks.
+ //
+ // Note: C++ defines default type casting from floating point to integer
+ // as (close to) rounding toward zero ("fractional part discarded").
+
+ int dst = instr->VFPDRegValue(precision);
+
+ bool unsigned_integer = (instr->bit(16) == 1);
+ bool double_precision = (precision == kDoublePrecision);
+
+ double val;
+ if (double_precision) {
+ get_double_from_d_register(dst, &val);
+ } else {
+ float fval;
+ get_float_from_s_register(dst, &fval);
+ val = double(fval);
+ }
+
+ // Scale value by specified number of fraction bits.
+ val *= mult;
+
+ // Rounding down towards zero. No need to account for the rounding error
+ // as this instruction always rounds down towards zero. See SimRZ below.
+ int temp = unsigned_integer ? static_cast<uint32_t>(val)
+ : static_cast<int32_t>(val);
+
+ inv_op_vfp_flag_ = get_inv_op_vfp_flag(SimRZ, val, unsigned_integer);
+
+ double abs_diff = unsigned_integer
+ ? std::fabs(val - static_cast<uint32_t>(temp))
+ : std::fabs(val - temp);
+
+ inexact_vfp_flag_ = (abs_diff != 0);
+
+ if (inv_op_vfp_flag_) {
+ temp = VFPConversionSaturate(val, unsigned_integer);
+ }
+
+ // Update the destination register.
+ if (double_precision) {
+ uint32_t dbl[2];
+ dbl[0] = temp;
+ dbl[1] = 0;
+ set_d_register(dst, dbl);
+ } else {
+ set_s_register_from_sinteger(dst, temp);
+ }
+ } else {
+ MOZ_CRASH(); // Not implemented, fixed to float.
+ }
+}
+
+void Simulator::decodeType6CoprocessorIns(SimInstruction* instr) {
+ MOZ_ASSERT(instr->typeValue() == 6);
+
+ if (instr->coprocessorValue() == 0xA) {
+ switch (instr->opcodeValue()) {
+ case 0x8:
+ case 0xA:
+ case 0xC:
+ case 0xE: { // Load and store single precision float to memory.
+ int rn = instr->rnValue();
+ int vd = instr->VFPDRegValue(kSinglePrecision);
+ int offset = instr->immed8Value();
+ if (!instr->hasU()) {
+ offset = -offset;
+ }
+
+ int32_t address = get_register(rn) + 4 * offset;
+ if (instr->hasL()) {
+ // Load single precision value from memory: vldr.
+ set_s_register_from_sinteger(vd, readW(address, instr));
+ } else {
+ // Store single precision value to memory: vstr.
+ writeW(address, get_sinteger_from_s_register(vd), instr);
+ }
+ break;
+ }
+ case 0x4:
+ case 0x5:
+ case 0x6:
+ case 0x7:
+ case 0x9:
+ case 0xB:
+ // Load/store multiple single from memory: vldm/vstm.
+ handleVList(instr);
+ break;
+ default:
+ MOZ_CRASH();
+ }
+ } else if (instr->coprocessorValue() == 0xB) {
+ switch (instr->opcodeValue()) {
+ case 0x2:
+ // Transfer a double to/from two GP registers (vmov).
+ if (instr->bits(7, 6) != 0 || instr->bit(4) != 1) {
+ MOZ_CRASH(); // Not used atm.
+ } else {
+ int rt = instr->rtValue();
+ int rn = instr->rnValue();
+ int vm = instr->VFPMRegValue(kDoublePrecision);
+ if (instr->hasL()) {
+ int32_t data[2];
+ double d;
+ get_double_from_d_register(vm, &d);
+ memcpy(data, &d, 8);
+ set_register(rt, data[0]);
+ set_register(rn, data[1]);
+ } else {
+ int32_t data[] = {get_register(rt), get_register(rn)};
+ double d;
+ memcpy(&d, data, 8);
+ set_d_register_from_double(vm, d);
+ }
+ }
+ break;
+ case 0x8:
+ case 0xA:
+ case 0xC:
+ case 0xE: { // Load and store double to memory.
+ int rn = instr->rnValue();
+ int vd = instr->VFPDRegValue(kDoublePrecision);
+ int offset = instr->immed8Value();
+ if (!instr->hasU()) {
+ offset = -offset;
+ }
+ int32_t address = get_register(rn) + 4 * offset;
+ if (instr->hasL()) {
+ // Load double from memory: vldr.
+ uint64_t data = readQ(address, instr);
+ double val;
+ memcpy(&val, &data, 8);
+ set_d_register_from_double(vd, val);
+ } else {
+ // Store double to memory: vstr.
+ uint64_t data;
+ double val;
+ get_double_from_d_register(vd, &val);
+ memcpy(&data, &val, 8);
+ writeQ(address, data, instr);
+ }
+ break;
+ }
+ case 0x4:
+ case 0x5:
+ case 0x6:
+ case 0x7:
+ case 0x9:
+ case 0xB:
+ // Load/store multiple double from memory: vldm/vstm.
+ handleVList(instr);
+ break;
+ default:
+ MOZ_CRASH();
+ }
+ } else {
+ MOZ_CRASH();
+ }
+}
+
+void Simulator::decodeSpecialCondition(SimInstruction* instr) {
+ switch (instr->specialValue()) {
+ case 5:
+ if (instr->bits(18, 16) == 0 && instr->bits(11, 6) == 0x28 &&
+ instr->bit(4) == 1) {
+ // vmovl signed
+ if ((instr->vdValue() & 1) != 0) {
+ MOZ_CRASH("Undefined behavior");
+ }
+ int Vd = (instr->bit(22) << 3) | (instr->vdValue() >> 1);
+ int Vm = (instr->bit(5) << 4) | instr->vmValue();
+ int imm3 = instr->bits(21, 19);
+ if (imm3 != 1 && imm3 != 2 && imm3 != 4) {
+ MOZ_CRASH();
+ }
+ int esize = 8 * imm3;
+ int elements = 64 / esize;
+ int8_t from[8];
+ get_d_register(Vm, reinterpret_cast<uint64_t*>(from));
+ int16_t to[8];
+ int e = 0;
+ while (e < elements) {
+ to[e] = from[e];
+ e++;
+ }
+ set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 7:
+ if (instr->bits(18, 16) == 0 && instr->bits(11, 6) == 0x28 &&
+ instr->bit(4) == 1) {
+ // vmovl unsigned.
+ if ((instr->vdValue() & 1) != 0) {
+ MOZ_CRASH("Undefined behavior");
+ }
+ int Vd = (instr->bit(22) << 3) | (instr->vdValue() >> 1);
+ int Vm = (instr->bit(5) << 4) | instr->vmValue();
+ int imm3 = instr->bits(21, 19);
+ if (imm3 != 1 && imm3 != 2 && imm3 != 4) {
+ MOZ_CRASH();
+ }
+ int esize = 8 * imm3;
+ int elements = 64 / esize;
+ uint8_t from[8];
+ get_d_register(Vm, reinterpret_cast<uint64_t*>(from));
+ uint16_t to[8];
+ int e = 0;
+ while (e < elements) {
+ to[e] = from[e];
+ e++;
+ }
+ set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 8:
+ if (instr->bits(21, 20) == 0) {
+ // vst1
+ int Vd = (instr->bit(22) << 4) | instr->vdValue();
+ int Rn = instr->vnValue();
+ int type = instr->bits(11, 8);
+ int Rm = instr->vmValue();
+ int32_t address = get_register(Rn);
+ int regs = 0;
+ switch (type) {
+ case nlt_1:
+ regs = 1;
+ break;
+ case nlt_2:
+ regs = 2;
+ break;
+ case nlt_3:
+ regs = 3;
+ break;
+ case nlt_4:
+ regs = 4;
+ break;
+ default:
+ MOZ_CRASH();
+ break;
+ }
+ int r = 0;
+ while (r < regs) {
+ uint32_t data[2];
+ get_d_register(Vd + r, data);
+ // TODO: We should AllowUnaligned here only if the alignment attribute
+ // of the instruction calls for default alignment.
+ //
+ // Use writeQ to get handling of traps right. (The spec says to
+ // perform two individual word writes, but let's not worry about
+ // that.)
+ writeQ(address, (uint64_t(data[1]) << 32) | uint64_t(data[0]), instr,
+ AllowUnaligned);
+ address += 8;
+ r++;
+ }
+ if (Rm != 15) {
+ if (Rm == 13) {
+ set_register(Rn, address);
+ } else {
+ set_register(Rn, get_register(Rn) + get_register(Rm));
+ }
+ }
+ } else if (instr->bits(21, 20) == 2) {
+ // vld1
+ int Vd = (instr->bit(22) << 4) | instr->vdValue();
+ int Rn = instr->vnValue();
+ int type = instr->bits(11, 8);
+ int Rm = instr->vmValue();
+ int32_t address = get_register(Rn);
+ int regs = 0;
+ switch (type) {
+ case nlt_1:
+ regs = 1;
+ break;
+ case nlt_2:
+ regs = 2;
+ break;
+ case nlt_3:
+ regs = 3;
+ break;
+ case nlt_4:
+ regs = 4;
+ break;
+ default:
+ MOZ_CRASH();
+ break;
+ }
+ int r = 0;
+ while (r < regs) {
+ uint32_t data[2];
+ // TODO: We should AllowUnaligned here only if the alignment attribute
+ // of the instruction calls for default alignment.
+ //
+ // Use readQ to get handling of traps right. (The spec says to
+ // perform two individual word reads, but let's not worry about that.)
+ uint64_t tmp = readQ(address, instr, AllowUnaligned);
+ data[0] = tmp;
+ data[1] = tmp >> 32;
+ set_d_register(Vd + r, data);
+ address += 8;
+ r++;
+ }
+ if (Rm != 15) {
+ if (Rm == 13) {
+ set_register(Rn, address);
+ } else {
+ set_register(Rn, get_register(Rn) + get_register(Rm));
+ }
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 9:
+ if (instr->bits(9, 8) == 0) {
+ int Vd = (instr->bit(22) << 4) | instr->vdValue();
+ int Rn = instr->vnValue();
+ int size = instr->bits(11, 10);
+ int Rm = instr->vmValue();
+ int index = instr->bits(7, 5);
+ int align = instr->bit(4);
+ int32_t address = get_register(Rn);
+ if (size != 2 || align) {
+ MOZ_CRASH("NYI");
+ }
+ int a = instr->bits(5, 4);
+ if (a != 0 && a != 3) {
+ MOZ_CRASH("Unspecified");
+ }
+ if (index > 1) {
+ Vd++;
+ index -= 2;
+ }
+ uint32_t data[2];
+ get_d_register(Vd, data);
+ switch (instr->bits(21, 20)) {
+ case 0:
+ // vst1 single element from one lane
+ writeW(address, data[index], instr, AllowUnaligned);
+ break;
+ case 2:
+ // vld1 single element to one lane
+ data[index] = readW(address, instr, AllowUnaligned);
+ set_d_register(Vd, data);
+ break;
+ default:
+ MOZ_CRASH("NYI");
+ }
+ address += 4;
+ if (Rm != 15) {
+ if (Rm == 13) {
+ set_register(Rn, address);
+ } else {
+ set_register(Rn, get_register(Rn) + get_register(Rm));
+ }
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 0xA:
+ if (instr->bits(31, 20) == 0xf57) {
+ switch (instr->bits(7, 4)) {
+ case 1: // CLREX
+ exclusiveMonitorClear();
+ break;
+ case 5: // DMB
+ AtomicOperations::fenceSeqCst();
+ break;
+ case 4: // DSB
+ // We do not use DSB.
+ MOZ_CRASH("DSB unimplemented");
+ case 6: // ISB
+ // We do not use ISB.
+ MOZ_CRASH("ISB unimplemented");
+ default:
+ MOZ_CRASH();
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 0xB:
+ if (instr->bits(22, 20) == 5 && instr->bits(15, 12) == 0xf) {
+ // pld: ignore instruction.
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 0x1C:
+ case 0x1D:
+ if (instr->bit(4) == 1 && instr->bits(11, 9) != 5) {
+ // MCR, MCR2, MRC, MRC2 with cond == 15
+ decodeType7CoprocessorIns(instr);
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ default:
+ MOZ_CRASH();
+ }
+}
+
+// Executes the current instruction.
+void Simulator::instructionDecode(SimInstruction* instr) {
+ if (!SimulatorProcess::ICacheCheckingDisableCount) {
+ AutoLockSimulatorCache als;
+ SimulatorProcess::checkICacheLocked(instr);
+ }
+
+ pc_modified_ = false;
+
+ static const uint32_t kSpecialCondition = 15 << 28;
+ if (instr->conditionField() == kSpecialCondition) {
+ decodeSpecialCondition(instr);
+ } else if (conditionallyExecute(instr)) {
+ switch (instr->typeValue()) {
+ case 0:
+ case 1:
+ decodeType01(instr);
+ break;
+ case 2:
+ decodeType2(instr);
+ break;
+ case 3:
+ decodeType3(instr);
+ break;
+ case 4:
+ decodeType4(instr);
+ break;
+ case 5:
+ decodeType5(instr);
+ break;
+ case 6:
+ decodeType6(instr);
+ break;
+ case 7:
+ decodeType7(instr);
+ break;
+ default:
+ MOZ_CRASH();
+ break;
+ }
+ // If the instruction is a non-taken conditional stop, we need to skip
+ // the inlined message address.
+ } else if (instr->isStop()) {
+ set_pc(get_pc() + 2 * SimInstruction::kInstrSize);
+ }
+ if (!pc_modified_) {
+ set_register(pc,
+ reinterpret_cast<int32_t>(instr) + SimInstruction::kInstrSize);
+ }
+}
+
+void Simulator::enable_single_stepping(SingleStepCallback cb, void* arg) {
+ single_stepping_ = true;
+ single_step_callback_ = cb;
+ single_step_callback_arg_ = arg;
+ single_step_callback_(single_step_callback_arg_, this, (void*)get_pc());
+}
+
+void Simulator::disable_single_stepping() {
+ if (!single_stepping_) {
+ return;
+ }
+ single_step_callback_(single_step_callback_arg_, this, (void*)get_pc());
+ single_stepping_ = false;
+ single_step_callback_ = nullptr;
+ single_step_callback_arg_ = nullptr;
+}
+
+template <bool EnableStopSimAt>
+void Simulator::execute() {
+ if (single_stepping_) {
+ single_step_callback_(single_step_callback_arg_, this, nullptr);
+ }
+
+ // Get the PC to simulate. Cannot use the accessor here as we need the raw
+ // PC value and not the one used as input to arithmetic instructions.
+ int program_counter = get_pc();
+
+ while (program_counter != end_sim_pc) {
+ if (EnableStopSimAt && (icount_ == Simulator::StopSimAt)) {
+ fprintf(stderr, "\nStopped simulation at icount %lld\n", icount_);
+ ArmDebugger dbg(this);
+ dbg.debug();
+ } else {
+ if (single_stepping_) {
+ single_step_callback_(single_step_callback_arg_, this,
+ (void*)program_counter);
+ }
+ SimInstruction* instr =
+ reinterpret_cast<SimInstruction*>(program_counter);
+ instructionDecode(instr);
+ icount_++;
+ }
+ program_counter = get_pc();
+ }
+
+ if (single_stepping_) {
+ single_step_callback_(single_step_callback_arg_, this, nullptr);
+ }
+}
+
+void Simulator::callInternal(uint8_t* entry) {
+ // Prepare to execute the code at entry.
+ set_register(pc, reinterpret_cast<int32_t>(entry));
+
+ // Put down marker for end of simulation. The simulator will stop simulation
+ // when the PC reaches this value. By saving the "end simulation" value into
+ // the LR, the simulation stops when returning to this call point.
+ set_register(lr, end_sim_pc);
+
+ // Remember the values of callee-saved registers. The code below assumes
+ // that r9 is not used as sb (static base) in simulator code and therefore
+ // is regarded as a callee-saved register.
+ int32_t r4_val = get_register(r4);
+ int32_t r5_val = get_register(r5);
+ int32_t r6_val = get_register(r6);
+ int32_t r7_val = get_register(r7);
+ int32_t r8_val = get_register(r8);
+ int32_t r9_val = get_register(r9);
+ int32_t r10_val = get_register(r10);
+ int32_t r11_val = get_register(r11);
+
+ // Remember d8 to d15 which are callee-saved.
+ uint64_t d8_val;
+ get_d_register(d8, &d8_val);
+ uint64_t d9_val;
+ get_d_register(d9, &d9_val);
+ uint64_t d10_val;
+ get_d_register(d10, &d10_val);
+ uint64_t d11_val;
+ get_d_register(d11, &d11_val);
+ uint64_t d12_val;
+ get_d_register(d12, &d12_val);
+ uint64_t d13_val;
+ get_d_register(d13, &d13_val);
+ uint64_t d14_val;
+ get_d_register(d14, &d14_val);
+ uint64_t d15_val;
+ get_d_register(d15, &d15_val);
+
+ // Set up the callee-saved registers with a known value. To be able to check
+ // that they are preserved properly across JS execution.
+ int32_t callee_saved_value = uint32_t(icount_);
+ uint64_t callee_saved_value_d = uint64_t(icount_);
+
+ if (!skipCalleeSavedRegsCheck) {
+ set_register(r4, callee_saved_value);
+ set_register(r5, callee_saved_value);
+ set_register(r6, callee_saved_value);
+ set_register(r7, callee_saved_value);
+ set_register(r8, callee_saved_value);
+ set_register(r9, callee_saved_value);
+ set_register(r10, callee_saved_value);
+ set_register(r11, callee_saved_value);
+
+ set_d_register(d8, &callee_saved_value_d);
+ set_d_register(d9, &callee_saved_value_d);
+ set_d_register(d10, &callee_saved_value_d);
+ set_d_register(d11, &callee_saved_value_d);
+ set_d_register(d12, &callee_saved_value_d);
+ set_d_register(d13, &callee_saved_value_d);
+ set_d_register(d14, &callee_saved_value_d);
+ set_d_register(d15, &callee_saved_value_d);
+ }
+ // Start the simulation.
+ if (Simulator::StopSimAt != -1L) {
+ execute<true>();
+ } else {
+ execute<false>();
+ }
+
+ if (!skipCalleeSavedRegsCheck) {
+ // Check that the callee-saved registers have been preserved.
+ MOZ_ASSERT(callee_saved_value == get_register(r4));
+ MOZ_ASSERT(callee_saved_value == get_register(r5));
+ MOZ_ASSERT(callee_saved_value == get_register(r6));
+ MOZ_ASSERT(callee_saved_value == get_register(r7));
+ MOZ_ASSERT(callee_saved_value == get_register(r8));
+ MOZ_ASSERT(callee_saved_value == get_register(r9));
+ MOZ_ASSERT(callee_saved_value == get_register(r10));
+ MOZ_ASSERT(callee_saved_value == get_register(r11));
+
+ uint64_t value;
+ get_d_register(d8, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+ get_d_register(d9, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+ get_d_register(d10, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+ get_d_register(d11, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+ get_d_register(d12, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+ get_d_register(d13, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+ get_d_register(d14, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+ get_d_register(d15, &value);
+ MOZ_ASSERT(callee_saved_value_d == value);
+
+ // Restore callee-saved registers with the original value.
+ set_register(r4, r4_val);
+ set_register(r5, r5_val);
+ set_register(r6, r6_val);
+ set_register(r7, r7_val);
+ set_register(r8, r8_val);
+ set_register(r9, r9_val);
+ set_register(r10, r10_val);
+ set_register(r11, r11_val);
+
+ set_d_register(d8, &d8_val);
+ set_d_register(d9, &d9_val);
+ set_d_register(d10, &d10_val);
+ set_d_register(d11, &d11_val);
+ set_d_register(d12, &d12_val);
+ set_d_register(d13, &d13_val);
+ set_d_register(d14, &d14_val);
+ set_d_register(d15, &d15_val);
+ }
+}
+
+int32_t Simulator::call(uint8_t* entry, int argument_count, ...) {
+ va_list parameters;
+ va_start(parameters, argument_count);
+
+ // First four arguments passed in registers.
+ if (argument_count >= 1) {
+ set_register(r0, va_arg(parameters, int32_t));
+ }
+ if (argument_count >= 2) {
+ set_register(r1, va_arg(parameters, int32_t));
+ }
+ if (argument_count >= 3) {
+ set_register(r2, va_arg(parameters, int32_t));
+ }
+ if (argument_count >= 4) {
+ set_register(r3, va_arg(parameters, int32_t));
+ }
+
+ // Remaining arguments passed on stack.
+ int original_stack = get_register(sp);
+ int entry_stack = original_stack;
+ if (argument_count >= 4) {
+ entry_stack -= (argument_count - 4) * sizeof(int32_t);
+ }
+
+ entry_stack &= ~ABIStackAlignment;
+
+ // Store remaining arguments on stack, from low to high memory.
+ intptr_t* stack_argument = reinterpret_cast<intptr_t*>(entry_stack);
+ for (int i = 4; i < argument_count; i++) {
+ stack_argument[i - 4] = va_arg(parameters, int32_t);
+ }
+ va_end(parameters);
+ set_register(sp, entry_stack);
+
+ callInternal(entry);
+
+ // Pop stack passed arguments.
+ MOZ_ASSERT(entry_stack == get_register(sp));
+ set_register(sp, original_stack);
+
+ int32_t result = get_register(r0);
+ return result;
+}
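+
+// Example (illustrative sketch; `code`, `lhs` and `rhs` are hypothetical):
+//
+//   Simulator* sim = Simulator::Current();
+//   int32_t result = sim->call(code, 2, int32_t(lhs), int32_t(rhs));
+//
+// The two arguments land in r0/r1 per the EABI convention modeled above,
+// and the result is read back from r0.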
+
+Simulator* Simulator::Current() {
+ JSContext* cx = TlsContext.get();
+ MOZ_ASSERT(CurrentThreadCanAccessRuntime(cx->runtime()));
+ return cx->simulator();
+}
+
+} // namespace jit
+} // namespace js
+
+js::jit::Simulator* JSContext::simulator() const { return simulator_; }
diff --git a/js/src/jit/arm/Simulator-arm.h b/js/src/jit/arm/Simulator-arm.h
new file mode 100644
index 0000000000..fba0f8ce5e
--- /dev/null
+++ b/js/src/jit/arm/Simulator-arm.h
@@ -0,0 +1,632 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef jit_arm_Simulator_arm_h
+#define jit_arm_Simulator_arm_h
+
+#ifdef JS_SIMULATOR_ARM
+
+# include "mozilla/Atomics.h"
+
+# include "jit/arm/Architecture-arm.h"
+# include "jit/arm/disasm/Disasm-arm.h"
+# include "jit/IonTypes.h"
+# include "js/AllocPolicy.h"
+# include "js/ProfilingFrameIterator.h"
+# include "threading/Thread.h"
+# include "vm/MutexIDs.h"
+# include "wasm/WasmSignalHandlers.h"
+
+namespace js {
+namespace jit {
+
+class JitActivation;
+class Simulator;
+class Redirection;
+class CachePage;
+class AutoLockSimulator;
+
+// When the SingleStepCallback is called, the simulator is about to execute
+// sim->get_pc() and the current machine state represents the completed
+// execution of the previous pc.
+typedef void (*SingleStepCallback)(void* arg, Simulator* sim, void* pc);
+
+// VFP rounding modes. See ARM DDI 0406B Page A2-29.
+enum VFPRoundingMode {
+ SimRN = 0 << 22, // Round to Nearest.
+ SimRP = 1 << 22, // Round towards Plus Infinity.
+ SimRM = 2 << 22, // Round towards Minus Infinity.
+ SimRZ = 3 << 22, // Round towards zero.
+
+ // Aliases.
+ kRoundToNearest = SimRN,
+ kRoundToPlusInf = SimRP,
+ kRoundToMinusInf = SimRM,
+ kRoundToZero = SimRZ
+};
+
+const uint32_t kVFPRoundingModeMask = 3 << 22;
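+
+// The enumerators are the FPSCR.RMode field (bits [23:22]) already shifted
+// into place, so, for example, the current mode can be extracted directly:
+//
+//   VFPRoundingMode mode =
+//       static_cast<VFPRoundingMode>(fpscr & kVFPRoundingModeMask);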
+
+typedef int32_t Instr;
+class SimInstruction;
+
+// Per-thread simulator state.
+class Simulator {
+ public:
+ friend class ArmDebugger;
+ enum Register {
+ no_reg = -1,
+ r0 = 0,
+ r1,
+ r2,
+ r3,
+ r4,
+ r5,
+ r6,
+ r7,
+ r8,
+ r9,
+ r10,
+ r11,
+ r12,
+ r13,
+ r14,
+ r15,
+ num_registers,
+ fp = 11,
+ ip = 12,
+ sp = 13,
+ lr = 14,
+ pc = 15,
+ s0 = 0,
+ s1,
+ s2,
+ s3,
+ s4,
+ s5,
+ s6,
+ s7,
+ s8,
+ s9,
+ s10,
+ s11,
+ s12,
+ s13,
+ s14,
+ s15,
+ s16,
+ s17,
+ s18,
+ s19,
+ s20,
+ s21,
+ s22,
+ s23,
+ s24,
+ s25,
+ s26,
+ s27,
+ s28,
+ s29,
+ s30,
+ s31,
+ num_s_registers = 32,
+ d0 = 0,
+ d1,
+ d2,
+ d3,
+ d4,
+ d5,
+ d6,
+ d7,
+ d8,
+ d9,
+ d10,
+ d11,
+ d12,
+ d13,
+ d14,
+ d15,
+ d16,
+ d17,
+ d18,
+ d19,
+ d20,
+ d21,
+ d22,
+ d23,
+ d24,
+ d25,
+ d26,
+ d27,
+ d28,
+ d29,
+ d30,
+ d31,
+ num_d_registers = 32,
+ q0 = 0,
+ q1,
+ q2,
+ q3,
+ q4,
+ q5,
+ q6,
+ q7,
+ q8,
+ q9,
+ q10,
+ q11,
+ q12,
+ q13,
+ q14,
+ q15,
+ num_q_registers = 16
+ };
+
+ // Returns nullptr on OOM.
+ static Simulator* Create();
+
+ static void Destroy(Simulator* simulator);
+
+ // Constructor/destructor are for internal use only; use the static methods
+ // above.
+ Simulator();
+ ~Simulator();
+
+ // The currently executing Simulator instance. Potentially there can be one
+ // for each native thread.
+ static Simulator* Current();
+
+ static uintptr_t StackLimit() { return Simulator::Current()->stackLimit(); }
+
+ // Disassemble some instructions starting at instr and print them
+ // on stdout. Useful for working within GDB after a MOZ_CRASH(),
+ // among other things.
+ //
+ // Typical use within a crashed instruction decoding method is simply:
+ //
+ // call Simulator::disassemble(instr, 1)
+ //
+ // or use one of the more convenient inline methods below.
+ static void disassemble(SimInstruction* instr, size_t n);
+
+ // Disassemble one instruction.
+ // "call disasm(instr)"
+ void disasm(SimInstruction* instr);
+
+ // Disassemble n instructions starting at instr.
+ // "call disasm(instr, 3)"
+ void disasm(SimInstruction* instr, size_t n);
+
+ // Skip backwards m instructions before starting, then disassemble n
+ // instructions.
+ // "call disasm(instr, 3, 7)"
+ void disasm(SimInstruction* instr, size_t m, size_t n);
+
+ uintptr_t* addressOfStackLimit();
+
+ // Accessors for register state. Reading the pc value adheres to the ARM
+ // architecture specification and is off by 8 from the currently executing
+ // instruction.
+ void set_register(int reg, int32_t value);
+ int32_t get_register(int reg) const;
+ double get_double_from_register_pair(int reg);
+ void set_register_pair_from_double(int reg, double* value);
+ void set_dw_register(int dreg, const int* dbl);
+
+ // Support for VFP.
+ void get_d_register(int dreg, uint64_t* value);
+ void set_d_register(int dreg, const uint64_t* value);
+ void get_d_register(int dreg, uint32_t* value);
+ void set_d_register(int dreg, const uint32_t* value);
+ void get_q_register(int qreg, uint64_t* value);
+ void set_q_register(int qreg, const uint64_t* value);
+ void get_q_register(int qreg, uint32_t* value);
+ void set_q_register(int qreg, const uint32_t* value);
+ void set_s_register(int reg, unsigned int value);
+ unsigned int get_s_register(int reg) const;
+
+ void set_d_register_from_double(int dreg, const double& dbl) {
+ setVFPRegister<double, 2>(dreg, dbl);
+ }
+ void get_double_from_d_register(int dreg, double* out) {
+ getFromVFPRegister<double, 2>(dreg, out);
+ }
+ void set_s_register_from_float(int sreg, const float flt) {
+ setVFPRegister<float, 1>(sreg, flt);
+ }
+ void get_float_from_s_register(int sreg, float* out) {
+ getFromVFPRegister<float, 1>(sreg, out);
+ }
+ void set_s_register_from_sinteger(int sreg, const int sint) {
+ setVFPRegister<int, 1>(sreg, sint);
+ }
+ int get_sinteger_from_s_register(int sreg) {
+ int ret;
+ getFromVFPRegister<int, 1>(sreg, &ret);
+ return ret;
+ }
+
+ // Special case of set_register and get_register to access the raw PC value.
+ void set_pc(int32_t value);
+ int32_t get_pc() const;
+
+ template <typename T>
+ T get_pc_as() const {
+ return reinterpret_cast<T>(get_pc());
+ }
+
+ void enable_single_stepping(SingleStepCallback cb, void* arg);
+ void disable_single_stepping();
+
+ uintptr_t stackLimit() const;
+ bool overRecursed(uintptr_t newsp = 0) const;
+ bool overRecursedWithExtra(uint32_t extra) const;
+
+ // Executes ARM instructions until the PC reaches end_sim_pc.
+ template <bool EnableStopSimAt>
+ void execute();
+
+ // Sets up the simulator state and grabs the result on return.
+ int32_t call(uint8_t* entry, int argument_count, ...);
+
+ // Debugger input.
+ void setLastDebuggerInput(char* input);
+ char* lastDebuggerInput() { return lastDebuggerInput_; }
+
+ // Returns true if pc register contains one of the 'special_values' defined
+ // below (bad_lr, end_sim_pc).
+ bool has_bad_pc() const;
+
+ private:
+ enum special_values {
+ // Known bad pc value to ensure that the simulator does not execute
+ // without being properly set up.
+ bad_lr = -1,
+ // A pc value used to signal the simulator to stop execution. Generally
+ // the lr is set to this value on transition from native C code to
+ // simulated execution, so that the simulator can "return" to the native
+ // C code.
+ end_sim_pc = -2
+ };
+
+ // ForbidUnaligned means "always fault on unaligned access".
+ //
+ // AllowUnaligned means "allow the unaligned access if other conditions are
+ // met". The "other conditions" vary with the instruction: For all
+ // instructions the base condition is !HasAlignmentFault(), i.e., the chip is
+ // configured to allow unaligned accesses. For instructions like VLD1
+ // there is an additional constraint that the alignment attribute in the
+ // instruction must be set to "default alignment".
+
+ enum UnalignedPolicy { ForbidUnaligned, AllowUnaligned };
+
+ bool init();
+
+ // Checks if the current instruction should be executed based on its
+ // condition bits.
+ inline bool conditionallyExecute(SimInstruction* instr);
+
+ // Helper functions to set the conditional flags in the architecture state.
+ void setNZFlags(int32_t val);
+ void setCFlag(bool val);
+ void setVFlag(bool val);
+ bool carryFrom(int32_t left, int32_t right, int32_t carry = 0);
+ bool borrowFrom(int32_t left, int32_t right);
+ bool overflowFrom(int32_t alu_out, int32_t left, int32_t right,
+ bool addition);
+
+ inline int getCarry() { return c_flag_ ? 1 : 0; };
+
+ // Support for VFP.
+ void compute_FPSCR_Flags(double val1, double val2);
+ void copy_FPSCR_to_APSR();
+ inline void canonicalizeNaN(double* value);
+ inline void canonicalizeNaN(float* value);
+
+ // Helper functions to decode common "addressing" modes
+ int32_t getShiftRm(SimInstruction* instr, bool* carry_out);
+ int32_t getImm(SimInstruction* instr, bool* carry_out);
+ int32_t processPU(SimInstruction* instr, int num_regs, int operand_size,
+ intptr_t* start_address, intptr_t* end_address);
+ void handleRList(SimInstruction* instr, bool load);
+ void handleVList(SimInstruction* inst);
+ void softwareInterrupt(SimInstruction* instr);
+
+ // Stop helper functions.
+ inline bool isStopInstruction(SimInstruction* instr);
+ inline bool isWatchedStop(uint32_t bkpt_code);
+ inline bool isEnabledStop(uint32_t bkpt_code);
+ inline void enableStop(uint32_t bkpt_code);
+ inline void disableStop(uint32_t bkpt_code);
+ inline void increaseStopCounter(uint32_t bkpt_code);
+ void printStopInfo(uint32_t code);
+
+ // Handle a wasm interrupt triggered by an async signal handler.
+ JS::ProfilingFrameIterator::RegisterState registerState();
+
+ // Handle any wasm faults, returning true if the fault was handled.
+ // This method is rather hot so inline the normal (no-wasm) case.
+ bool MOZ_ALWAYS_INLINE handleWasmSegFault(int32_t addr, unsigned numBytes) {
+ if (MOZ_LIKELY(!wasm::CodeExists)) {
+ return false;
+ }
+
+ uint8_t* newPC;
+ if (!wasm::MemoryAccessTraps(registerState(), (uint8_t*)addr, numBytes,
+ &newPC)) {
+ return false;
+ }
+
+ set_pc(int32_t(newPC));
+ return true;
+ }
+
+ // Read and write memory.
+ inline uint8_t readBU(int32_t addr);
+ inline int8_t readB(int32_t addr);
+ inline void writeB(int32_t addr, uint8_t value);
+ inline void writeB(int32_t addr, int8_t value);
+
+ inline uint8_t readExBU(int32_t addr);
+ inline int32_t writeExB(int32_t addr, uint8_t value);
+
+ inline uint16_t readHU(int32_t addr, SimInstruction* instr);
+ inline int16_t readH(int32_t addr, SimInstruction* instr);
+ // Note: Overloaded on the sign of the value.
+ inline void writeH(int32_t addr, uint16_t value, SimInstruction* instr);
+ inline void writeH(int32_t addr, int16_t value, SimInstruction* instr);
+
+ inline uint16_t readExHU(int32_t addr, SimInstruction* instr);
+ inline int32_t writeExH(int32_t addr, uint16_t value, SimInstruction* instr);
+
+ inline int readW(int32_t addr, SimInstruction* instr,
+ UnalignedPolicy f = ForbidUnaligned);
+ inline void writeW(int32_t addr, int value, SimInstruction* instr,
+ UnalignedPolicy f = ForbidUnaligned);
+
+ inline uint64_t readQ(int32_t addr, SimInstruction* instr,
+ UnalignedPolicy f = ForbidUnaligned);
+ inline void writeQ(int32_t addr, uint64_t value, SimInstruction* instr,
+ UnalignedPolicy f = ForbidUnaligned);
+
+ inline int readExW(int32_t addr, SimInstruction* instr);
+ inline int writeExW(int32_t addr, int value, SimInstruction* instr);
+
+ int32_t* readDW(int32_t addr);
+ void writeDW(int32_t addr, int32_t value1, int32_t value2);
+
+ int32_t readExDW(int32_t addr, int32_t* hibits);
+ int32_t writeExDW(int32_t addr, int32_t value1, int32_t value2);
+
+ // Executing is handled based on the instruction type.
+ // Both type 0 and type 1 rolled into one.
+ void decodeType01(SimInstruction* instr);
+ void decodeType2(SimInstruction* instr);
+ void decodeType3(SimInstruction* instr);
+ void decodeType4(SimInstruction* instr);
+ void decodeType5(SimInstruction* instr);
+ void decodeType6(SimInstruction* instr);
+ void decodeType7(SimInstruction* instr);
+
+ // Support for VFP.
+ void decodeTypeVFP(SimInstruction* instr);
+ void decodeType6CoprocessorIns(SimInstruction* instr);
+ void decodeSpecialCondition(SimInstruction* instr);
+
+ void decodeVMOVBetweenCoreAndSinglePrecisionRegisters(SimInstruction* instr);
+ void decodeVCMP(SimInstruction* instr);
+ void decodeVCVTBetweenDoubleAndSingle(SimInstruction* instr);
+ void decodeVCVTBetweenFloatingPointAndInteger(SimInstruction* instr);
+ void decodeVCVTBetweenFloatingPointAndIntegerFrac(SimInstruction* instr);
+
+ // Support for some system functions.
+ void decodeType7CoprocessorIns(SimInstruction* instr);
+
+ // Executes one instruction.
+ void instructionDecode(SimInstruction* instr);
+
+ public:
+ static int64_t StopSimAt;
+
+ // For testing the MoveResolver code, a MoveResolver is set up, and
+ // the VFP registers are loaded with pre-determined values, and then the
+ // sequence of code is simulated. In order to test this with the
+ // simulator, the callee-saved registers can't be trashed. This flag
+ // disables that feature.
+ bool skipCalleeSavedRegsCheck;
+
+ // Runtime call support.
+ static void* RedirectNativeFunction(void* nativeFunction,
+ ABIFunctionType type);
+
+ private:
+ // Handle arguments and return value for runtime FP functions.
+ void getFpArgs(double* x, double* y, int32_t* z);
+ void getFpFromStack(int32_t* stack, double* x1);
+ void setCallResultDouble(double result);
+ void setCallResultFloat(float result);
+ void setCallResult(int64_t res);
+ void scratchVolatileRegisters(bool scratchFloat = true);
+
+ template <class ReturnType, int register_size>
+ void getFromVFPRegister(int reg_index, ReturnType* out);
+
+ template <class InputType, int register_size>
+ void setVFPRegister(int reg_index, const InputType& value);
+
+ void callInternal(uint8_t* entry);
+
+ // Architecture state.
+ // Saturating instructions require a Q flag to indicate saturation.
+ // There is currently no way to read the CPSR directly, and thus read the Q
+ // flag, so this is left unimplemented.
+ int32_t registers_[16];
+ bool n_flag_;
+ bool z_flag_;
+ bool c_flag_;
+ bool v_flag_;
+
+ // VFP architecture state.
+ uint32_t vfp_registers_[num_d_registers * 2];
+ bool n_flag_FPSCR_;
+ bool z_flag_FPSCR_;
+ bool c_flag_FPSCR_;
+ bool v_flag_FPSCR_;
+
+ // VFP rounding mode. See ARM DDI 0406B Page A2-29.
+ VFPRoundingMode FPSCR_rounding_mode_;
+ bool FPSCR_default_NaN_mode_;
+
+ // VFP FP exception flags architecture state.
+ bool inv_op_vfp_flag_;
+ bool div_zero_vfp_flag_;
+ bool overflow_vfp_flag_;
+ bool underflow_vfp_flag_;
+ bool inexact_vfp_flag_;
+
+ // Simulator support.
+ char* stack_;
+ uintptr_t stackLimit_;
+ bool pc_modified_;
+ int64_t icount_;
+
+ // Debugger input.
+ char* lastDebuggerInput_;
+
+ // Registered breakpoints.
+ SimInstruction* break_pc_;
+ Instr break_instr_;
+
+ // Single-stepping support
+ bool single_stepping_;
+ SingleStepCallback single_step_callback_;
+ void* single_step_callback_arg_;
+
+ // A stop is watched if its code is less than kNumOfWatchedStops.
+ // Only watched stops support enabling/disabling and the counter feature.
+ static const uint32_t kNumOfWatchedStops = 256;
+
+ // Breakpoint is disabled if bit 31 is set.
+ static const uint32_t kStopDisabledBit = 1 << 31;
+
+ // A stop is enabled, meaning the simulator will stop when meeting the
+ // instruction, if bit 31 of watched_stops_[code].count is unset.
+ // The value watched_stops_[code].count & ~(1 << 31) indicates how many times
+ // the breakpoint was hit or gone through.
+ struct StopCountAndDesc {
+ uint32_t count;
+ char* desc;
+ };
+ StopCountAndDesc watched_stops_[kNumOfWatchedStops];
+
+ public:
+ int64_t icount() { return icount_; }
+
+ private:
+ // Exclusive access monitor
+ void exclusiveMonitorSet(uint64_t value);
+ uint64_t exclusiveMonitorGetAndClear(bool* held);
+ void exclusiveMonitorClear();
+
+ bool exclusiveMonitorHeld_;
+ uint64_t exclusiveMonitor_;
+};
+
+// Process-wide simulator state.
+class SimulatorProcess {
+ friend class Redirection;
+ friend class AutoLockSimulatorCache;
+
+ private:
+ // ICache checking.
+ struct ICacheHasher {
+ typedef void* Key;
+ typedef void* Lookup;
+ static HashNumber hash(const Lookup& l);
+ static bool match(const Key& k, const Lookup& l);
+ };
+
+ public:
+ typedef HashMap<void*, CachePage*, ICacheHasher, SystemAllocPolicy> ICacheMap;
+
+ static mozilla::Atomic<size_t, mozilla::ReleaseAcquire>
+ ICacheCheckingDisableCount;
+ static void FlushICache(void* start, size_t size);
+
+ static void checkICacheLocked(SimInstruction* instr);
+
+ static bool initialize() {
+ singleton_ = js_new<SimulatorProcess>();
+ return singleton_;
+ }
+ static void destroy() {
+ js_delete(singleton_);
+ singleton_ = nullptr;
+ }
+
+ SimulatorProcess();
+ ~SimulatorProcess();
+
+ private:
+ static SimulatorProcess* singleton_;
+
+ // This lock creates a critical section around 'redirection_' and
+ // 'icache_', which are referenced both by the execution engine
+ // and by the off-thread compiler (see Redirection::Get in the cpp file).
+ Mutex cacheLock_ MOZ_UNANNOTATED;
+
+ Redirection* redirection_;
+ ICacheMap icache_;
+
+ public:
+ static ICacheMap& icache() {
+ // Technically we need the lock to access the innards of the
+ // icache, not to take its address, but the latter condition
+ // serves as a useful complement to the former.
+ singleton_->cacheLock_.assertOwnedByCurrentThread();
+ return singleton_->icache_;
+ }
+
+ static Redirection* redirection() {
+ singleton_->cacheLock_.assertOwnedByCurrentThread();
+ return singleton_->redirection_;
+ }
+
+ static void setRedirection(js::jit::Redirection* redirection) {
+ singleton_->cacheLock_.assertOwnedByCurrentThread();
+ singleton_->redirection_ = redirection;
+ }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif /* JS_SIMULATOR_ARM */
+
+#endif /* jit_arm_Simulator_arm_h */
diff --git a/js/src/jit/arm/Trampoline-arm.cpp b/js/src/jit/arm/Trampoline-arm.cpp
new file mode 100644
index 0000000000..551f243bd3
--- /dev/null
+++ b/js/src/jit/arm/Trampoline-arm.cpp
@@ -0,0 +1,831 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm/SharedICHelpers-arm.h"
+#include "jit/Bailouts.h"
+#include "jit/BaselineFrame.h"
+#include "jit/CalleeToken.h"
+#include "jit/JitFrames.h"
+#include "jit/JitRuntime.h"
+#include "jit/JitSpewer.h"
+#include "jit/PerfSpewer.h"
+#include "jit/VMFunctions.h"
+#include "vm/JitActivation.h" // js::jit::JitActivation
+#include "vm/JSContext.h"
+#include "vm/Realm.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+static const FloatRegisterSet NonVolatileFloatRegs = FloatRegisterSet(
+ (1ULL << FloatRegisters::d8) | (1ULL << FloatRegisters::d9) |
+ (1ULL << FloatRegisters::d10) | (1ULL << FloatRegisters::d11) |
+ (1ULL << FloatRegisters::d12) | (1ULL << FloatRegisters::d13) |
+ (1ULL << FloatRegisters::d14) | (1ULL << FloatRegisters::d15));
+
+static void GenerateReturn(MacroAssembler& masm, int returnCode) {
+ // Restore non-volatile floating point registers.
+ masm.transferMultipleByRuns(NonVolatileFloatRegs, IsLoad, StackPointer, IA);
+
+ // Get rid of padding word.
+ masm.addPtr(Imm32(sizeof(void*)), sp);
+
+ // Set up return value
+ masm.ma_mov(Imm32(returnCode), r0);
+
+ // Pop and return
+ masm.startDataTransferM(IsLoad, sp, IA, WriteBack);
+ masm.transferReg(r4);
+ masm.transferReg(r5);
+ masm.transferReg(r6);
+ masm.transferReg(r7);
+ masm.transferReg(r8);
+ masm.transferReg(r9);
+ masm.transferReg(r10);
+ masm.transferReg(r11);
+ // r12 isn't saved, so it shouldn't be restored.
+ masm.transferReg(pc);
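+  // Loading pc from the slot that holds the saved lr performs the return;
+  // no separate branch is needed once this load-multiple completes.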
+ masm.finishDataTransfer();
+ masm.flushBuffer();
+}
+
+struct EnterJITStack {
+ double d8;
+ double d9;
+ double d10;
+ double d11;
+ double d12;
+ double d13;
+ double d14;
+ double d15;
+
+ // Padding.
+ void* padding;
+
+ // Non-volatile registers.
+ void* r4;
+ void* r5;
+ void* r6;
+ void* r7;
+ void* r8;
+ void* r9;
+ void* r10;
+ void* r11;
+  // The ABI does not expect r12 (ip) to be preserved.
+ void* lr;
+
+ // Arguments.
+ // code == r0
+ // argc == r1
+ // argv == r2
+ // frame == r3
+ CalleeToken token;
+ JSObject* scopeChain;
+ size_t numStackValues;
+ Value* vp;
+};
+
+/*
+ * This method generates a trampoline for a C++ function with the following
+ * signature:
+ *   void enter(void* code, int argc, Value* argv, InterpreterFrame* fp,
+ *              CalleeToken calleeToken, JSObject* scopeChain, Value* vp)
+ * ...using the standard EABI calling convention.
+ */
+void JitRuntime::generateEnterJIT(JSContext* cx, MacroAssembler& masm) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateEnterJIT");
+
+ enterJITOffset_ = startTrampolineCode(masm);
+
+ const Address slot_token(sp, offsetof(EnterJITStack, token));
+ const Address slot_vp(sp, offsetof(EnterJITStack, vp));
+
+ static_assert(OsrFrameReg == r3);
+
+ Assembler* aasm = &masm;
+
+ // Save non-volatile registers. These must be saved by the trampoline,
+ // rather than the JIT'd code, because they are scanned by the conservative
+ // scanner.
+ masm.startDataTransferM(IsStore, sp, DB, WriteBack);
+ masm.transferReg(r4); // [sp,0]
+ masm.transferReg(r5); // [sp,4]
+ masm.transferReg(r6); // [sp,8]
+ masm.transferReg(r7); // [sp,12]
+ masm.transferReg(r8); // [sp,16]
+ masm.transferReg(r9); // [sp,20]
+ masm.transferReg(r10); // [sp,24]
+ masm.transferReg(r11); // [sp,28]
+  // The ABI does not expect r12 (ip) to be preserved.
+ masm.transferReg(lr); // [sp,32]
+ // The 5th argument is located at [sp, 36]
+ masm.finishDataTransfer();
+
+ // Add padding word.
+ masm.subPtr(Imm32(sizeof(void*)), sp);
+
+ // Push the float registers.
+ masm.transferMultipleByRuns(NonVolatileFloatRegs, IsStore, sp, DB);
+
+ // Load calleeToken into r9.
+ masm.loadPtr(slot_token, r9);
+
+ // Save stack pointer.
+ masm.movePtr(sp, r11);
+
+ // Load the number of actual arguments into r10.
+ masm.loadPtr(slot_vp, r10);
+ masm.unboxInt32(Address(r10, 0), r10);
+
+ {
+ Label noNewTarget;
+ masm.branchTest32(Assembler::Zero, r9,
+ Imm32(CalleeToken_FunctionConstructing), &noNewTarget);
+
+ masm.add32(Imm32(1), r1);
+
+ masm.bind(&noNewTarget);
+ }
+
+ // Guarantee stack alignment of Jit frames.
+ //
+  // This code moves the stack pointer to the location where it should be
+  // when we enter the Jit frame: it reserves enough space for pushing the
+  // arguments and the JitFrameLayout, and aligns the stack pointer to the
+  // alignment expected by Jit frames.
+  //
+  // At the end, register r4 points to the location on the stack where the
+  // first argument is expected by the Jit frame.
+ //
+ aasm->as_sub(r4, sp, O2RegImmShift(r1, LSL, 3)); // r4 = sp - argc*8
+ aasm->as_bic(r4, r4, Imm8(JitStackAlignment - 1));
+  // r4 is now aligned to the bottom of the list of arguments.
+ static_assert(
+ sizeof(JitFrameLayout) % JitStackAlignment == 0,
+ "No need to consider the JitFrameLayout for aligning the stack");
+ // sp' = ~(JitStackAlignment - 1) & (sp - argc * sizeof(Value))
+ masm.movePtr(r4, sp);
+
+ // Get a copy of the number of args to use as a decrement counter, also set
+ // the zero condition code.
+ aasm->as_mov(r5, O2Reg(r1), SetCC);
+
+ // Loop over arguments, copying them from an unknown buffer onto the Ion
+ // stack so they can be accessed from JIT'ed code.
+ {
+ Label header, footer;
+ // If there aren't any arguments, don't do anything.
+ aasm->as_b(&footer, Assembler::Zero);
+ // Get the top of the loop.
+ masm.bind(&header);
+ aasm->as_sub(r5, r5, Imm8(1), SetCC);
+    // We could be more clever and unroll this using a load-multiple
+    // (particularly since the offset is effectively 0), but that seems
+    // more error-prone and complex.
+ // BIG FAT WARNING: this loads both r6 and r7.
+ aasm->as_extdtr(IsLoad, 64, true, PostIndex, r6,
+ EDtrAddr(r2, EDtrOffImm(8)));
+ aasm->as_extdtr(IsStore, 64, true, PostIndex, r6,
+ EDtrAddr(r4, EDtrOffImm(8)));
+ aasm->as_b(&header, Assembler::NonZero);
+ masm.bind(&footer);
+ }
+
+ // Push the callee token.
+ masm.push(r9);
+
+ // Push the frame descriptor.
+ masm.pushFrameDescriptorForJitCall(FrameType::CppToJSJit, r10, r10);
+
+ Label returnLabel;
+ {
+ // Handle Interpreter -> Baseline OSR.
+ AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
+ MOZ_ASSERT(!regs.has(r11));
+ regs.take(OsrFrameReg);
+ regs.take(r0); // jitcode
+ MOZ_ASSERT(!regs.has(ReturnReg), "ReturnReg matches r0");
+
+ const Address slot_numStackValues(r11,
+ offsetof(EnterJITStack, numStackValues));
+
+ Label notOsr;
+ masm.branchTestPtr(Assembler::Zero, OsrFrameReg, OsrFrameReg, &notOsr);
+
+ Register scratch = regs.takeAny();
+
+ Register numStackValues = regs.takeAny();
+ masm.load32(slot_numStackValues, numStackValues);
+
+    // Write the return address. On ARM, CodeLabel is only used for
+    // tableswitch, so we can't use it here to get the return address.
+    // Instead, we use pc plus a fixed offset to reach the jump to
+    // returnLabel. Reading the pc register yields the address of the
+    // current instruction plus 8, so we add the size of two further
+    // instructions to skip the push and the jump(&skipJump).
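+    // Illustrative address map (assuming 4-byte instructions with no
+    // constant-pool entries in between, which AutoForbidPoolsAndNops
+    // guarantees): with the mov at address A, reading pc gives A + 8, and
+    // adding 2 * sizeof(uint32_t) gives A + 16, the address of
+    // jump(&returnLabel).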
+ {
+ AutoForbidPoolsAndNops afp(&masm, 5);
+ Label skipJump;
+ masm.mov(pc, scratch);
+ masm.addPtr(Imm32(2 * sizeof(uint32_t)), scratch);
+ masm.push(scratch);
+ masm.jump(&skipJump);
+ masm.jump(&returnLabel);
+ masm.bind(&skipJump);
+ }
+
+ // Frame prologue.
+ masm.push(FramePointer);
+ masm.mov(sp, FramePointer);
+
+ // Reserve frame.
+ masm.subPtr(Imm32(BaselineFrame::Size()), sp);
+
+ Register framePtrScratch = regs.takeAny();
+ masm.touchFrameValues(numStackValues, scratch, framePtrScratch);
+ masm.mov(sp, framePtrScratch);
+
+ // Reserve space for locals and stack values.
+ masm.ma_lsl(Imm32(3), numStackValues, scratch);
+ masm.ma_sub(sp, scratch, sp);
+
+ // Enter exit frame.
+ masm.pushFrameDescriptor(FrameType::BaselineJS);
+ masm.push(Imm32(0)); // Fake return address.
+ masm.push(FramePointer);
+ // No GC things to mark on the stack, push a bare token.
+ masm.loadJSContext(scratch);
+ masm.enterFakeExitFrame(scratch, scratch, ExitFrameType::Bare);
+
+ masm.push(r0); // jitcode
+
+ using Fn = bool (*)(BaselineFrame * frame, InterpreterFrame * interpFrame,
+ uint32_t numStackValues);
+ masm.setupUnalignedABICall(scratch);
+ masm.passABIArg(framePtrScratch); // BaselineFrame
+ masm.passABIArg(OsrFrameReg); // InterpreterFrame
+ masm.passABIArg(numStackValues);
+ masm.callWithABI<Fn, jit::InitBaselineFrameForOsr>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckHasExitFrame);
+
+ Register jitcode = regs.takeAny();
+ masm.pop(jitcode);
+
+ MOZ_ASSERT(jitcode != ReturnReg);
+
+ Label error;
+ masm.addPtr(Imm32(ExitFrameLayout::SizeWithFooter()), sp);
+ masm.branchIfFalseBool(ReturnReg, &error);
+
+ // If OSR-ing, then emit instrumentation for setting lastProfilerFrame
+ // if profiler instrumentation is enabled.
+ {
+ Label skipProfilingInstrumentation;
+ AbsoluteAddress addressOfEnabled(
+ cx->runtime()->geckoProfiler().addressOfEnabled());
+ masm.branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
+ &skipProfilingInstrumentation);
+ masm.profilerEnterFrame(FramePointer, scratch);
+ masm.bind(&skipProfilingInstrumentation);
+ }
+
+ masm.jump(jitcode);
+
+ // OOM: frame epilogue, load error value, discard return address and return.
+ masm.bind(&error);
+ masm.mov(FramePointer, sp);
+ masm.pop(FramePointer);
+ masm.addPtr(Imm32(sizeof(uintptr_t)), sp); // Return address.
+ masm.moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
+ masm.jump(&returnLabel);
+
+ masm.bind(&notOsr);
+ // Load the scope chain in R1.
+ MOZ_ASSERT(R1.scratchReg() != r0);
+ masm.loadPtr(Address(r11, offsetof(EnterJITStack, scopeChain)),
+ R1.scratchReg());
+ }
+
+ // The callee will push the return address and frame pointer on the stack,
+ // thus we check that the stack would be aligned once the call is complete.
+ masm.assertStackAlignment(JitStackAlignment, 2 * sizeof(uintptr_t));
+
+ // Call the function.
+ masm.callJitNoProfiler(r0);
+
+ // Interpreter -> Baseline OSR will return here.
+ masm.bind(&returnLabel);
+
+ // Discard arguments and padding. Set sp to the address of the EnterJITStack
+ // on the stack.
+ masm.mov(r11, sp);
+
+ // Store the returned value into the slot_vp
+ masm.loadPtr(slot_vp, r5);
+ masm.storeValue(JSReturnOperand, Address(r5, 0));
+
+ // Restore non-volatile registers and return.
+ GenerateReturn(masm, true);
+}
+
+// static
+mozilla::Maybe<::JS::ProfilingFrameIterator::RegisterState>
+JitRuntime::getCppEntryRegisters(JitFrameLayout* frameStackAddress) {
+ // Not supported, or not implemented yet.
+ // TODO: Implement along with the corresponding stack-walker changes, in
+ // coordination with the Gecko Profiler, see bug 1635987 and follow-ups.
+ return mozilla::Nothing{};
+}
+
+void JitRuntime::generateInvalidator(MacroAssembler& masm, Label* bailoutTail) {
+ // See large comment in x86's JitRuntime::generateInvalidator.
+
+ AutoCreatedBy acb(masm, "JitRuntime::generateInvalidator");
+
+ invalidatorOffset_ = startTrampolineCode(masm);
+
+ // At this point, one of two things has happened:
+ // 1) Execution has just returned from C code, which left the stack aligned
+ // 2) Execution has just returned from Ion code, which left the stack
+ // unaligned. The old return address should not matter, but we still want the
+ // stack to be aligned, and there is no good reason to automatically align it
+ // with a call to setupUnalignedABICall.
+ masm.as_bic(sp, sp, Imm8(7));
+ masm.startDataTransferM(IsStore, sp, DB, WriteBack);
+ // We don't have to push everything, but this is likely easier.
+ // Setting regs_.
+ for (uint32_t i = 0; i < Registers::Total; i++) {
+ masm.transferReg(Register::FromCode(i));
+ }
+ masm.finishDataTransfer();
+
+  // Since our data structures for stack inspection are fixed at compile
+  // time, if there are only 16 double registers we must still reserve
+  // stack space for the missing 16.
+ if (FloatRegisters::ActualTotalPhys() != FloatRegisters::TotalPhys) {
+ ScratchRegisterScope scratch(masm);
+ int missingRegs =
+ FloatRegisters::TotalPhys - FloatRegisters::ActualTotalPhys();
+ masm.ma_sub(Imm32(missingRegs * sizeof(double)), sp, scratch);
+ }
+
+ masm.startFloatTransferM(IsStore, sp, DB, WriteBack);
+ for (uint32_t i = 0; i < FloatRegisters::ActualTotalPhys(); i++) {
+ masm.transferFloatReg(FloatRegister(i, FloatRegister::Double));
+ }
+ masm.finishFloatTransfer();
+
+ masm.ma_mov(sp, r0);
+ // Reserve 8 bytes for the outparam to ensure alignment for
+ // setupAlignedABICall.
+ masm.reserveStack(sizeof(void*) * 2);
+ masm.mov(sp, r1);
+ using Fn =
+ bool (*)(InvalidationBailoutStack * sp, BaselineBailoutInfo * *info);
+ masm.setupAlignedABICall();
+ masm.passABIArg(r0);
+ masm.passABIArg(r1);
+ masm.callWithABI<Fn, InvalidationBailout>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther);
+
+ masm.pop(r2); // Get bailoutInfo outparam.
+
+ // Pop the machine state and the dead frame.
+ masm.moveToStackPtr(FramePointer);
+
+ // Jump to shared bailout tail. The BailoutInfo pointer has to be in r2.
+ masm.jump(bailoutTail);
+}
+
+void JitRuntime::generateArgumentsRectifier(MacroAssembler& masm,
+ ArgumentsRectifierKind kind) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateArgumentsRectifier");
+
+ switch (kind) {
+ case ArgumentsRectifierKind::Normal:
+ argumentsRectifierOffset_ = startTrampolineCode(masm);
+ break;
+ case ArgumentsRectifierKind::TrialInlining:
+ trialInliningArgumentsRectifierOffset_ = startTrampolineCode(masm);
+ break;
+ }
+ masm.pushReturnAddress();
+
+ // Frame prologue.
+ //
+ // NOTE: if this changes, fix the Baseline bailout code too!
+ // See BaselineStackBuilder::calculatePrevFramePtr and
+ // BaselineStackBuilder::buildRectifierFrame (in BaselineBailouts.cpp).
+ masm.push(FramePointer);
+ masm.mov(StackPointer, FramePointer);
+
+ static_assert(JitStackAlignment == sizeof(Value));
+
+ // Copy number of actual arguments into r0 and r8.
+ masm.loadNumActualArgs(FramePointer, r0);
+ masm.mov(r0, r8);
+
+ // Load the number of |undefined|s to push into r6.
+ masm.loadPtr(
+ Address(FramePointer, RectifierFrameLayout::offsetOfCalleeToken()), r1);
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_and(Imm32(CalleeTokenMask), r1, r6, scratch);
+ }
+ masm.loadFunctionArgCount(r6, r6);
+
+ masm.ma_sub(r6, r8, r2);
+
+ // Get the topmost argument.
+ {
+ ScratchRegisterScope scratch(masm);
+ masm.ma_alu(sp, lsl(r8, 3), r3, OpAdd); // r3 <- sp + nargs * 8
+ masm.ma_add(r3, Imm32(sizeof(RectifierFrameLayout)), r3, scratch);
+ }
+
+ {
+ Label notConstructing;
+
+ masm.branchTest32(Assembler::Zero, r1,
+ Imm32(CalleeToken_FunctionConstructing),
+ &notConstructing);
+
+    // Add sizeof(Value) to skip over |this|.
+ masm.as_extdtr(IsLoad, 64, true, Offset, r4, EDtrAddr(r3, EDtrOffImm(8)));
+ masm.as_extdtr(IsStore, 64, true, PreIndex, r4,
+ EDtrAddr(sp, EDtrOffImm(-8)));
+
+ masm.bind(&notConstructing);
+ }
+
+ // Push undefined.
+ masm.moveValue(UndefinedValue(), ValueOperand(r5, r4));
+ {
+ Label undefLoopTop;
+ masm.bind(&undefLoopTop);
+ masm.as_extdtr(IsStore, 64, true, PreIndex, r4,
+ EDtrAddr(sp, EDtrOffImm(-8)));
+ masm.as_sub(r2, r2, Imm8(1), SetCC);
+
+ masm.ma_b(&undefLoopTop, Assembler::NonZero);
+ }
+
+ // Push arguments, |nargs| + 1 times (to include |this|).
+ {
+ Label copyLoopTop;
+ masm.bind(&copyLoopTop);
+ masm.as_extdtr(IsLoad, 64, true, PostIndex, r4,
+ EDtrAddr(r3, EDtrOffImm(-8)));
+ masm.as_extdtr(IsStore, 64, true, PreIndex, r4,
+ EDtrAddr(sp, EDtrOffImm(-8)));
+
+ masm.as_sub(r8, r8, Imm8(1), SetCC);
+ masm.ma_b(&copyLoopTop, Assembler::NotSigned);
+ }
+
+ // Construct JitFrameLayout.
+ masm.ma_push(r1); // callee token
+ masm.pushFrameDescriptorForJitCall(FrameType::Rectifier, r0, r0);
+
+ // Call the target function.
+ masm.andPtr(Imm32(CalleeTokenMask), r1);
+ switch (kind) {
+ case ArgumentsRectifierKind::Normal:
+ masm.loadJitCodeRaw(r1, r3);
+ argumentsRectifierReturnOffset_ = masm.callJitNoProfiler(r3);
+ break;
+ case ArgumentsRectifierKind::TrialInlining:
+ Label noBaselineScript, done;
+ masm.loadBaselineJitCodeRaw(r1, r3, &noBaselineScript);
+ masm.callJitNoProfiler(r3);
+ masm.jump(&done);
+
+ // See BaselineCacheIRCompiler::emitCallInlinedFunction.
+ masm.bind(&noBaselineScript);
+ masm.loadJitCodeRaw(r1, r3);
+ masm.callJitNoProfiler(r3);
+ masm.bind(&done);
+ break;
+ }
+
+ masm.mov(FramePointer, StackPointer);
+ masm.pop(FramePointer);
+ masm.ret();
+}
+
+static void PushBailoutFrame(MacroAssembler& masm, Register spArg) {
+#ifdef ENABLE_WASM_SIMD
+# error "Needs more careful logic if SIMD is enabled"
+#endif
+
+ // STEP 1a: Save our register sets to the stack so Bailout() can read
+ // everything.
+ // sp % 8 == 0
+
+ masm.startDataTransferM(IsStore, sp, DB, WriteBack);
+ // We don't have to push everything, but this is likely easier.
+ // Setting regs_.
+ for (uint32_t i = 0; i < Registers::Total; i++) {
+ masm.transferReg(Register::FromCode(i));
+ }
+ masm.finishDataTransfer();
+
+ ScratchRegisterScope scratch(masm);
+
+  // Since our data structures for stack inspection are fixed at compile
+  // time, if there are only 16 double registers we must still reserve
+  // stack space for the missing 16.
+ if (FloatRegisters::ActualTotalPhys() != FloatRegisters::TotalPhys) {
+ int missingRegs =
+ FloatRegisters::TotalPhys - FloatRegisters::ActualTotalPhys();
+ masm.ma_sub(Imm32(missingRegs * sizeof(double)), sp, scratch);
+ }
+ masm.startFloatTransferM(IsStore, sp, DB, WriteBack);
+ for (uint32_t i = 0; i < FloatRegisters::ActualTotalPhys(); i++) {
+ masm.transferFloatReg(FloatRegister(i, FloatRegister::Double));
+ }
+ masm.finishFloatTransfer();
+
+ // The current stack pointer is the first argument to jit::Bailout.
+ masm.ma_mov(sp, spArg);
+}
+
+static void GenerateBailoutThunk(MacroAssembler& masm, Label* bailoutTail) {
+ PushBailoutFrame(masm, r0);
+
+ // Make space for Bailout's bailoutInfo outparam.
+ masm.reserveStack(sizeof(void*));
+ masm.mov(sp, r1);
+ using Fn = bool (*)(BailoutStack * sp, BaselineBailoutInfo * *info);
+ masm.setupAlignedABICall();
+
+ masm.passABIArg(r0);
+ masm.passABIArg(r1);
+
+ masm.callWithABI<Fn, Bailout>(MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckOther);
+ masm.pop(r2); // Get the bailoutInfo outparam.
+
+ // Remove both the bailout frame and the topmost Ion frame's stack.
+ masm.moveToStackPtr(FramePointer);
+
+ // Jump to shared bailout tail. The BailoutInfo pointer has to be in r2.
+ masm.jump(bailoutTail);
+}
+
+void JitRuntime::generateBailoutHandler(MacroAssembler& masm,
+ Label* bailoutTail) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateBailoutHandler");
+
+ bailoutHandlerOffset_ = startTrampolineCode(masm);
+
+ GenerateBailoutThunk(masm, bailoutTail);
+}
+
+bool JitRuntime::generateVMWrapper(JSContext* cx, MacroAssembler& masm,
+ const VMFunctionData& f, DynFn nativeFun,
+ uint32_t* wrapperOffset) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateVMWrapper");
+
+ *wrapperOffset = startTrampolineCode(masm);
+
+ AllocatableGeneralRegisterSet regs(Register::Codes::WrapperMask);
+
+ static_assert(
+ (Register::Codes::VolatileMask & ~Register::Codes::WrapperMask) == 0,
+ "Wrapper register set must be a superset of Volatile register set.");
+
+ // The context is the first argument; r0 is the first argument register.
+ Register cxreg = r0;
+ regs.take(cxreg);
+
+ // Stack is:
+ // ... frame ...
+ // +8 [args] + argPadding
+ // +0 ExitFrame
+ //
+ // If it isn't a tail call, then the return address needs to be saved.
+ // Push the frame pointer to finish the exit frame, then link it up.
+ if (f.expectTailCall == NonTailCall) {
+ masm.pushReturnAddress();
+ }
+ masm.Push(FramePointer);
+ masm.moveStackPtrTo(FramePointer);
+ masm.loadJSContext(cxreg);
+ masm.enterExitFrame(cxreg, regs.getAny(), &f);
+
+ // Save the base of the argument set stored on the stack.
+ Register argsBase = InvalidReg;
+ if (f.explicitArgs) {
+ argsBase = r5;
+ regs.take(argsBase);
+ ScratchRegisterScope scratch(masm);
+ masm.ma_add(sp, Imm32(ExitFrameLayout::SizeWithFooter()), argsBase,
+ scratch);
+ }
+
+ // Reserve space for the outparameter.
+ Register outReg = InvalidReg;
+ switch (f.outParam) {
+ case Type_Value:
+ outReg = r4;
+ regs.take(outReg);
+ masm.reserveStack(sizeof(Value));
+ masm.ma_mov(sp, outReg);
+ break;
+
+ case Type_Handle:
+ outReg = r4;
+ regs.take(outReg);
+ masm.PushEmptyRooted(f.outParamRootType);
+ masm.ma_mov(sp, outReg);
+ break;
+
+ case Type_Int32:
+ case Type_Pointer:
+ case Type_Bool:
+ outReg = r4;
+ regs.take(outReg);
+ masm.reserveStack(sizeof(int32_t));
+ masm.ma_mov(sp, outReg);
+ break;
+
+ case Type_Double:
+ outReg = r4;
+ regs.take(outReg);
+ masm.reserveStack(sizeof(double));
+ masm.ma_mov(sp, outReg);
+ break;
+
+ default:
+ MOZ_ASSERT(f.outParam == Type_Void);
+ break;
+ }
+
+ masm.setupUnalignedABICall(regs.getAny());
+ masm.passABIArg(cxreg);
+
+ size_t argDisp = 0;
+
+ // Copy any arguments.
+ for (uint32_t explicitArg = 0; explicitArg < f.explicitArgs; explicitArg++) {
+ switch (f.argProperties(explicitArg)) {
+ case VMFunctionData::WordByValue:
+ masm.passABIArg(MoveOperand(argsBase, argDisp), MoveOp::GENERAL);
+ argDisp += sizeof(void*);
+ break;
+ case VMFunctionData::DoubleByValue:
+ // Values should be passed by reference, not by value, so we assert
+ // that the argument is a double-precision float.
+ MOZ_ASSERT(f.argPassedInFloatReg(explicitArg));
+ masm.passABIArg(MoveOperand(argsBase, argDisp), MoveOp::DOUBLE);
+ argDisp += sizeof(double);
+ break;
+ case VMFunctionData::WordByRef:
+ masm.passABIArg(
+ MoveOperand(argsBase, argDisp, MoveOperand::Kind::EffectiveAddress),
+ MoveOp::GENERAL);
+ argDisp += sizeof(void*);
+ break;
+ case VMFunctionData::DoubleByRef:
+ masm.passABIArg(
+ MoveOperand(argsBase, argDisp, MoveOperand::Kind::EffectiveAddress),
+ MoveOp::GENERAL);
+ argDisp += 2 * sizeof(void*);
+ break;
+ }
+ }
+
+ // Copy the implicit outparam, if any.
+ if (outReg != InvalidReg) {
+ masm.passABIArg(outReg);
+ }
+
+ masm.callWithABI(nativeFun, MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckHasExitFrame);
+
+ // Test for failure.
+ switch (f.failType()) {
+ case Type_Cell:
+ masm.branchTestPtr(Assembler::Zero, r0, r0, masm.failureLabel());
+ break;
+ case Type_Bool:
+ masm.branchIfFalseBool(r0, masm.failureLabel());
+ break;
+ case Type_Void:
+ break;
+ default:
+ MOZ_CRASH("unknown failure kind");
+ }
+
+ // Load the outparam and free any allocated stack.
+ switch (f.outParam) {
+ case Type_Handle:
+ masm.popRooted(f.outParamRootType, ReturnReg, JSReturnOperand);
+ break;
+
+ case Type_Value:
+ masm.loadValue(Address(sp, 0), JSReturnOperand);
+ masm.freeStack(sizeof(Value));
+ break;
+
+ case Type_Int32:
+ case Type_Pointer:
+ masm.load32(Address(sp, 0), ReturnReg);
+ masm.freeStack(sizeof(int32_t));
+ break;
+
+ case Type_Bool:
+ masm.load8ZeroExtend(Address(sp, 0), ReturnReg);
+ masm.freeStack(sizeof(int32_t));
+ break;
+
+ case Type_Double:
+ masm.loadDouble(Address(sp, 0), ReturnDoubleReg);
+ masm.freeStack(sizeof(double));
+ break;
+
+ default:
+ MOZ_ASSERT(f.outParam == Type_Void);
+ break;
+ }
+
+ // Until C++ code is instrumented against Spectre, prevent speculative
+ // execution from returning any private data.
+ if (f.returnsData() && JitOptions.spectreJitToCxxCalls) {
+ masm.speculationBarrier();
+ }
+
+ // Pop ExitFooterFrame and the frame pointer.
+ masm.leaveExitFrame(0);
+ masm.pop(FramePointer);
+
+ // Return. Subtract sizeof(void*) for the frame pointer.
+ masm.retn(Imm32(sizeof(ExitFrameLayout) - sizeof(void*) +
+ f.explicitStackSlots() * sizeof(void*) +
+ f.extraValuesToPop * sizeof(Value)));
+
+ return true;
+}
+
+uint32_t JitRuntime::generatePreBarrier(JSContext* cx, MacroAssembler& masm,
+ MIRType type) {
+ AutoCreatedBy acb(masm, "JitRuntime::generatePreBarrier");
+
+ uint32_t offset = startTrampolineCode(masm);
+
+ masm.pushReturnAddress();
+
+ static_assert(PreBarrierReg == r1);
+ Register temp1 = r2;
+ Register temp2 = r3;
+ Register temp3 = r4;
+ masm.push(temp1);
+ masm.push(temp2);
+ masm.push(temp3);
+
+ Label noBarrier;
+ masm.emitPreBarrierFastPath(cx->runtime(), type, temp1, temp2, temp3,
+ &noBarrier);
+
+ // Call into C++ to mark this GC thing.
+ masm.pop(temp3);
+ masm.pop(temp2);
+ masm.pop(temp1);
+
+ LiveRegisterSet save;
+ save.set() =
+ RegisterSet(GeneralRegisterSet(Registers::VolatileMask),
+ FloatRegisterSet(FloatRegisters::VolatileDoubleMask));
+ masm.PushRegsInMask(save);
+
+ masm.movePtr(ImmPtr(cx->runtime()), r0);
+
+ masm.setupUnalignedABICall(r2);
+ masm.passABIArg(r0);
+ masm.passABIArg(r1);
+ masm.callWithABI(JitPreWriteBarrier(type));
+ masm.PopRegsInMask(save);
+ masm.ret();
+
+ masm.bind(&noBarrier);
+ masm.pop(temp3);
+ masm.pop(temp2);
+ masm.pop(temp1);
+ masm.ret();
+
+ return offset;
+}
+
+void JitRuntime::generateBailoutTailStub(MacroAssembler& masm,
+ Label* bailoutTail) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateBailoutTailStub");
+
+ masm.bind(bailoutTail);
+ masm.generateBailoutTail(r1, r2);
+}
diff --git a/js/src/jit/arm/disasm/Constants-arm.cpp b/js/src/jit/arm/disasm/Constants-arm.cpp
new file mode 100644
index 0000000000..408e2df686
--- /dev/null
+++ b/js/src/jit/arm/disasm/Constants-arm.cpp
@@ -0,0 +1,117 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ */
+// Copyright 2009 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "jit/arm/disasm/Constants-arm.h"
+
+#ifdef JS_DISASM_ARM
+
+namespace js {
+namespace jit {
+namespace disasm {
+
+double Instruction::DoubleImmedVmov() const {
+ // Reconstruct a double from the immediate encoded in the vmov instruction.
+ //
+ // instruction: [xxxxxxxx,xxxxabcd,xxxxxxxx,xxxxefgh]
+ // double: [aBbbbbbb,bbcdefgh,00000000,00000000,
+ // 00000000,00000000,00000000,00000000]
+ //
+ // where B = ~b. Only the high 16 bits are affected.
+ uint64_t high16;
+ high16 = (Bits(17, 16) << 4) | Bits(3, 0); // xxxxxxxx,xxcdefgh.
+ high16 |= (0xff * Bit(18)) << 6; // xxbbbbbb,bbxxxxxx.
+ high16 |= (Bit(18) ^ 1) << 14; // xBxxxxxx,xxxxxxxx.
+ high16 |= Bit(19) << 15; // axxxxxxx,xxxxxxxx.
+
+ uint64_t imm = high16 << 48;
+ double d;
+ memcpy(&d, &imm, 8);
+ return d;
+}
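+// For example, the vmov immediate for 1.0 is abcdefgh = 0b01110000; the
+// code above then yields high16 = 0x3ff0, so imm = 0x3ff0000000000000,
+// the IEEE-754 bit pattern of the double 1.0.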
+
+// These register names are defined in a way to match the native disassembler
+// formatting. See for example the command "objdump -d <binary file>".
+const char* Registers::names_[kNumRegisters] = {
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+ "r8", "r9", "r10", "fp", "ip", "sp", "lr", "pc",
+};
+
+// List of alias names which can be used when referring to ARM registers.
+const Registers::RegisterAlias Registers::aliases_[] = {
+ {10, "sl"}, {11, "r11"}, {12, "r12"}, {13, "r13"},
+ {14, "r14"}, {15, "r15"}, {kNoRegister, NULL}};
+
+const char* Registers::Name(int reg) {
+ const char* result;
+ if ((0 <= reg) && (reg < kNumRegisters)) {
+ result = names_[reg];
+ } else {
+ result = "noreg";
+ }
+ return result;
+}
+
+// Support for VFP registers s0 to s31 (d0 to d15) and d16-d31.
+// Note that "sN:sM" is the same as "dN/2" up to d15.
+// These register names are defined in a way to match the native disassembler
+// formatting. See for example the command "objdump -d <binary file>".
+const char* VFPRegisters::names_[kNumVFPRegisters] = {
+ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10",
+ "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21",
+ "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", "d0",
+ "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11",
+ "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22",
+ "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"};
+
+const char* VFPRegisters::Name(int reg, bool is_double) {
+ MOZ_ASSERT((0 <= reg) && (reg < kNumVFPRegisters));
+ return names_[reg + (is_double ? kNumVFPSingleRegisters : 0)];
+}
+
+int VFPRegisters::Number(const char* name, bool* is_double) {
+ for (int i = 0; i < kNumVFPRegisters; i++) {
+ if (strcmp(names_[i], name) == 0) {
+ if (i < kNumVFPSingleRegisters) {
+ *is_double = false;
+ return i;
+ } else {
+ *is_double = true;
+ return i - kNumVFPSingleRegisters;
+ }
+ }
+ }
+
+ // No register with the requested name found.
+ return kNoRegister;
+}
+
+int Registers::Number(const char* name) {
+ // Look through the canonical names.
+ for (int i = 0; i < kNumRegisters; i++) {
+ if (strcmp(names_[i], name) == 0) {
+ return i;
+ }
+ }
+
+ // Look through the alias names.
+ int i = 0;
+ while (aliases_[i].reg != kNoRegister) {
+ if (strcmp(aliases_[i].name, name) == 0) {
+ return aliases_[i].reg;
+ }
+ i++;
+ }
+
+ // No register with the requested name found.
+ return kNoRegister;
+}
+
+} // namespace disasm
+} // namespace jit
+} // namespace js
+
+#endif // JS_DISASM_ARM
diff --git a/js/src/jit/arm/disasm/Constants-arm.h b/js/src/jit/arm/disasm/Constants-arm.h
new file mode 100644
index 0000000000..0128062b3f
--- /dev/null
+++ b/js/src/jit/arm/disasm/Constants-arm.h
@@ -0,0 +1,684 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ */
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef jit_arm_disasm_Constants_arm_h
+#define jit_arm_disasm_Constants_arm_h
+
+#ifdef JS_DISASM_ARM
+
+# include "mozilla/Assertions.h"
+# include "mozilla/Types.h"
+
+# include <string.h>
+
+namespace js {
+namespace jit {
+namespace disasm {
+
+// Constant pool marker.
+// Use UDF, the permanently undefined instruction.
+const int kConstantPoolMarkerMask = 0xfff000f0;
+const int kConstantPoolMarker = 0xe7f000f0;
+const int kConstantPoolLengthMaxMask = 0xffff;
+
+inline int EncodeConstantPoolLength(int length) {
+ MOZ_ASSERT((length & kConstantPoolLengthMaxMask) == length);
+ return ((length & 0xfff0) << 4) | (length & 0xf);
+}
+
+inline int DecodeConstantPoolLength(int instr) {
+ MOZ_ASSERT((instr & kConstantPoolMarkerMask) == kConstantPoolMarker);
+ return ((instr >> 4) & 0xfff0) | (instr & 0xf);
+}
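+// The marker leaves bits 19:8 and 3:0 of the UDF encoding free, so the
+// upper twelve bits of the length land in bits 19:8 and its low nibble in
+// bits 3:0. For example, EncodeConstantPoolLength(0x123) == 0x1203, and
+// decoding the full marker word 0xe7f000f0 | 0x1203 == 0xe7f012f3 gives
+// back 0x123.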
+
+// Used in code age prologue - ldr(pc, MemOperand(pc, -4))
+const int kCodeAgeJumpInstruction = 0xe51ff004;
+
+// Number of registers in normal ARM mode.
+const int kNumRegisters = 16;
+
+// VFP support.
+const int kNumVFPSingleRegisters = 32;
+const int kNumVFPDoubleRegisters = 32;
+const int kNumVFPRegisters = kNumVFPSingleRegisters + kNumVFPDoubleRegisters;
+
+// PC is register 15.
+const int kPCRegister = 15;
+const int kNoRegister = -1;
+
+// -----------------------------------------------------------------------------
+// Conditions.
+
+// Defines constants and accessor classes to assemble, disassemble and
+// simulate ARM instructions.
+//
+// Section references in the code refer to the "ARM Architecture Reference
+// Manual" from July 2005 (available at http://www.arm.com/miscPDFs/14128.pdf)
+//
+// Constants for specific fields are defined in their respective named enums.
+// General constants are in an anonymous enum in class Instr.
+
+// Values for the condition field as defined in section A3.2
+enum Condition {
+ kNoCondition = -1,
+
+ eq = 0 << 28, // Z set Equal.
+ ne = 1 << 28, // Z clear Not equal.
+ cs = 2 << 28, // C set Unsigned higher or same.
+ cc = 3 << 28, // C clear Unsigned lower.
+ mi = 4 << 28, // N set Negative.
+ pl = 5 << 28, // N clear Positive or zero.
+ vs = 6 << 28, // V set Overflow.
+ vc = 7 << 28, // V clear No overflow.
+ hi = 8 << 28, // C set, Z clear Unsigned higher.
+ ls = 9 << 28, // C clear or Z set Unsigned lower or same.
+ ge = 10 << 28, // N == V Greater or equal.
+ lt = 11 << 28, // N != V Less than.
+ gt = 12 << 28, // Z clear, N == V Greater than.
+  le = 13 << 28, // Z set or N != V Less than or equal.
+ al = 14 << 28, // Always.
+
+ kSpecialCondition = 15 << 28, // Special condition (refer to section A3.2.1).
+ kNumberOfConditions = 16,
+
+ // Aliases.
+ hs = cs, // C set Unsigned higher or same.
+ lo = cc // C clear Unsigned lower.
+};
+
+inline Condition NegateCondition(Condition cond) {
+ MOZ_ASSERT(cond != al);
+ return static_cast<Condition>(cond ^ ne);
+}
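+// Since ne == 1 << 28, the XOR above flips bit 0 of the 4-bit condition
+// field, pairing each condition with its logical inverse: eq <-> ne,
+// cs <-> cc, mi <-> pl, ge <-> lt, gt <-> le, and so on.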
+
+// Commute a condition such that {a cond b == b cond' a}.
+inline Condition CommuteCondition(Condition cond) {
+ switch (cond) {
+ case lo:
+ return hi;
+ case hi:
+ return lo;
+ case hs:
+ return ls;
+ case ls:
+ return hs;
+ case lt:
+ return gt;
+ case gt:
+ return lt;
+ case ge:
+ return le;
+ case le:
+ return ge;
+ default:
+ return cond;
+ }
+}
+
+// -----------------------------------------------------------------------------
+// Instructions encoding.
+
+// Instr is merely used by the Assembler to distinguish 32-bit integers
+// representing instructions from ordinary 32-bit values.
+// Instruction objects are pointers to 32-bit values, and provide methods to
+// access the various ISA fields.
+typedef int32_t Instr;
+
+// Opcodes for Data-processing instructions (instructions with a type 0 and 1)
+// as defined in section A3.4
+enum Opcode {
+ AND = 0 << 21, // Logical AND.
+ EOR = 1 << 21, // Logical Exclusive OR.
+ SUB = 2 << 21, // Subtract.
+ RSB = 3 << 21, // Reverse Subtract.
+ ADD = 4 << 21, // Add.
+ ADC = 5 << 21, // Add with Carry.
+ SBC = 6 << 21, // Subtract with Carry.
+ RSC = 7 << 21, // Reverse Subtract with Carry.
+ TST = 8 << 21, // Test.
+ TEQ = 9 << 21, // Test Equivalence.
+ CMP = 10 << 21, // Compare.
+ CMN = 11 << 21, // Compare Negated.
+ ORR = 12 << 21, // Logical (inclusive) OR.
+ MOV = 13 << 21, // Move.
+ BIC = 14 << 21, // Bit Clear.
+ MVN = 15 << 21 // Move Not.
+};
+
+// The values of bits 7-4 for some type 0 miscellaneous instructions.
+enum MiscInstructionsBits74 {
+ // With bits 22-21 01.
+ BX = 1 << 4,
+ BXJ = 2 << 4,
+ BLX = 3 << 4,
+ BKPT = 7 << 4,
+
+ // With bits 22-21 11.
+ CLZ = 1 << 4
+};
+
+// Load and store exclusive instructions.
+
+// Bit positions.
+enum {
+ ExclusiveOpHi = 24, // Hi bit of opcode field
+ ExclusiveOpLo = 23, // Lo bit of opcode field
+ ExclusiveSizeHi = 22, // Hi bit of operand size field
+ ExclusiveSizeLo = 21, // Lo bit of operand size field
+ ExclusiveLoad = 20 // Bit indicating load
+};
+
+// Opcode bits for exclusive instructions.
+enum { ExclusiveOpcode = 3 };
+
+// Operand size, Bits(ExclusiveSizeHi,ExclusiveSizeLo).
+enum {
+ ExclusiveWord = 0,
+ ExclusiveDouble = 1,
+ ExclusiveByte = 2,
+ ExclusiveHalf = 3
+};
+
+// Instruction encoding bits and masks.
+enum {
+ H = 1 << 5, // Halfword (or byte).
+ S6 = 1 << 6, // Signed (or unsigned).
+ L = 1 << 20, // Load (or store).
+ S = 1 << 20, // Set condition code (or leave unchanged).
+ W = 1 << 21, // Writeback base register (or leave unchanged).
+ A = 1 << 21, // Accumulate in multiply instruction (or not).
+ B = 1 << 22, // Unsigned byte (or word).
+ N = 1 << 22, // Long (or short).
+ U = 1 << 23, // Positive (or negative) offset/index.
+ P = 1 << 24, // Offset/pre-indexed addressing (or post-indexed addressing).
+ I = 1 << 25, // Immediate shifter operand (or not).
+ B0 = 1 << 0,
+ B4 = 1 << 4,
+ B5 = 1 << 5,
+ B6 = 1 << 6,
+ B7 = 1 << 7,
+ B8 = 1 << 8,
+ B9 = 1 << 9,
+ B12 = 1 << 12,
+ B16 = 1 << 16,
+ B17 = 1 << 17,
+ B18 = 1 << 18,
+ B19 = 1 << 19,
+ B20 = 1 << 20,
+ B21 = 1 << 21,
+ B22 = 1 << 22,
+ B23 = 1 << 23,
+ B24 = 1 << 24,
+ B25 = 1 << 25,
+ B26 = 1 << 26,
+ B27 = 1 << 27,
+ B28 = 1 << 28,
+
+ // Instruction bit masks.
+ kCondMask = 15 << 28,
+ kALUMask = 0x6f << 21,
+ kRdMask = 15 << 12, // In str instruction.
+ kCoprocessorMask = 15 << 8,
+ kOpCodeMask = 15 << 21, // In data-processing instructions.
+ kImm24Mask = (1 << 24) - 1,
+ kImm16Mask = (1 << 16) - 1,
+ kImm8Mask = (1 << 8) - 1,
+ kOff12Mask = (1 << 12) - 1,
+ kOff8Mask = (1 << 8) - 1
+};
+
+// -----------------------------------------------------------------------------
+// Addressing modes and instruction variants.
+
+// Condition code updating mode.
+enum SBit {
+ SetCC = 1 << 20, // Set condition code.
+ LeaveCC = 0 << 20 // Leave condition code unchanged.
+};
+
+// Status register selection.
+enum SRegister { CPSR = 0 << 22, SPSR = 1 << 22 };
+
+// Shifter types for Data-processing operands as defined in section A5.1.2.
+enum ShiftOp {
+ LSL = 0 << 5, // Logical shift left.
+ LSR = 1 << 5, // Logical shift right.
+ ASR = 2 << 5, // Arithmetic shift right.
+ ROR = 3 << 5, // Rotate right.
+
+ // RRX is encoded as ROR with shift_imm == 0.
+ // Use a special code to make the distinction. The RRX ShiftOp is only used
+ // as an argument, and will never actually be encoded. The Assembler will
+ // detect it and emit the correct ROR shift operand with shift_imm == 0.
+ RRX = -1,
+ kNumberOfShifts = 4
+};
+
+// Status register fields.
+enum SRegisterField {
+ CPSR_c = CPSR | 1 << 16,
+ CPSR_x = CPSR | 1 << 17,
+ CPSR_s = CPSR | 1 << 18,
+ CPSR_f = CPSR | 1 << 19,
+ SPSR_c = SPSR | 1 << 16,
+ SPSR_x = SPSR | 1 << 17,
+ SPSR_s = SPSR | 1 << 18,
+ SPSR_f = SPSR | 1 << 19
+};
+
+// Status register field mask (or'ed SRegisterField enum values).
+typedef uint32_t SRegisterFieldMask;
+
+// Memory operand addressing mode.
+enum AddrMode {
+ // Bit encoding P U W.
+ Offset = (8 | 4 | 0) << 21, // Offset (without writeback to base).
+ PreIndex = (8 | 4 | 1) << 21, // Pre-indexed addressing with writeback.
+ PostIndex = (0 | 4 | 0) << 21, // Post-indexed addressing with writeback.
+ NegOffset =
+ (8 | 0 | 0) << 21, // Negative offset (without writeback to base).
+ NegPreIndex = (8 | 0 | 1) << 21, // Negative pre-indexed with writeback.
+ NegPostIndex = (0 | 0 | 0) << 21 // Negative post-indexed with writeback.
+};
+
+// Load/store multiple addressing mode.
+enum BlockAddrMode {
+ // Bit encoding P U W .
+ da = (0 | 0 | 0) << 21, // Decrement after.
+ ia = (0 | 4 | 0) << 21, // Increment after.
+ db = (8 | 0 | 0) << 21, // Decrement before.
+ ib = (8 | 4 | 0) << 21, // Increment before.
+ da_w = (0 | 0 | 1) << 21, // Decrement after with writeback to base.
+ ia_w = (0 | 4 | 1) << 21, // Increment after with writeback to base.
+ db_w = (8 | 0 | 1) << 21, // Decrement before with writeback to base.
+ ib_w = (8 | 4 | 1) << 21, // Increment before with writeback to base.
+
+ // Alias modes for comparison when writeback does not matter.
+ da_x = (0 | 0 | 0) << 21, // Decrement after.
+ ia_x = (0 | 4 | 0) << 21, // Increment after.
+ db_x = (8 | 0 | 0) << 21, // Decrement before.
+ ib_x = (8 | 4 | 0) << 21, // Increment before.
+
+ kBlockAddrModeMask = (8 | 4 | 1) << 21
+};
+
+// Coprocessor load/store operand size.
+enum LFlag {
+ Long = 1 << 22, // Long load/store coprocessor.
+ Short = 0 << 22 // Short load/store coprocessor.
+};
+
+// NEON data type
+enum NeonDataType {
+ NeonS8 = 0x1, // U = 0, imm3 = 0b001
+ NeonS16 = 0x2, // U = 0, imm3 = 0b010
+ NeonS32 = 0x4, // U = 0, imm3 = 0b100
+ NeonU8 = 1 << 24 | 0x1, // U = 1, imm3 = 0b001
+ NeonU16 = 1 << 24 | 0x2, // U = 1, imm3 = 0b010
+ NeonU32 = 1 << 24 | 0x4, // U = 1, imm3 = 0b100
+ NeonDataTypeSizeMask = 0x7,
+ NeonDataTypeUMask = 1 << 24
+};
+
+enum NeonListType { nlt_1 = 0x7, nlt_2 = 0xA, nlt_3 = 0x6, nlt_4 = 0x2 };
+
+enum NeonSize { Neon8 = 0x0, Neon16 = 0x1, Neon32 = 0x2, Neon64 = 0x3 };
+
+// -----------------------------------------------------------------------------
+// Supervisor Call (svc) specific support.
+
+// Special Software Interrupt codes when used in the presence of the ARM
+// simulator.
+// svc (formerly swi) provides a 24-bit immediate value. Use bits 22:0 for
+// standard SoftwareInterruptCodes. Bit 23 is reserved for the stop feature.
+enum SoftwareInterruptCodes {
+ // transition to C code
+ kCallRtRedirected = 0x10,
+ // break point
+ kBreakpoint = 0x20,
+ // stop
+ kStopCode = 1 << 23
+};
+const uint32_t kStopCodeMask = kStopCode - 1;
+const uint32_t kMaxStopCode = kStopCode - 1;
+const int32_t kDefaultStopCode = -1;
+
+// Type of VFP register. Determines register encoding.
+enum VFPRegPrecision { kSinglePrecision = 0, kDoublePrecision = 1 };
+
+// VFP FPSCR constants.
+enum VFPConversionMode { kFPSCRRounding = 0, kDefaultRoundToZero = 1 };
+
+// This mask does not include the "inexact" or "input denormal" cumulative
+// exception flags, because we usually don't want to check for them.
+const uint32_t kVFPExceptionMask = 0xf;
+const uint32_t kVFPInvalidOpExceptionBit = 1 << 0;
+const uint32_t kVFPOverflowExceptionBit = 1 << 2;
+const uint32_t kVFPUnderflowExceptionBit = 1 << 3;
+const uint32_t kVFPInexactExceptionBit = 1 << 4;
+const uint32_t kVFPFlushToZeroMask = 1 << 24;
+const uint32_t kVFPDefaultNaNModeControlBit = 1 << 25;
+
+const uint32_t kVFPNConditionFlagBit = 1 << 31;
+const uint32_t kVFPZConditionFlagBit = 1 << 30;
+const uint32_t kVFPCConditionFlagBit = 1 << 29;
+const uint32_t kVFPVConditionFlagBit = 1 << 28;
+
+// VFP rounding modes. See ARM DDI 0406B Page A2-29.
+enum VFPRoundingMode {
+ RN = 0 << 22, // Round to Nearest.
+ RP = 1 << 22, // Round towards Plus Infinity.
+ RM = 2 << 22, // Round towards Minus Infinity.
+ RZ = 3 << 22, // Round towards zero.
+
+ // Aliases.
+ kRoundToNearest = RN,
+ kRoundToPlusInf = RP,
+ kRoundToMinusInf = RM,
+ kRoundToZero = RZ
+};
+
+const uint32_t kVFPRoundingModeMask = 3 << 22;
+
+enum CheckForInexactConversion {
+ kCheckForInexactConversion,
+ kDontCheckForInexactConversion
+};
+
+// -----------------------------------------------------------------------------
+// Hints.
+
+// Branch hints are not used on the ARM. They are defined so that they can
+// appear in shared function signatures, but will be ignored in ARM
+// implementations.
+enum Hint { no_hint };
+
+// Hints are not used on ARM. Negating is trivial.
+inline Hint NegateHint(Hint ignored) { return no_hint; }
+
+// -----------------------------------------------------------------------------
+// Instruction abstraction.
+
+// The class Instruction enables access to individual fields defined in the ARM
+// architecture instruction set encoding as described in figure A3-1.
+// Note that the Assembler uses typedef int32_t Instr.
+//
+// Example: Test whether the instruction at ptr does set the condition code
+// bits.
+//
+// bool InstructionSetsConditionCodes(byte* ptr) {
+// Instruction* instr = Instruction::At(ptr);
+// int type = instr->TypeValue();
+// return ((type == 0) || (type == 1)) && instr->HasS();
+// }
+//
+class Instruction {
+ public:
+ enum { kInstrSize = 4, kInstrSizeLog2 = 2, kPCReadOffset = 8 };
+
+ // Helper macro to define static accessors.
+  // We use the cast-to-char* trick to bypass the strict-aliasing rules.
+# define DECLARE_STATIC_TYPED_ACCESSOR(return_type, Name) \
+ static inline return_type Name(Instr instr) { \
+ char* temp = reinterpret_cast<char*>(&instr); \
+ return reinterpret_cast<Instruction*>(temp)->Name(); \
+ }
+
+# define DECLARE_STATIC_ACCESSOR(Name) DECLARE_STATIC_TYPED_ACCESSOR(int, Name)
+
+ // Get the raw instruction bits.
+ inline Instr InstructionBits() const {
+ return *reinterpret_cast<const Instr*>(this);
+ }
+
+ // Set the raw instruction bits to value.
+ inline void SetInstructionBits(Instr value) {
+ *reinterpret_cast<Instr*>(this) = value;
+ }
+
+ // Read one particular bit out of the instruction bits.
+ inline int Bit(int nr) const { return (InstructionBits() >> nr) & 1; }
+
+ // Read a bit field's value out of the instruction bits.
+ inline int Bits(int hi, int lo) const {
+ return (InstructionBits() >> lo) & ((2 << (hi - lo)) - 1);
+ }
+
+ // Read a bit field out of the instruction bits.
+ inline int BitField(int hi, int lo) const {
+ return InstructionBits() & (((2 << (hi - lo)) - 1) << lo);
+ }
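+  // For example, on the encoding 0xe3a01042 (mov r1, #0x42), Bits(15, 12)
+  // yields 0x1 (the Rd field) and Bits(7, 0) yields 0x42; the expression
+  // (2 << (hi - lo)) - 1 builds a mask of hi - lo + 1 one-bits.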
+
+ // Static support.
+
+ // Read one particular bit out of the instruction bits.
+ static inline int Bit(Instr instr, int nr) { return (instr >> nr) & 1; }
+
+ // Read the value of a bit field out of the instruction bits.
+ static inline int Bits(Instr instr, int hi, int lo) {
+ return (instr >> lo) & ((2 << (hi - lo)) - 1);
+ }
+
+ // Read a bit field out of the instruction bits.
+ static inline int BitField(Instr instr, int hi, int lo) {
+ return instr & (((2 << (hi - lo)) - 1) << lo);
+ }
+
+  // Accessors for the different named fields used in the ARM encoding.
+  // The naming of these accessors corresponds to figure A3-1.
+  //
+  // Two kinds of accessors are declared:
+  // - <Name>Field() will return the raw field, i.e. the field's bits at their
+  //   original place in the instruction encoding.
+  //   e.g. if instr is the 'addgt r0, r1, r2' instruction, encoded as
+  //   0xC0810002, ConditionField(instr) will return 0xC0000000.
+  // - <Name>Value() will return the field value, shifted back to bit 0.
+  //   e.g. if instr is the 'addgt r0, r1, r2' instruction, encoded as
+  //   0xC0810002, ConditionValue(instr) will return 0xC.
+
+ // Generally applicable fields
+ inline Condition ConditionValue() const {
+ return static_cast<Condition>(Bits(31, 28));
+ }
+ inline Condition ConditionField() const {
+ return static_cast<Condition>(BitField(31, 28));
+ }
+ DECLARE_STATIC_TYPED_ACCESSOR(Condition, ConditionValue);
+ DECLARE_STATIC_TYPED_ACCESSOR(Condition, ConditionField);
+
+ inline int TypeValue() const { return Bits(27, 25); }
+ inline int SpecialValue() const { return Bits(27, 23); }
+
+ inline int RnValue() const { return Bits(19, 16); }
+ DECLARE_STATIC_ACCESSOR(RnValue);
+ inline int RdValue() const { return Bits(15, 12); }
+ DECLARE_STATIC_ACCESSOR(RdValue);
+
+ inline int CoprocessorValue() const { return Bits(11, 8); }
+ // Support for VFP.
+ // Vn(19-16) | Vd(15-12) | Vm(3-0)
+ inline int VnValue() const { return Bits(19, 16); }
+ inline int VmValue() const { return Bits(3, 0); }
+ inline int VdValue() const { return Bits(15, 12); }
+ inline int NValue() const { return Bit(7); }
+ inline int MValue() const { return Bit(5); }
+ inline int DValue() const { return Bit(22); }
+ inline int RtValue() const { return Bits(15, 12); }
+ inline int PValue() const { return Bit(24); }
+ inline int UValue() const { return Bit(23); }
+ inline int Opc1Value() const { return (Bit(23) << 2) | Bits(21, 20); }
+ inline int Opc2Value() const { return Bits(19, 16); }
+ inline int Opc3Value() const { return Bits(7, 6); }
+ inline int SzValue() const { return Bit(8); }
+ inline int VLValue() const { return Bit(20); }
+ inline int VCValue() const { return Bit(8); }
+ inline int VAValue() const { return Bits(23, 21); }
+ inline int VBValue() const { return Bits(6, 5); }
+ inline int VFPNRegValue(VFPRegPrecision pre) {
+ return VFPGlueRegValue(pre, 16, 7);
+ }
+ inline int VFPMRegValue(VFPRegPrecision pre) {
+ return VFPGlueRegValue(pre, 0, 5);
+ }
+ inline int VFPDRegValue(VFPRegPrecision pre) {
+ return VFPGlueRegValue(pre, 12, 22);
+ }
+
+ // Fields used in Data processing instructions
+ inline int OpcodeValue() const { return static_cast<Opcode>(Bits(24, 21)); }
+ inline Opcode OpcodeField() const {
+ return static_cast<Opcode>(BitField(24, 21));
+ }
+ inline int SValue() const { return Bit(20); }
+ // with register
+ inline int RmValue() const { return Bits(3, 0); }
+ DECLARE_STATIC_ACCESSOR(RmValue);
+ inline int ShiftValue() const { return static_cast<ShiftOp>(Bits(6, 5)); }
+ inline ShiftOp ShiftField() const {
+ return static_cast<ShiftOp>(BitField(6, 5));
+ }
+ inline int RegShiftValue() const { return Bit(4); }
+ inline int RsValue() const { return Bits(11, 8); }
+ inline int ShiftAmountValue() const { return Bits(11, 7); }
+ // with immediate
+ inline int RotateValue() const { return Bits(11, 8); }
+ DECLARE_STATIC_ACCESSOR(RotateValue);
+ inline int Immed8Value() const { return Bits(7, 0); }
+ DECLARE_STATIC_ACCESSOR(Immed8Value);
+ inline int Immed4Value() const { return Bits(19, 16); }
+ inline int ImmedMovwMovtValue() const {
+ return Immed4Value() << 12 | Offset12Value();
+ }
+ DECLARE_STATIC_ACCESSOR(ImmedMovwMovtValue);
+
+ // Fields used in Load/Store instructions
+ inline int PUValue() const { return Bits(24, 23); }
+ inline int PUField() const { return BitField(24, 23); }
+ inline int BValue() const { return Bit(22); }
+ inline int WValue() const { return Bit(21); }
+ inline int LValue() const { return Bit(20); }
+ // with register uses same fields as Data processing instructions above
+ // with immediate
+ inline int Offset12Value() const { return Bits(11, 0); }
+ // multiple
+ inline int RlistValue() const { return Bits(15, 0); }
+ // extra loads and stores
+ inline int SignValue() const { return Bit(6); }
+ inline int HValue() const { return Bit(5); }
+ inline int ImmedHValue() const { return Bits(11, 8); }
+ inline int ImmedLValue() const { return Bits(3, 0); }
+
+ // Fields used in Branch instructions
+ inline int LinkValue() const { return Bit(24); }
+ inline int SImmed24Value() const { return ((InstructionBits() << 8) >> 8); }
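+  // The left shift followed by a signed right shift sign-extends the
+  // 24-bit branch offset (Instr is a signed 32-bit type).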
+
+ // Fields used in Software interrupt instructions
+ inline SoftwareInterruptCodes SvcValue() const {
+ return static_cast<SoftwareInterruptCodes>(Bits(23, 0));
+ }
+
+ // Test for special encodings of type 0 instructions (extra loads and stores,
+ // as well as multiplications).
+ inline bool IsSpecialType0() const { return (Bit(7) == 1) && (Bit(4) == 1); }
+
+ // Test for miscellaneous instructions encodings of type 0 instructions.
+ inline bool IsMiscType0() const {
+ return (Bit(24) == 1) && (Bit(23) == 0) && (Bit(20) == 0) &&
+ ((Bit(7) == 0));
+ }
+
+ // Test for a nop instruction, which falls under type 1.
+ inline bool IsNopType1() const { return Bits(24, 0) == 0x0120F000; }
+
+  // Test for a csdb instruction, which falls under type 1.
+ inline bool IsCsdbType1() const { return Bits(24, 0) == 0x0120F014; }
+
+ // Test for a stop instruction.
+ inline bool IsStop() const {
+ return (TypeValue() == 7) && (Bit(24) == 1) && (SvcValue() >= kStopCode);
+ }
+
+ // Special accessors that test for existence of a value.
+ inline bool HasS() const { return SValue() == 1; }
+ inline bool HasB() const { return BValue() == 1; }
+ inline bool HasW() const { return WValue() == 1; }
+ inline bool HasL() const { return LValue() == 1; }
+ inline bool HasU() const { return UValue() == 1; }
+ inline bool HasSign() const { return SignValue() == 1; }
+ inline bool HasH() const { return HValue() == 1; }
+ inline bool HasLink() const { return LinkValue() == 1; }
+
+ // Decoding the double immediate in the vmov instruction.
+ double DoubleImmedVmov() const;
+
+  // Instructions are read out of a code stream. The only way to get a
+  // reference to an instruction is to convert a pointer. There is no way
+  // to allocate or create instances of class Instruction.
+  // Use the At(pc) function to create references to Instruction.
+ static Instruction* At(uint8_t* pc) {
+ return reinterpret_cast<Instruction*>(pc);
+ }
+
+ private:
+ // Join split register codes, depending on single or double precision.
+ // four_bit is the position of the least-significant bit of the four
+ // bit specifier. one_bit is the position of the additional single bit
+ // specifier.
+ inline int VFPGlueRegValue(VFPRegPrecision pre, int four_bit, int one_bit) {
+ if (pre == kSinglePrecision) {
+ return (Bits(four_bit + 3, four_bit) << 1) | Bit(one_bit);
+ }
+ return (Bit(one_bit) << 4) | Bits(four_bit + 3, four_bit);
+ }
+
+ // We need to prevent the creation of instances of class Instruction.
+ Instruction() = delete;
+ Instruction(const Instruction&) = delete;
+ void operator=(const Instruction&) = delete;
+};
+
+// Helper functions for converting between register numbers and names.
+class Registers {
+ public:
+ // Return the name of the register.
+ static const char* Name(int reg);
+
+ // Lookup the register number for the name provided.
+ static int Number(const char* name);
+
+ struct RegisterAlias {
+ int reg;
+ const char* name;
+ };
+
+ private:
+ static const char* names_[kNumRegisters];
+ static const RegisterAlias aliases_[];
+};
+
+// Helper functions for converting between VFP register numbers and names.
+class VFPRegisters {
+ public:
+ // Return the name of the register.
+ static const char* Name(int reg, bool is_double);
+
+ // Lookup the register number for the name provided.
+ // Set flag pointed by is_double to true if register
+ // is double-precision.
+ static int Number(const char* name, bool* is_double);
+
+ private:
+ static const char* names_[kNumVFPRegisters];
+};
+
+} // namespace disasm
+} // namespace jit
+} // namespace js
+
+#endif // JS_DISASM_ARM
+
+#endif // jit_arm_disasm_Constants_arm_h
diff --git a/js/src/jit/arm/disasm/Disasm-arm.cpp b/js/src/jit/arm/disasm/Disasm-arm.cpp
new file mode 100644
index 0000000000..97f39e1331
--- /dev/null
+++ b/js/src/jit/arm/disasm/Disasm-arm.cpp
@@ -0,0 +1,2031 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ */
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// A Disassembler object is used to disassemble a block of code instruction by
+// instruction. The default implementation of the NameConverter object can be
+// overridden to modify register names or to do symbol lookup on addresses.
+//
+// The example below will disassemble a block of code and print it to stdout.
+//
+// disasm::NameConverter converter;
+// disasm::Disassembler d(converter);
+// for (uint8_t* pc = begin; pc < end;) {
+// disasm::EmbeddedVector<char, disasm::ReasonableBufferSize> buffer;
+// uint8_t* prev_pc = pc;
+// pc += d.InstructionDecode(buffer, pc);
+// printf("%p %08x %s\n",
+// prev_pc, *reinterpret_cast<int32_t*>(prev_pc), buffer);
+// }
+//
+// The Disassembler class also has a convenience method to disassemble a block
+// of code into a FILE*, meaning that the above functionality could also be
+// achieved by just calling Disassembler::Disassemble(stdout, begin, end);
+
+#include "jit/arm/disasm/Disasm-arm.h"
+
+#ifdef JS_DISASM_ARM
+
+# include <stdarg.h>
+# include <stdio.h>
+# include <string.h>
+
+# include "jit/arm/disasm/Constants-arm.h"
+
+namespace js {
+namespace jit {
+namespace disasm {
+
+// Helper function for printing to a Vector.
+static int MOZ_FORMAT_PRINTF(2, 3)
+ SNPrintF(V8Vector<char> str, const char* format, ...) {
+ va_list args;
+ va_start(args, format);
+ int result = vsnprintf(str.start(), str.length(), format, args);
+ va_end(args);
+ return result;
+}
+
+//------------------------------------------------------------------------------
+
+// Decoder decodes and disassembles instructions into an output buffer.
+// It uses the converter to turn register names and call destinations into
+// more informative descriptions.
+class Decoder {
+ public:
+ Decoder(const disasm::NameConverter& converter, V8Vector<char> out_buffer)
+ : converter_(converter), out_buffer_(out_buffer), out_buffer_pos_(0) {
+ out_buffer_[out_buffer_pos_] = '\0';
+ }
+
+ ~Decoder() {}
+
+  // Writes one disassembled instruction into the out buffer (0-terminated).
+ // Returns the length of the disassembled machine instruction in bytes.
+ int InstructionDecode(uint8_t* instruction);
+
+ static bool IsConstantPoolAt(uint8_t* instr_ptr);
+ static int ConstantPoolSizeAt(uint8_t* instr_ptr);
+
+ private:
+ // Bottleneck functions to print into the out_buffer.
+ void PrintChar(const char ch);
+ void Print(const char* str);
+
+ // Printing of common values.
+ void PrintRegister(int reg);
+ void PrintSRegister(int reg);
+ void PrintDRegister(int reg);
+ int FormatVFPRegister(Instruction* instr, const char* format);
+ void PrintMovwMovt(Instruction* instr);
+ int FormatVFPinstruction(Instruction* instr, const char* format);
+ void PrintCondition(Instruction* instr);
+ void PrintShiftRm(Instruction* instr);
+ void PrintShiftImm(Instruction* instr);
+ void PrintShiftSat(Instruction* instr);
+ void PrintPU(Instruction* instr);
+ void PrintSoftwareInterrupt(SoftwareInterruptCodes svc);
+
+ // Handle formatting of instructions and their options.
+ int FormatRegister(Instruction* instr, const char* option);
+ void FormatNeonList(int Vd, int type);
+ void FormatNeonMemory(int Rn, int align, int Rm);
+ int FormatOption(Instruction* instr, const char* option);
+ void Format(Instruction* instr, const char* format);
+ void Unknown(Instruction* instr);
+
+ // Each of these functions decodes one particular instruction type, a 3-bit
+ // field in the instruction encoding.
+ // Types 0 and 1 are combined as they are largely the same except for the way
+ // they interpret the shifter operand.
+ void DecodeType01(Instruction* instr);
+ void DecodeType2(Instruction* instr);
+ void DecodeType3(Instruction* instr);
+ void DecodeType4(Instruction* instr);
+ void DecodeType5(Instruction* instr);
+ void DecodeType6(Instruction* instr);
+ // Type 7 includes special Debugger instructions.
+ int DecodeType7(Instruction* instr);
+ // For VFP support.
+ void DecodeTypeVFP(Instruction* instr);
+ void DecodeType6CoprocessorIns(Instruction* instr);
+
+ void DecodeSpecialCondition(Instruction* instr);
+
+ void DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(Instruction* instr);
+ void DecodeVCMP(Instruction* instr);
+ void DecodeVCVTBetweenDoubleAndSingle(Instruction* instr);
+ void DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr);
+
+ const disasm::NameConverter& converter_;
+ V8Vector<char> out_buffer_;
+ int out_buffer_pos_;
+
+ // Disallow copy and assign.
+ Decoder(const Decoder&) = delete;
+ void operator=(const Decoder&) = delete;
+};
+
+// Support for assertions in the Decoder formatting functions.
+# define STRING_STARTS_WITH(string, compare_string) \
+ (strncmp(string, compare_string, strlen(compare_string)) == 0)
+
+// Append the ch to the output buffer.
+void Decoder::PrintChar(const char ch) { out_buffer_[out_buffer_pos_++] = ch; }
+
+// Append the str to the output buffer.
+void Decoder::Print(const char* str) {
+ char cur = *str++;
+ while (cur != '\0' && (out_buffer_pos_ < int(out_buffer_.length() - 1))) {
+ PrintChar(cur);
+ cur = *str++;
+ }
+ out_buffer_[out_buffer_pos_] = 0;
+}
+
+// These condition names are defined to match the native disassembler
+// formatting. See for example the output of "objdump -d <binary file>".
+static const char* const cond_names[kNumberOfConditions] = {
+ "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
+ "hi", "ls", "ge", "lt", "gt", "le", "", "invalid",
+};
+
+// Print the condition guarding the instruction.
+void Decoder::PrintCondition(Instruction* instr) {
+ Print(cond_names[instr->ConditionValue()]);
+}
+
+// Print the register name according to the active name converter.
+void Decoder::PrintRegister(int reg) {
+ Print(converter_.NameOfCPURegister(reg));
+}
+
+// Print the VFP S register name according to the active name converter.
+void Decoder::PrintSRegister(int reg) { Print(VFPRegisters::Name(reg, false)); }
+
+// Print the VFP D register name according to the active name converter.
+void Decoder::PrintDRegister(int reg) { Print(VFPRegisters::Name(reg, true)); }
+
+// These shift names are defined to match the native disassembler
+// formatting. See for example the output of "objdump -d <binary file>".
+static const char* const shift_names[kNumberOfShifts] = {"lsl", "lsr", "asr",
+ "ror"};
+
+// Print the register shift operands for the instruction. Generally used for
+// data processing instructions.
+void Decoder::PrintShiftRm(Instruction* instr) {
+ ShiftOp shift = instr->ShiftField();
+ int shift_index = instr->ShiftValue();
+ int shift_amount = instr->ShiftAmountValue();
+ int rm = instr->RmValue();
+
+ PrintRegister(rm);
+
+ if ((instr->RegShiftValue() == 0) && (shift == LSL) && (shift_amount == 0)) {
+ // Special case for using rm only.
+ return;
+ }
+ if (instr->RegShiftValue() == 0) {
+ // by immediate
+ if ((shift == ROR) && (shift_amount == 0)) {
+ Print(", RRX");
+ return;
+ } else if (((shift == LSR) || (shift == ASR)) && (shift_amount == 0)) {
+ shift_amount = 32;
+ }
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, ", %s #%d",
+ shift_names[shift_index], shift_amount);
+ } else {
+ // by register
+ int rs = instr->RsValue();
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, ", %s ",
+ shift_names[shift_index]);
+ PrintRegister(rs);
+ }
+}
+
+static inline uint32_t RotateRight32(uint32_t value, uint32_t shift) {
+ if (shift == 0) return value;
+ return (value >> shift) | (value << (32 - shift));
+}
+
+// Print the immediate operand for the instruction. Generally used for data
+// processing instructions.
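+// For example, a rotate field of 4 with immed8 0xff decodes to
+// RotateRight32(0xff, 8) == 0xff000000, which is printed signed as
+// "#-16777216".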
+void Decoder::PrintShiftImm(Instruction* instr) {
+ int rotate = instr->RotateValue() * 2;
+ int immed8 = instr->Immed8Value();
+ int imm = RotateRight32(immed8, rotate);
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "#%d", imm);
+}
+
+// Print the optional shift and immediate used by saturating instructions.
+void Decoder::PrintShiftSat(Instruction* instr) {
+ int shift = instr->Bits(11, 7);
+ if (shift > 0) {
+    out_buffer_pos_ +=
+        SNPrintF(out_buffer_ + out_buffer_pos_, ", %s #%d",
+                 shift_names[instr->Bit(6) * 2], shift);
+ }
+}
+
+// Print PU formatting to reduce complexity of FormatOption.
+void Decoder::PrintPU(Instruction* instr) {
+ switch (instr->PUField()) {
+ case da_x: {
+ Print("da");
+ break;
+ }
+ case ia_x: {
+ Print("ia");
+ break;
+ }
+ case db_x: {
+ Print("db");
+ break;
+ }
+ case ib_x: {
+ Print("ib");
+ break;
+ }
+ default: {
+ MOZ_CRASH();
+ break;
+ }
+ }
+}
+
+// Print SoftwareInterrupt codes. Factoring this out reduces the complexity of
+// the FormatOption method.
+void Decoder::PrintSoftwareInterrupt(SoftwareInterruptCodes svc) {
+ switch (svc) {
+ case kCallRtRedirected:
+ Print("call rt redirected");
+ return;
+ case kBreakpoint:
+ Print("breakpoint");
+ return;
+ default:
+ if (svc >= kStopCode) {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d - 0x%x",
+ svc & kStopCodeMask, svc & kStopCodeMask);
+ } else {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", svc);
+ }
+ return;
+ }
+}
+
+// Handle all register based formatting in this function to reduce the
+// complexity of FormatOption.
+int Decoder::FormatRegister(Instruction* instr, const char* format) {
+ MOZ_ASSERT(format[0] == 'r');
+ if (format[1] == 'n') { // 'rn: Rn register
+ int reg = instr->RnValue();
+ PrintRegister(reg);
+ return 2;
+ } else if (format[1] == 'd') { // 'rd: Rd register
+ int reg = instr->RdValue();
+ PrintRegister(reg);
+ return 2;
+ } else if (format[1] == 's') { // 'rs: Rs register
+ int reg = instr->RsValue();
+ PrintRegister(reg);
+ return 2;
+ } else if (format[1] == 'm') { // 'rm: Rm register
+ int reg = instr->RmValue();
+ PrintRegister(reg);
+ return 2;
+ } else if (format[1] == 't') { // 'rt: Rt register
+ int reg = instr->RtValue();
+ PrintRegister(reg);
+ return 2;
+ } else if (format[1] == 'l') {
+ // 'rlist: register list for load and store multiple instructions
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "rlist"));
+ int rlist = instr->RlistValue();
+ int reg = 0;
+ Print("{");
+ // Print register list in ascending order, by scanning the bit mask.
+ while (rlist != 0) {
+ if ((rlist & 1) != 0) {
+ PrintRegister(reg);
+ if ((rlist >> 1) != 0) {
+ Print(", ");
+ }
+ }
+ reg++;
+ rlist >>= 1;
+ }
+ Print("}");
+ return 5;
+ }
+ MOZ_CRASH();
+ return -1;
+}
+
+// Handle all VFP register based formatting in this function to reduce the
+// complexity of FormatOption.
+int Decoder::FormatVFPRegister(Instruction* instr, const char* format) {
+ MOZ_ASSERT((format[0] == 'S') || (format[0] == 'D'));
+
+ VFPRegPrecision precision =
+ format[0] == 'D' ? kDoublePrecision : kSinglePrecision;
+
+ int retval = 2;
+ int reg = -1;
+ if (format[1] == 'n') {
+ reg = instr->VFPNRegValue(precision);
+ } else if (format[1] == 'm') {
+ reg = instr->VFPMRegValue(precision);
+ } else if (format[1] == 'd') {
+ if ((instr->TypeValue() == 7) && (instr->Bit(24) == 0x0) &&
+ (instr->Bits(11, 9) == 0x5) && (instr->Bit(4) == 0x1)) {
+ // vmov.32 has Vd in a different place.
+ reg = instr->Bits(19, 16) | (instr->Bit(7) << 4);
+ } else {
+ reg = instr->VFPDRegValue(precision);
+ }
+
+    if (format[2] == '+') {
+      int immed8 = instr->Immed8Value();
+      if (format[0] == 'S') reg += immed8 - 1;
+      if (format[0] == 'D') reg += (immed8 / 2 - 1);
+      retval = 3;
+    }
+ } else {
+ MOZ_CRASH();
+ }
+
+ if (precision == kSinglePrecision) {
+ PrintSRegister(reg);
+ } else {
+ PrintDRegister(reg);
+ }
+
+ return retval;
+}
+
+int Decoder::FormatVFPinstruction(Instruction* instr, const char* format) {
+ Print(format);
+ return 0;
+}
+
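+// Print a NEON register list. For example, FormatNeonList(4, nlt_2) appends
+// "{d4, d5}" to the output buffer.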
+void Decoder::FormatNeonList(int Vd, int type) {
+ if (type == nlt_1) {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "{d%d}", Vd);
+ } else if (type == nlt_2) {
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "{d%d, d%d}", Vd, Vd + 1);
+ } else if (type == nlt_3) {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
+ "{d%d, d%d, d%d}", Vd, Vd + 1, Vd + 2);
+ } else if (type == nlt_4) {
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "{d%d, d%d, d%d, d%d}", Vd,
+ Vd + 1, Vd + 2, Vd + 3);
+ }
+}
+
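+// Print a NEON memory operand. The align field encodes an alignment of
+// (1 << align) << 6 bits; Rm == 15 means no writeback, Rm == 13 means base
+// writeback, and any other Rm is a post-index register. For example,
+// FormatNeonMemory(0, 1, 15) appends "[r0:128]".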
+void Decoder::FormatNeonMemory(int Rn, int align, int Rm) {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "[r%d", Rn);
+ if (align != 0) {
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, ":%d", (1 << align) << 6);
+ }
+ if (Rm == 15) {
+ Print("]");
+ } else if (Rm == 13) {
+ Print("]!");
+ } else {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "], r%d", Rm);
+ }
+}
+
+// Print the movw or movt instruction.
+void Decoder::PrintMovwMovt(Instruction* instr) {
+ int imm = instr->ImmedMovwMovtValue();
+ int rd = instr->RdValue();
+ PrintRegister(rd);
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, ", #%d", imm);
+}
+
+// FormatOption takes a formatting string and interprets it based on the
+// current instruction. The format string points to the first character of
+// the option string (the option escape has already been consumed by the
+// caller). FormatOption returns the number of characters that were consumed
+// from the formatting string.
+int Decoder::FormatOption(Instruction* instr, const char* format) {
+ switch (format[0]) {
+ case 'a': { // 'a: accumulate multiplies
+ if (instr->Bit(21) == 0) {
+ Print("ul");
+ } else {
+ Print("la");
+ }
+ return 1;
+ }
+ case 'b': { // 'b: byte loads or stores
+ if (instr->HasB()) {
+ Print("b");
+ }
+ return 1;
+ }
+ case 'c': { // 'cond: conditional execution
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "cond"));
+ PrintCondition(instr);
+ return 4;
+ }
+ case 'd': { // 'd: vmov double immediate.
+ double d = instr->DoubleImmedVmov();
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "#%g", d);
+ return 1;
+ }
+ case 'f': { // 'f: bitfield instructions - v7 and above.
+ uint32_t lsbit = instr->Bits(11, 7);
+ uint32_t width = instr->Bits(20, 16) + 1;
+ if (instr->Bit(21) == 0) {
+ // BFC/BFI:
+        // Bits 20-16 represent the most-significant bit. Convert to width.
+ width -= lsbit;
+ MOZ_ASSERT(width > 0);
+ }
+ MOZ_ASSERT((width + lsbit) <= 32);
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "#%d, #%d", lsbit, width);
+ return 1;
+ }
+ case 'h': { // 'h: halfword operation for extra loads and stores
+ if (instr->HasH()) {
+ Print("h");
+ } else {
+ Print("b");
+ }
+ return 1;
+ }
+ case 'i': { // 'i: immediate value from adjacent bits.
+      // Expects tokens in the form imm%02d@%02d, e.g. imm05@07, imm10@16.
+ int width = (format[3] - '0') * 10 + (format[4] - '0');
+ int lsb = (format[6] - '0') * 10 + (format[7] - '0');
+
+ MOZ_ASSERT((width >= 1) && (width <= 32));
+ MOZ_ASSERT((lsb >= 0) && (lsb <= 31));
+ MOZ_ASSERT((width + lsb) <= 32);
+
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d",
+ instr->Bits(width + lsb - 1, lsb));
+ return 8;
+ }
+ case 'l': { // 'l: branch and link
+ if (instr->HasLink()) {
+ Print("l");
+ }
+ return 1;
+ }
+ case 'm': {
+ if (format[1] == 'w') {
+ // 'mw: movt/movw instructions.
+ PrintMovwMovt(instr);
+ return 2;
+ }
+ if (format[1] == 'e') { // 'memop: load/store instructions.
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "memop"));
+ if (instr->HasL()) {
+ Print("ldr");
+ } else {
+ if ((instr->Bits(27, 25) == 0) && (instr->Bit(20) == 0) &&
+ (instr->Bits(7, 6) == 3) && (instr->Bit(4) == 1)) {
+ if (instr->Bit(5) == 1) {
+ Print("strd");
+ } else {
+ Print("ldrd");
+ }
+ return 5;
+ }
+ Print("str");
+ }
+ return 5;
+ }
+ // 'msg: for simulator break instructions
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "msg"));
+ uint8_t* str =
+ reinterpret_cast<uint8_t*>(instr->InstructionBits() & 0x0fffffff);
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%s",
+ converter_.NameInCode(str));
+ return 3;
+ }
+ case 'o': {
+ if ((format[3] == '1') && (format[4] == '2')) {
+ // 'off12: 12-bit offset for load and store instructions
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "off12"));
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d",
+ instr->Offset12Value());
+ return 5;
+ } else if (format[3] == '0') {
+ // 'off0to3and8to19 16-bit immediate encoded in bits 19-8 and 3-0.
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "off0to3and8to19"));
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "%d",
+ (instr->Bits(19, 8) << 4) + instr->Bits(3, 0));
+ return 15;
+ }
+ // 'off8: 8-bit offset for extra load and store instructions
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "off8"));
+ int offs8 = (instr->ImmedHValue() << 4) | instr->ImmedLValue();
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", offs8);
+ return 4;
+ }
+ case 'p': { // 'pu: P and U bits for load and store instructions
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "pu"));
+ PrintPU(instr);
+ return 2;
+ }
+ case 'r': {
+ return FormatRegister(instr, format);
+ }
+ case 's': {
+ if (format[1] == 'h') { // 'shift_op or 'shift_rm or 'shift_sat.
+ if (format[6] == 'o') { // 'shift_op
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "shift_op"));
+ if (instr->TypeValue() == 0) {
+ PrintShiftRm(instr);
+ } else {
+ MOZ_ASSERT(instr->TypeValue() == 1);
+ PrintShiftImm(instr);
+ }
+ return 8;
+ } else if (format[6] == 's') { // 'shift_sat.
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "shift_sat"));
+ PrintShiftSat(instr);
+ return 9;
+ } else { // 'shift_rm
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "shift_rm"));
+ PrintShiftRm(instr);
+ return 8;
+ }
+ } else if (format[1] == 'v') { // 'svc
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "svc"));
+ PrintSoftwareInterrupt(instr->SvcValue());
+ return 3;
+ } else if (format[1] == 'i') { // 'sign: signed extra loads and stores
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "sign"));
+ if (instr->HasSign()) {
+ Print("s");
+ }
+ return 4;
+ }
+ // 's: S field of data processing instructions
+ if (instr->HasS()) {
+ Print("s");
+ }
+ return 1;
+ }
+ case 't': { // 'target: target of branch instructions
+ MOZ_ASSERT(STRING_STARTS_WITH(format, "target"));
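+      // On ARM, PC reads as the address of the current instruction plus 8,
+      // hence the +8 added to the shifted 24-bit offset.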
+ int off = (instr->SImmed24Value() << 2) + 8;
+ out_buffer_pos_ += SNPrintF(
+ out_buffer_ + out_buffer_pos_, "%+d -> %s", off,
+ converter_.NameOfAddress(reinterpret_cast<uint8_t*>(instr) + off));
+ return 6;
+ }
+ case 'u': { // 'u: signed or unsigned multiplies
+ // The manual gets the meaning of bit 22 backwards in the multiply
+ // instruction overview on page A3.16.2. The instructions that
+ // exist in u and s variants are the following:
+ // smull A4.1.87
+ // umull A4.1.129
+ // umlal A4.1.128
+ // smlal A4.1.76
+      // For these, 0 means u and 1 means s, as can be seen on their
+      // individual pages. The other 18 mul instructions have the bit set or
+      // unset in arbitrary ways that are unrelated to the signedness of the
+      // instruction. None of these 18 instructions exists in both a 'u' and
+      // an 's' variant.
+
+ if (instr->Bit(22) == 0) {
+ Print("u");
+ } else {
+ Print("s");
+ }
+ return 1;
+ }
+ case 'v': {
+ return FormatVFPinstruction(instr, format);
+ }
+ case 'S':
+ case 'D': {
+ return FormatVFPRegister(instr, format);
+ }
+ case 'w': { // 'w: W field of load and store instructions
+ if (instr->HasW()) {
+ Print("!");
+ }
+ return 1;
+ }
+ default: {
+ MOZ_CRASH();
+ break;
+ }
+ }
+ MOZ_CRASH();
+ return -1;
+}
+
+// Format takes a formatting string for a whole instruction and prints it into
+// the output buffer. All escaped options are handed to FormatOption to be
+// parsed further.
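+// For example, the format string "mov'cond's 'rd, 'shift_op" might come out
+// as "moveqs r0, r1, lsl #2" for a flag-setting conditional move.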
+void Decoder::Format(Instruction* instr, const char* format) {
+ char cur = *format++;
+ while ((cur != 0) && (out_buffer_pos_ < (out_buffer_.length() - 1))) {
+ if (cur == '\'') { // Single quote is used as the formatting escape.
+ format += FormatOption(instr, format);
+ } else {
+ out_buffer_[out_buffer_pos_++] = cur;
+ }
+ cur = *format++;
+ }
+ out_buffer_[out_buffer_pos_] = '\0';
+}
+
+// The disassembler may end up decoding data inlined in the code. We do not
+// want it to crash if the data does not resemble any known instruction.
+# define VERIFY(condition) \
+ if (!(condition)) { \
+ Unknown(instr); \
+ return; \
+ }
+
+// For currently unimplemented decodings the disassembler calls Unknown(instr),
+// which just prints "unknown" for the instruction bits.
+void Decoder::Unknown(Instruction* instr) { Format(instr, "unknown"); }
+
+void Decoder::DecodeType01(Instruction* instr) {
+ int type = instr->TypeValue();
+ if ((type == 0) && instr->IsSpecialType0()) {
+ // multiply instruction or extra loads and stores
+ if (instr->Bits(7, 4) == 9) {
+ if (instr->Bit(24) == 0) {
+ // multiply instructions
+ if (instr->Bit(23) == 0) {
+ if (instr->Bit(21) == 0) {
+ // The MUL instruction description (A 4.1.33) refers to Rd as being
+ // the destination for the operation, but it confusingly uses the
+ // Rn field to encode it.
+ Format(instr, "mul'cond's 'rn, 'rm, 'rs");
+ } else {
+ if (instr->Bit(22) == 0) {
+ // The MLA instruction description (A 4.1.28) refers to the order
+ // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
+ // Rn field to encode the Rd register and the Rd field to encode
+ // the Rn register.
+ Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
+ } else {
+ // The MLS instruction description (A 4.1.29) refers to the order
+ // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
+ // Rn field to encode the Rd register and the Rd field to encode
+ // the Rn register.
+ Format(instr, "mls'cond's 'rn, 'rm, 'rs, 'rd");
+ }
+ }
+ } else {
+ // The signed/long multiply instructions use the terms RdHi and RdLo
+ // when referring to the target registers. They are mapped to the Rn
+ // and Rd fields as follows:
+ // RdLo == Rd field
+ // RdHi == Rn field
+ // The order of registers is: <RdLo>, <RdHi>, <Rm>, <Rs>
+ Format(instr, "'um'al'cond's 'rd, 'rn, 'rm, 'rs");
+ }
+ } else {
+ if (instr->Bits(ExclusiveOpHi, ExclusiveOpLo) == ExclusiveOpcode) {
+ if (instr->Bit(ExclusiveLoad) == 1) {
+ switch (instr->Bits(ExclusiveSizeHi, ExclusiveSizeLo)) {
+ case ExclusiveWord:
+ Format(instr, "ldrex'cond 'rt, ['rn]");
+ break;
+ case ExclusiveDouble:
+ Format(instr, "ldrexd'cond 'rt, ['rn]");
+ break;
+ case ExclusiveByte:
+ Format(instr, "ldrexb'cond 'rt, ['rn]");
+ break;
+ case ExclusiveHalf:
+ Format(instr, "ldrexh'cond 'rt, ['rn]");
+ break;
+ }
+ } else {
+ // The documentation names the low four bits of the
+ // store-exclusive instructions "Rt" but canonically
+ // for disassembly they are really "Rm".
+ switch (instr->Bits(ExclusiveSizeHi, ExclusiveSizeLo)) {
+ case ExclusiveWord:
+ Format(instr, "strex'cond 'rd, 'rm, ['rn]");
+ break;
+ case ExclusiveDouble:
+ Format(instr, "strexd'cond 'rd, 'rm, ['rn]");
+ break;
+ case ExclusiveByte:
+ Format(instr, "strexb'cond 'rd, 'rm, ['rn]");
+ break;
+ case ExclusiveHalf:
+ Format(instr, "strexh'cond 'rd, 'rm, ['rn]");
+ break;
+ }
+ }
+ } else {
+ Unknown(instr);
+ }
+ }
+ } else if ((instr->Bit(20) == 0) && ((instr->Bits(7, 4) & 0xd) == 0xd)) {
+ // ldrd, strd
+ switch (instr->PUField()) {
+ case da_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond's 'rd, ['rn], -'rm");
+ } else {
+ Format(instr, "'memop'cond's 'rd, ['rn], #-'off8");
+ }
+ break;
+ }
+ case ia_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond's 'rd, ['rn], +'rm");
+ } else {
+ Format(instr, "'memop'cond's 'rd, ['rn], #+'off8");
+ }
+ break;
+ }
+ case db_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond's 'rd, ['rn, -'rm]'w");
+ } else {
+ Format(instr, "'memop'cond's 'rd, ['rn, #-'off8]'w");
+ }
+ break;
+ }
+ case ib_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond's 'rd, ['rn, +'rm]'w");
+ } else {
+ Format(instr, "'memop'cond's 'rd, ['rn, #+'off8]'w");
+ }
+ break;
+ }
+ default: {
+ // The PU field is a 2-bit field.
+ MOZ_CRASH();
+ break;
+ }
+ }
+ } else {
+ // extra load/store instructions
+ switch (instr->PUField()) {
+ case da_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn], -'rm");
+ } else {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn], #-'off8");
+ }
+ break;
+ }
+ case ia_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn], +'rm");
+ } else {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn], #+'off8");
+ }
+ break;
+ }
+ case db_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn, -'rm]'w");
+ } else {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn, #-'off8]'w");
+ }
+ break;
+ }
+ case ib_x: {
+ if (instr->Bit(22) == 0) {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn, +'rm]'w");
+ } else {
+ Format(instr, "'memop'cond'sign'h 'rd, ['rn, #+'off8]'w");
+ }
+ break;
+ }
+ default: {
+ // The PU field is a 2-bit field.
+ MOZ_CRASH();
+ break;
+ }
+ }
+ return;
+ }
+ } else if ((type == 0) && instr->IsMiscType0()) {
+ if (instr->Bits(22, 21) == 1) {
+ switch (instr->BitField(7, 4)) {
+ case BX:
+ Format(instr, "bx'cond 'rm");
+ break;
+ case BLX:
+ Format(instr, "blx'cond 'rm");
+ break;
+ case BKPT:
+ Format(instr, "bkpt 'off0to3and8to19");
+ break;
+ default:
+ Unknown(instr); // not used by V8
+ break;
+ }
+ } else if (instr->Bits(22, 21) == 3) {
+ switch (instr->BitField(7, 4)) {
+ case CLZ:
+ Format(instr, "clz'cond 'rd, 'rm");
+ break;
+ default:
+ Unknown(instr); // not used by V8
+ break;
+ }
+ } else {
+ Unknown(instr); // not used by V8
+ }
+ } else if ((type == 1) && instr->IsNopType1()) {
+ Format(instr, "nop'cond");
+ } else if ((type == 1) && instr->IsCsdbType1()) {
+ Format(instr, "csdb'cond");
+ } else {
+ switch (instr->OpcodeField()) {
+ case AND: {
+ Format(instr, "and'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case EOR: {
+ Format(instr, "eor'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case SUB: {
+ Format(instr, "sub'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case RSB: {
+ Format(instr, "rsb'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case ADD: {
+ Format(instr, "add'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case ADC: {
+ Format(instr, "adc'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case SBC: {
+ Format(instr, "sbc'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case RSC: {
+ Format(instr, "rsc'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case TST: {
+ if (instr->HasS()) {
+ Format(instr, "tst'cond 'rn, 'shift_op");
+ } else {
+ Format(instr, "movw'cond 'mw");
+ }
+ break;
+ }
+ case TEQ: {
+ if (instr->HasS()) {
+ Format(instr, "teq'cond 'rn, 'shift_op");
+ } else {
+ // Other instructions matching this pattern are handled in the
+ // miscellaneous instructions part above.
+ MOZ_CRASH();
+ }
+ break;
+ }
+ case CMP: {
+ if (instr->HasS()) {
+ Format(instr, "cmp'cond 'rn, 'shift_op");
+ } else {
+ Format(instr, "movt'cond 'mw");
+ }
+ break;
+ }
+ case CMN: {
+ if (instr->HasS()) {
+ Format(instr, "cmn'cond 'rn, 'shift_op");
+ } else {
+ // Other instructions matching this pattern are handled in the
+ // miscellaneous instructions part above.
+ MOZ_CRASH();
+ }
+ break;
+ }
+ case ORR: {
+ Format(instr, "orr'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case MOV: {
+ Format(instr, "mov'cond's 'rd, 'shift_op");
+ break;
+ }
+ case BIC: {
+ Format(instr, "bic'cond's 'rd, 'rn, 'shift_op");
+ break;
+ }
+ case MVN: {
+ Format(instr, "mvn'cond's 'rd, 'shift_op");
+ break;
+ }
+ default: {
+ // The Opcode field is a 4-bit field.
+ MOZ_CRASH();
+ break;
+ }
+ }
+ }
+}
+
+void Decoder::DecodeType2(Instruction* instr) {
+ switch (instr->PUField()) {
+ case da_x: {
+ if (instr->HasW()) {
+ Unknown(instr); // not used in V8
+ return;
+ }
+ Format(instr, "'memop'cond'b 'rd, ['rn], #-'off12");
+ break;
+ }
+ case ia_x: {
+ if (instr->HasW()) {
+ Unknown(instr); // not used in V8
+ return;
+ }
+ Format(instr, "'memop'cond'b 'rd, ['rn], #+'off12");
+ break;
+ }
+ case db_x: {
+ Format(instr, "'memop'cond'b 'rd, ['rn, #-'off12]'w");
+ break;
+ }
+ case ib_x: {
+ Format(instr, "'memop'cond'b 'rd, ['rn, #+'off12]'w");
+ break;
+ }
+ default: {
+ // The PU field is a 2-bit field.
+ MOZ_CRASH();
+ break;
+ }
+ }
+}
+
+void Decoder::DecodeType3(Instruction* instr) {
+ switch (instr->PUField()) {
+ case da_x: {
+ VERIFY(!instr->HasW());
+ Format(instr, "'memop'cond'b 'rd, ['rn], -'shift_rm");
+ break;
+ }
+ case ia_x: {
+ if (instr->Bit(4) == 0) {
+ Format(instr, "'memop'cond'b 'rd, ['rn], +'shift_rm");
+ } else {
+ if (instr->Bit(5) == 0) {
+ switch (instr->Bits(22, 21)) {
+ case 0:
+ if (instr->Bit(20) == 0) {
+ if (instr->Bit(6) == 0) {
+ Format(instr, "pkhbt'cond 'rd, 'rn, 'rm, lsl #'imm05@07");
+ } else {
+ if (instr->Bits(11, 7) == 0) {
+ Format(instr, "pkhtb'cond 'rd, 'rn, 'rm, asr #32");
+ } else {
+ Format(instr, "pkhtb'cond 'rd, 'rn, 'rm, asr #'imm05@07");
+ }
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 1:
+ MOZ_CRASH();
+ break;
+ case 2:
+ MOZ_CRASH();
+ break;
+ case 3:
+ Format(instr, "usat 'rd, #'imm05@16, 'rm'shift_sat");
+ break;
+ }
+ } else {
+ switch (instr->Bits(22, 21)) {
+ case 0:
+ MOZ_CRASH();
+ break;
+ case 1:
+ if (instr->Bits(9, 6) == 1) {
+ if (instr->Bit(20) == 0) {
+ if (instr->Bits(19, 16) == 0xF) {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "sxtb'cond 'rd, 'rm");
+ break;
+ case 1:
+ Format(instr, "sxtb'cond 'rd, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "sxtb'cond 'rd, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "sxtb'cond 'rd, 'rm, ror #24");
+ break;
+ }
+ } else {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "sxtab'cond 'rd, 'rn, 'rm");
+ break;
+ case 1:
+ Format(instr, "sxtab'cond 'rd, 'rn, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "sxtab'cond 'rd, 'rn, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "sxtab'cond 'rd, 'rn, 'rm, ror #24");
+ break;
+ }
+ }
+ } else {
+ if (instr->Bits(19, 16) == 0xF) {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "sxth'cond 'rd, 'rm");
+ break;
+ case 1:
+ Format(instr, "sxth'cond 'rd, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "sxth'cond 'rd, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "sxth'cond 'rd, 'rm, ror #24");
+ break;
+ }
+ } else {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "sxtah'cond 'rd, 'rn, 'rm");
+ break;
+ case 1:
+ Format(instr, "sxtah'cond 'rd, 'rn, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "sxtah'cond 'rd, 'rn, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "sxtah'cond 'rd, 'rn, 'rm, ror #24");
+ break;
+ }
+ }
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 2:
+ if ((instr->Bit(20) == 0) && (instr->Bits(9, 6) == 1)) {
+ if (instr->Bits(19, 16) == 0xF) {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "uxtb16'cond 'rd, 'rm");
+ break;
+ case 1:
+ Format(instr, "uxtb16'cond 'rd, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "uxtb16'cond 'rd, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "uxtb16'cond 'rd, 'rm, ror #24");
+ break;
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ case 3:
+ if ((instr->Bits(9, 6) == 1)) {
+ if ((instr->Bit(20) == 0)) {
+ if (instr->Bits(19, 16) == 0xF) {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "uxtb'cond 'rd, 'rm");
+ break;
+ case 1:
+ Format(instr, "uxtb'cond 'rd, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "uxtb'cond 'rd, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "uxtb'cond 'rd, 'rm, ror #24");
+ break;
+ }
+ } else {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "uxtab'cond 'rd, 'rn, 'rm");
+ break;
+ case 1:
+ Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #24");
+ break;
+ }
+ }
+ } else {
+ if (instr->Bits(19, 16) == 0xF) {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "uxth'cond 'rd, 'rm");
+ break;
+ case 1:
+ Format(instr, "uxth'cond 'rd, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "uxth'cond 'rd, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "uxth'cond 'rd, 'rm, ror #24");
+ break;
+ }
+ } else {
+ switch (instr->Bits(11, 10)) {
+ case 0:
+ Format(instr, "uxtah'cond 'rd, 'rn, 'rm");
+ break;
+ case 1:
+ Format(instr, "uxtah'cond 'rd, 'rn, 'rm, ror #8");
+ break;
+ case 2:
+ Format(instr, "uxtah'cond 'rd, 'rn, 'rm, ror #16");
+ break;
+ case 3:
+ Format(instr, "uxtah'cond 'rd, 'rn, 'rm, ror #24");
+ break;
+ }
+ }
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ break;
+ }
+ }
+ }
+ break;
+ }
+ case db_x: {
+ if (instr->Bits(22, 20) == 0x5) {
+ if (instr->Bits(7, 4) == 0x1) {
+ if (instr->Bits(15, 12) == 0xF) {
+ Format(instr, "smmul'cond 'rn, 'rm, 'rs");
+ } else {
+ // SMMLA (in V8 notation matching ARM ISA format)
+ Format(instr, "smmla'cond 'rn, 'rm, 'rs, 'rd");
+ }
+ break;
+ }
+ }
+ bool FLAG_enable_sudiv = true; // Flag doesn't exist in our engine.
+ if (FLAG_enable_sudiv) {
+ if (instr->Bits(5, 4) == 0x1) {
+ if ((instr->Bit(22) == 0x0) && (instr->Bit(20) == 0x1)) {
+ if (instr->Bit(21) == 0x1) {
+ // UDIV (in V8 notation matching ARM ISA format) rn = rm/rs
+ Format(instr, "udiv'cond'b 'rn, 'rm, 'rs");
+ } else {
+ // SDIV (in V8 notation matching ARM ISA format) rn = rm/rs
+ Format(instr, "sdiv'cond'b 'rn, 'rm, 'rs");
+ }
+ break;
+ }
+ }
+ }
+ Format(instr, "'memop'cond'b 'rd, ['rn, -'shift_rm]'w");
+ break;
+ }
+ case ib_x: {
+ if (instr->HasW() && (instr->Bits(6, 4) == 0x5)) {
+ uint32_t widthminus1 = static_cast<uint32_t>(instr->Bits(20, 16));
+ uint32_t lsbit = static_cast<uint32_t>(instr->Bits(11, 7));
+ uint32_t msbit = widthminus1 + lsbit;
+ if (msbit <= 31) {
+ if (instr->Bit(22)) {
+ Format(instr, "ubfx'cond 'rd, 'rm, 'f");
+ } else {
+ Format(instr, "sbfx'cond 'rd, 'rm, 'f");
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ } else if (!instr->HasW() && (instr->Bits(6, 4) == 0x1)) {
+ uint32_t lsbit = static_cast<uint32_t>(instr->Bits(11, 7));
+ uint32_t msbit = static_cast<uint32_t>(instr->Bits(20, 16));
+ if (msbit >= lsbit) {
+ if (instr->RmValue() == 15) {
+ Format(instr, "bfc'cond 'rd, 'f");
+ } else {
+ Format(instr, "bfi'cond 'rd, 'rm, 'f");
+ }
+ } else {
+ MOZ_CRASH();
+ }
+ } else {
+ Format(instr, "'memop'cond'b 'rd, ['rn, +'shift_rm]'w");
+ }
+ break;
+ }
+ default: {
+ // The PU field is a 2-bit field.
+ MOZ_CRASH();
+ break;
+ }
+ }
+}
+
+void Decoder::DecodeType4(Instruction* instr) {
+ if (instr->Bit(22) != 0) {
+ // Privileged mode currently not supported.
+ Unknown(instr);
+ } else {
+ if (instr->HasL()) {
+ Format(instr, "ldm'cond'pu 'rn'w, 'rlist");
+ } else {
+ Format(instr, "stm'cond'pu 'rn'w, 'rlist");
+ }
+ }
+}
+
+void Decoder::DecodeType5(Instruction* instr) {
+ Format(instr, "b'l'cond 'target");
+}
+
+void Decoder::DecodeType6(Instruction* instr) {
+ DecodeType6CoprocessorIns(instr);
+}
+
+int Decoder::DecodeType7(Instruction* instr) {
+ if (instr->Bit(24) == 1) {
+ if (instr->SvcValue() >= kStopCode) {
+ Format(instr, "stop'cond 'svc");
+ // Also print the stop message. Its address is encoded
+ // in the following 4 bytes.
+ out_buffer_pos_ += SNPrintF(
+ out_buffer_ + out_buffer_pos_, "\n %p %08x stop message: %s",
+ reinterpret_cast<void*>(instr + Instruction::kInstrSize),
+ *reinterpret_cast<uint32_t*>(instr + Instruction::kInstrSize),
+ *reinterpret_cast<char**>(instr + Instruction::kInstrSize));
+ // We have decoded 2 * Instruction::kInstrSize bytes.
+ return 2 * Instruction::kInstrSize;
+ } else {
+ Format(instr, "svc'cond 'svc");
+ }
+ } else {
+ DecodeTypeVFP(instr);
+ }
+ return Instruction::kInstrSize;
+}
+
+// void Decoder::DecodeTypeVFP(Instruction* instr)
+// vmov: Sn = Rt
+// vmov: Rt = Sn
+// vcvt: Dd = Sm
+// vcvt: Sd = Dm
+// vcvt.f64.s32 Dd, Dd, #<fbits>
+// Dd = vabs(Dm)
+// Sd = vabs(Sm)
+// Dd = vneg(Dm)
+// Sd = vneg(Sm)
+// Dd = vadd(Dn, Dm)
+// Sd = vadd(Sn, Sm)
+// Dd = vsub(Dn, Dm)
+// Sd = vsub(Sn, Sm)
+// Dd = vmul(Dn, Dm)
+// Sd = vmul(Sn, Sm)
+// Dd = vmla(Dn, Dm)
+// Sd = vmla(Sn, Sm)
+// Dd = vmls(Dn, Dm)
+// Sd = vmls(Sn, Sm)
+// Dd = vdiv(Dn, Dm)
+// Sd = vdiv(Sn, Sm)
+// vcmp(Dd, Dm)
+// vcmp(Sd, Sm)
+// Dd = vsqrt(Dm)
+// Sd = vsqrt(Sm)
+// vmrs
+// vmsr
+void Decoder::DecodeTypeVFP(Instruction* instr) {
+ VERIFY((instr->TypeValue() == 7) && (instr->Bit(24) == 0x0));
+ VERIFY(instr->Bits(11, 9) == 0x5);
+
+ if (instr->Bit(4) == 0) {
+ if (instr->Opc1Value() == 0x7) {
+ // Other data processing instructions
+ if ((instr->Opc2Value() == 0x0) && (instr->Opc3Value() == 0x1)) {
+ // vmov register to register.
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vmov'cond.f64 'Dd, 'Dm");
+ } else {
+ Format(instr, "vmov'cond.f32 'Sd, 'Sm");
+ }
+ } else if ((instr->Opc2Value() == 0x0) && (instr->Opc3Value() == 0x3)) {
+ // vabs
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vabs'cond.f64 'Dd, 'Dm");
+ } else {
+ Format(instr, "vabs'cond.f32 'Sd, 'Sm");
+ }
+ } else if ((instr->Opc2Value() == 0x1) && (instr->Opc3Value() == 0x1)) {
+ // vneg
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vneg'cond.f64 'Dd, 'Dm");
+ } else {
+ Format(instr, "vneg'cond.f32 'Sd, 'Sm");
+ }
+ } else if ((instr->Opc2Value() == 0x7) && (instr->Opc3Value() == 0x3)) {
+ DecodeVCVTBetweenDoubleAndSingle(instr);
+ } else if ((instr->Opc2Value() == 0x8) && (instr->Opc3Value() & 0x1)) {
+ DecodeVCVTBetweenFloatingPointAndInteger(instr);
+ } else if ((instr->Opc2Value() == 0xA) && (instr->Opc3Value() == 0x3) &&
+ (instr->Bit(8) == 1)) {
+ // vcvt.f64.s32 Dd, Dd, #<fbits>
+ int fraction_bits = 32 - ((instr->Bits(3, 0) << 1) | instr->Bit(5));
+ Format(instr, "vcvt'cond.f64.s32 'Dd, 'Dd");
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, ", #%d", fraction_bits);
+ } else if (((instr->Opc2Value() >> 1) == 0x6) &&
+ (instr->Opc3Value() & 0x1)) {
+ DecodeVCVTBetweenFloatingPointAndInteger(instr);
+ } else if (((instr->Opc2Value() == 0x4) || (instr->Opc2Value() == 0x5)) &&
+ (instr->Opc3Value() & 0x1)) {
+ DecodeVCMP(instr);
+ } else if (((instr->Opc2Value() == 0x1)) && (instr->Opc3Value() == 0x3)) {
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vsqrt'cond.f64 'Dd, 'Dm");
+ } else {
+ Format(instr, "vsqrt'cond.f32 'Sd, 'Sm");
+ }
+ } else if (instr->Opc3Value() == 0x0) {
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vmov'cond.f64 'Dd, 'd");
+ } else {
+ Unknown(instr); // Not used by V8.
+ }
+ } else if (((instr->Opc2Value() == 0x6)) && instr->Opc3Value() == 0x3) {
+ // vrintz - round towards zero (truncate)
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vrintz'cond.f64.f64 'Dd, 'Dm");
+ } else {
+ Format(instr, "vrintz'cond.f32.f32 'Sd, 'Sm");
+ }
+ } else {
+ Unknown(instr); // Not used by V8.
+ }
+ } else if (instr->Opc1Value() == 0x3) {
+ if (instr->SzValue() == 0x1) {
+ if (instr->Opc3Value() & 0x1) {
+ Format(instr, "vsub'cond.f64 'Dd, 'Dn, 'Dm");
+ } else {
+ Format(instr, "vadd'cond.f64 'Dd, 'Dn, 'Dm");
+ }
+ } else {
+ if (instr->Opc3Value() & 0x1) {
+ Format(instr, "vsub'cond.f32 'Sd, 'Sn, 'Sm");
+ } else {
+ Format(instr, "vadd'cond.f32 'Sd, 'Sn, 'Sm");
+ }
+ }
+ } else if ((instr->Opc1Value() == 0x2) && !(instr->Opc3Value() & 0x1)) {
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vmul'cond.f64 'Dd, 'Dn, 'Dm");
+ } else {
+ Format(instr, "vmul'cond.f32 'Sd, 'Sn, 'Sm");
+ }
+ } else if ((instr->Opc1Value() == 0x0) && !(instr->Opc3Value() & 0x1)) {
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vmla'cond.f64 'Dd, 'Dn, 'Dm");
+ } else {
+ Format(instr, "vmla'cond.f32 'Sd, 'Sn, 'Sm");
+ }
+ } else if ((instr->Opc1Value() == 0x0) && (instr->Opc3Value() & 0x1)) {
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vmls'cond.f64 'Dd, 'Dn, 'Dm");
+ } else {
+ Format(instr, "vmls'cond.f32 'Sd, 'Sn, 'Sm");
+ }
+ } else if ((instr->Opc1Value() == 0x4) && !(instr->Opc3Value() & 0x1)) {
+ if (instr->SzValue() == 0x1) {
+ Format(instr, "vdiv'cond.f64 'Dd, 'Dn, 'Dm");
+ } else {
+ Format(instr, "vdiv'cond.f32 'Sd, 'Sn, 'Sm");
+ }
+ } else {
+ Unknown(instr); // Not used by V8.
+ }
+ } else {
+ if ((instr->VCValue() == 0x0) && (instr->VAValue() == 0x0)) {
+ DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(instr);
+ } else if ((instr->VLValue() == 0x0) && (instr->VCValue() == 0x1) &&
+ (instr->Bit(23) == 0x0)) {
+ if (instr->Bit(21) == 0x0) {
+ Format(instr, "vmov'cond.32 'Dd[0], 'rt");
+ } else {
+ Format(instr, "vmov'cond.32 'Dd[1], 'rt");
+ }
+ } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1) &&
+ (instr->Bit(23) == 0x0)) {
+ if (instr->Bit(21) == 0x0) {
+ Format(instr, "vmov'cond.32 'rt, 'Dd[0]");
+ } else {
+ Format(instr, "vmov'cond.32 'rt, 'Dd[1]");
+ }
+ } else if ((instr->VCValue() == 0x0) && (instr->VAValue() == 0x7) &&
+ (instr->Bits(19, 16) == 0x1)) {
+ if (instr->VLValue() == 0) {
+ if (instr->Bits(15, 12) == 0xF) {
+ Format(instr, "vmsr'cond FPSCR, APSR");
+ } else {
+ Format(instr, "vmsr'cond FPSCR, 'rt");
+ }
+ } else {
+ if (instr->Bits(15, 12) == 0xF) {
+ Format(instr, "vmrs'cond APSR, FPSCR");
+ } else {
+ Format(instr, "vmrs'cond 'rt, FPSCR");
+ }
+ }
+ }
+ }
+}
+
+void Decoder::DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(
+ Instruction* instr) {
+ VERIFY((instr->Bit(4) == 1) && (instr->VCValue() == 0x0) &&
+ (instr->VAValue() == 0x0));
+
+ bool to_arm_register = (instr->VLValue() == 0x1);
+
+ if (to_arm_register) {
+ Format(instr, "vmov'cond 'rt, 'Sn");
+ } else {
+ Format(instr, "vmov'cond 'Sn, 'rt");
+ }
+}
+
+void Decoder::DecodeVCMP(Instruction* instr) {
+ VERIFY((instr->Bit(4) == 0) && (instr->Opc1Value() == 0x7));
+ VERIFY(((instr->Opc2Value() == 0x4) || (instr->Opc2Value() == 0x5)) &&
+ (instr->Opc3Value() & 0x1));
+
+ // Comparison.
+ bool dp_operation = (instr->SzValue() == 1);
+ bool raise_exception_for_qnan = (instr->Bit(7) == 0x1);
+
+ if (dp_operation && !raise_exception_for_qnan) {
+ if (instr->Opc2Value() == 0x4) {
+ Format(instr, "vcmp'cond.f64 'Dd, 'Dm");
+ } else if (instr->Opc2Value() == 0x5) {
+ Format(instr, "vcmp'cond.f64 'Dd, #0.0");
+ } else {
+ Unknown(instr); // invalid
+ }
+ } else if (!raise_exception_for_qnan) {
+ if (instr->Opc2Value() == 0x4) {
+ Format(instr, "vcmp'cond.f32 'Sd, 'Sm");
+ } else if (instr->Opc2Value() == 0x5) {
+ Format(instr, "vcmp'cond.f32 'Sd, #0.0");
+ } else {
+ Unknown(instr); // invalid
+ }
+ } else {
+ Unknown(instr); // Not used by V8.
+ }
+}
+
+void Decoder::DecodeVCVTBetweenDoubleAndSingle(Instruction* instr) {
+ VERIFY((instr->Bit(4) == 0) && (instr->Opc1Value() == 0x7));
+ VERIFY((instr->Opc2Value() == 0x7) && (instr->Opc3Value() == 0x3));
+
+ bool double_to_single = (instr->SzValue() == 1);
+
+ if (double_to_single) {
+ Format(instr, "vcvt'cond.f32.f64 'Sd, 'Dm");
+ } else {
+ Format(instr, "vcvt'cond.f64.f32 'Dd, 'Sm");
+ }
+}
+
+void Decoder::DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr) {
+ VERIFY((instr->Bit(4) == 0) && (instr->Opc1Value() == 0x7));
+ VERIFY(((instr->Opc2Value() == 0x8) && (instr->Opc3Value() & 0x1)) ||
+ (((instr->Opc2Value() >> 1) == 0x6) && (instr->Opc3Value() & 0x1)));
+
+ bool to_integer = (instr->Bit(18) == 1);
+ bool dp_operation = (instr->SzValue() == 1);
+ if (to_integer) {
+ bool unsigned_integer = (instr->Bit(16) == 0);
+
+ if (dp_operation) {
+ if (unsigned_integer) {
+ Format(instr, "vcvt'cond.u32.f64 'Sd, 'Dm");
+ } else {
+ Format(instr, "vcvt'cond.s32.f64 'Sd, 'Dm");
+ }
+ } else {
+ if (unsigned_integer) {
+ Format(instr, "vcvt'cond.u32.f32 'Sd, 'Sm");
+ } else {
+ Format(instr, "vcvt'cond.s32.f32 'Sd, 'Sm");
+ }
+ }
+ } else {
+ bool unsigned_integer = (instr->Bit(7) == 0);
+
+ if (dp_operation) {
+ if (unsigned_integer) {
+ Format(instr, "vcvt'cond.f64.u32 'Dd, 'Sm");
+ } else {
+ Format(instr, "vcvt'cond.f64.s32 'Dd, 'Sm");
+ }
+ } else {
+ if (unsigned_integer) {
+ Format(instr, "vcvt'cond.f32.u32 'Sd, 'Sm");
+ } else {
+ Format(instr, "vcvt'cond.f32.s32 'Sd, 'Sm");
+ }
+ }
+ }
+}
+
+// Decode Type 6 coprocessor instructions.
+// Dm = vmov(Rt, Rt2)
+// <Rt, Rt2> = vmov(Dm)
+// Ddst = MEM(Rbase + 4*offset).
+// MEM(Rbase + 4*offset) = Dsrc.
+void Decoder::DecodeType6CoprocessorIns(Instruction* instr) {
+ VERIFY(instr->TypeValue() == 6);
+
+ if (instr->CoprocessorValue() == 0xA) {
+ switch (instr->OpcodeValue()) {
+ case 0x8:
+ case 0xA:
+ if (instr->HasL()) {
+ Format(instr, "vldr'cond 'Sd, ['rn - 4*'imm08@00]");
+ } else {
+ Format(instr, "vstr'cond 'Sd, ['rn - 4*'imm08@00]");
+ }
+ break;
+ case 0xC:
+ case 0xE:
+ if (instr->HasL()) {
+ Format(instr, "vldr'cond 'Sd, ['rn + 4*'imm08@00]");
+ } else {
+ Format(instr, "vstr'cond 'Sd, ['rn + 4*'imm08@00]");
+ }
+ break;
+ case 0x4:
+ case 0x5:
+ case 0x6:
+ case 0x7:
+ case 0x9:
+ case 0xB: {
+ bool to_vfp_register = (instr->VLValue() == 0x1);
+ if (to_vfp_register) {
+ Format(instr, "vldm'cond'pu 'rn'w, {'Sd-'Sd+}");
+ } else {
+ Format(instr, "vstm'cond'pu 'rn'w, {'Sd-'Sd+}");
+ }
+ break;
+ }
+ default:
+ Unknown(instr); // Not used by V8.
+ }
+ } else if (instr->CoprocessorValue() == 0xB) {
+ switch (instr->OpcodeValue()) {
+ case 0x2:
+ // Load and store double to two GP registers
+ if (instr->Bits(7, 6) != 0 || instr->Bit(4) != 1) {
+ Unknown(instr); // Not used by V8.
+ } else if (instr->HasL()) {
+ Format(instr, "vmov'cond 'rt, 'rn, 'Dm");
+ } else {
+ Format(instr, "vmov'cond 'Dm, 'rt, 'rn");
+ }
+ break;
+ case 0x8:
+ case 0xA:
+ if (instr->HasL()) {
+ Format(instr, "vldr'cond 'Dd, ['rn - 4*'imm08@00]");
+ } else {
+ Format(instr, "vstr'cond 'Dd, ['rn - 4*'imm08@00]");
+ }
+ break;
+ case 0xC:
+ case 0xE:
+ if (instr->HasL()) {
+ Format(instr, "vldr'cond 'Dd, ['rn + 4*'imm08@00]");
+ } else {
+ Format(instr, "vstr'cond 'Dd, ['rn + 4*'imm08@00]");
+ }
+ break;
+ case 0x4:
+ case 0x5:
+ case 0x6:
+ case 0x7:
+ case 0x9:
+ case 0xB: {
+ bool to_vfp_register = (instr->VLValue() == 0x1);
+ if (to_vfp_register) {
+ Format(instr, "vldm'cond'pu 'rn'w, {'Dd-'Dd+}");
+ } else {
+ Format(instr, "vstm'cond'pu 'rn'w, {'Dd-'Dd+}");
+ }
+ break;
+ }
+ default:
+ Unknown(instr); // Not used by V8.
+ }
+ } else {
+ Unknown(instr); // Not used by V8.
+ }
+}
+
+void Decoder::DecodeSpecialCondition(Instruction* instr) {
+ switch (instr->SpecialValue()) {
+ case 5:
+ if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
+ (instr->Bit(4) == 1)) {
+ // vmovl signed
+ if ((instr->VdValue() & 1) != 0) Unknown(instr);
+ int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);
+ int Vm = (instr->Bit(5) << 4) | instr->VmValue();
+ int imm3 = instr->Bits(21, 19);
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
+ "vmovl.s%d q%d, d%d", imm3 * 8, Vd, Vm);
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 7:
+ if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
+ (instr->Bit(4) == 1)) {
+ // vmovl unsigned
+ if ((instr->VdValue() & 1) != 0) Unknown(instr);
+ int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);
+ int Vm = (instr->Bit(5) << 4) | instr->VmValue();
+ int imm3 = instr->Bits(21, 19);
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
+ "vmovl.u%d q%d, d%d", imm3 * 8, Vd, Vm);
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 8:
+ if (instr->Bits(21, 20) == 0) {
+ // vst1
+ int Vd = (instr->Bit(22) << 4) | instr->VdValue();
+ int Rn = instr->VnValue();
+ int type = instr->Bits(11, 8);
+ int size = instr->Bits(7, 6);
+ int align = instr->Bits(5, 4);
+ int Rm = instr->VmValue();
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "vst1.%d ",
+ (1 << size) << 3);
+ FormatNeonList(Vd, type);
+ Print(", ");
+ FormatNeonMemory(Rn, align, Rm);
+ } else if (instr->Bits(21, 20) == 2) {
+ // vld1
+ int Vd = (instr->Bit(22) << 4) | instr->VdValue();
+ int Rn = instr->VnValue();
+ int type = instr->Bits(11, 8);
+ int size = instr->Bits(7, 6);
+ int align = instr->Bits(5, 4);
+ int Rm = instr->VmValue();
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "vld1.%d ",
+ (1 << size) << 3);
+ FormatNeonList(Vd, type);
+ Print(", ");
+ FormatNeonMemory(Rn, align, Rm);
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 9:
+ if (instr->Bits(21, 20) == 0 && instr->Bits(9, 8) == 0) {
+ // vst1
+ int Vd = (instr->Bit(22) << 4) | instr->VdValue();
+ int Rn = instr->VnValue();
+ int size = instr->Bits(11, 10);
+ int index = instr->Bits(7, 5);
+ int align = instr->Bit(4);
+ int Rm = instr->VmValue();
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "vst1.%d {d%d[%d]}, ",
+ (1 << size) << 3, Vd, index);
+ FormatNeonMemory(Rn, align, Rm);
+ } else if (instr->Bits(21, 20) == 2 && instr->Bits(9, 8) == 0) {
+ // vld1
+ int Vd = (instr->Bit(22) << 4) | instr->VdValue();
+ int Rn = instr->VnValue();
+ int size = instr->Bits(11, 10);
+ int index = instr->Bits(7, 5);
+ int align = instr->Bit(4);
+ int Rm = instr->VmValue();
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "vld1.%d {d%d[%d]}, ",
+ (1 << size) << 3, Vd, index);
+ FormatNeonMemory(Rn, align, Rm);
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 0xA:
+ if (instr->Bits(22, 20) == 7) {
+ const char* option = "?";
+ switch (instr->Bits(3, 0)) {
+ case 2:
+ option = "oshst";
+ break;
+ case 3:
+ option = "osh";
+ break;
+ case 6:
+ option = "nshst";
+ break;
+ case 7:
+ option = "nsh";
+ break;
+ case 10:
+ option = "ishst";
+ break;
+ case 11:
+ option = "ish";
+ break;
+ case 14:
+ option = "st";
+ break;
+ case 15:
+ option = "sy";
+ break;
+ }
+ switch (instr->Bits(7, 4)) {
+ case 1:
+ Print("clrex");
+ break;
+ case 4:
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "dsb %s", option);
+ break;
+ case 5:
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "dmb %s", option);
+ break;
+ default:
+ Unknown(instr);
+ }
+ break;
+ }
+ [[fallthrough]];
+ case 0xB:
+ if ((instr->Bits(22, 20) == 5) && (instr->Bits(15, 12) == 0xf)) {
+ int Rn = instr->Bits(19, 16);
+ int offset = instr->Bits(11, 0);
+ if (offset == 0) {
+ out_buffer_pos_ +=
+ SNPrintF(out_buffer_ + out_buffer_pos_, "pld [r%d]", Rn);
+ } else if (instr->Bit(23) == 0) {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
+ "pld [r%d, #-%d]", Rn, offset);
+ } else {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
+ "pld [r%d, #+%d]", Rn, offset);
+ }
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 0x1D:
+ if (instr->Opc1Value() == 0x7 && instr->Bits(19, 18) == 0x2 &&
+ instr->Bits(11, 9) == 0x5 && instr->Bits(7, 6) == 0x1 &&
+ instr->Bit(4) == 0x0) {
+ // VRINTA, VRINTN, VRINTP, VRINTM (floating-point)
+ bool dp_operation = (instr->SzValue() == 1);
+ int rounding_mode = instr->Bits(17, 16);
+ switch (rounding_mode) {
+ case 0x0:
+ if (dp_operation) {
+ Format(instr, "vrinta.f64.f64 'Dd, 'Dm");
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 0x1:
+ if (dp_operation) {
+ Format(instr, "vrintn.f64.f64 'Dd, 'Dm");
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 0x2:
+ if (dp_operation) {
+ Format(instr, "vrintp.f64.f64 'Dd, 'Dm");
+ } else {
+ Unknown(instr);
+ }
+ break;
+ case 0x3:
+ if (dp_operation) {
+ Format(instr, "vrintm.f64.f64 'Dd, 'Dm");
+ } else {
+ Unknown(instr);
+ }
+ break;
+ default:
+ MOZ_CRASH(); // Case analysis is exhaustive.
+ break;
+ }
+ } else {
+ Unknown(instr);
+ }
+ break;
+ default:
+ Unknown(instr);
+ break;
+ }
+}
+
+# undef VERIFY
+
+bool Decoder::IsConstantPoolAt(uint8_t* instr_ptr) {
+ int instruction_bits = *(reinterpret_cast<int*>(instr_ptr));
+ return (instruction_bits & kConstantPoolMarkerMask) == kConstantPoolMarker;
+}
+
+int Decoder::ConstantPoolSizeAt(uint8_t* instr_ptr) {
+ if (IsConstantPoolAt(instr_ptr)) {
+ int instruction_bits = *(reinterpret_cast<int*>(instr_ptr));
+ return DecodeConstantPoolLength(instruction_bits);
+ } else {
+ return -1;
+ }
+}
+
+// Disassemble the instruction at *instr_ptr into the output buffer.
+int Decoder::InstructionDecode(uint8_t* instr_ptr) {
+ Instruction* instr = Instruction::At(instr_ptr);
+ // Print raw instruction bytes.
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%08x ",
+ instr->InstructionBits());
+ if (instr->ConditionField() == kSpecialCondition) {
+ DecodeSpecialCondition(instr);
+ return Instruction::kInstrSize;
+ }
+ int instruction_bits = *(reinterpret_cast<int*>(instr_ptr));
+ if ((instruction_bits & kConstantPoolMarkerMask) == kConstantPoolMarker) {
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
+ "constant pool begin (length %d)",
+ DecodeConstantPoolLength(instruction_bits));
+ return Instruction::kInstrSize;
+ } else if (instruction_bits == kCodeAgeJumpInstruction) {
+    // The code age prologue has a constant immediately following the jump
+    // instruction.
+ Instruction* target = Instruction::At(instr_ptr + Instruction::kInstrSize);
+ DecodeType2(instr);
+ SNPrintF(out_buffer_ + out_buffer_pos_, " (0x%08x)",
+ target->InstructionBits());
+ return 2 * Instruction::kInstrSize;
+ }
+ switch (instr->TypeValue()) {
+ case 0:
+ case 1: {
+ DecodeType01(instr);
+ break;
+ }
+ case 2: {
+ DecodeType2(instr);
+ break;
+ }
+ case 3: {
+ DecodeType3(instr);
+ break;
+ }
+ case 4: {
+ DecodeType4(instr);
+ break;
+ }
+ case 5: {
+ DecodeType5(instr);
+ break;
+ }
+ case 6: {
+ DecodeType6(instr);
+ break;
+ }
+ case 7: {
+ return DecodeType7(instr);
+ }
+ default: {
+ // The type field is 3-bits in the ARM encoding.
+ MOZ_CRASH();
+ break;
+ }
+ }
+ return Instruction::kInstrSize;
+}
+
+} // namespace disasm
+
+# undef STRING_STARTS_WITH
+# undef VERIFY
+
+//------------------------------------------------------------------------------
+
+namespace disasm {
+
+const char* NameConverter::NameOfAddress(uint8_t* addr) const {
+ SNPrintF(tmp_buffer_, "%p", addr);
+ return tmp_buffer_.start();
+}
+
+const char* NameConverter::NameOfConstant(uint8_t* addr) const {
+ return NameOfAddress(addr);
+}
+
+const char* NameConverter::NameOfCPURegister(int reg) const {
+ return disasm::Registers::Name(reg);
+}
+
+const char* NameConverter::NameOfByteCPURegister(int reg) const {
+ MOZ_CRASH(); // ARM does not have the concept of a byte register
+ return "nobytereg";
+}
+
+const char* NameConverter::NameOfXMMRegister(int reg) const {
+ MOZ_CRASH(); // ARM does not have any XMM registers
+ return "noxmmreg";
+}
+
+const char* NameConverter::NameInCode(uint8_t* addr) const {
+  // The default name converter is called for unknown code, so we do not try
+  // to access any memory.
+ return "";
+}
+
+//------------------------------------------------------------------------------
+
+Disassembler::Disassembler(const NameConverter& converter)
+ : converter_(converter) {}
+
+Disassembler::~Disassembler() {}
+
+int Disassembler::InstructionDecode(V8Vector<char> buffer,
+ uint8_t* instruction) {
+ Decoder d(converter_, buffer);
+ return d.InstructionDecode(instruction);
+}
+
+int Disassembler::ConstantPoolSizeAt(uint8_t* instruction) {
+ return Decoder::ConstantPoolSizeAt(instruction);
+}
+
+void Disassembler::Disassemble(FILE* f, uint8_t* begin, uint8_t* end) {
+ NameConverter converter;
+ Disassembler d(converter);
+ for (uint8_t* pc = begin; pc < end;) {
+ EmbeddedVector<char, ReasonableBufferSize> buffer;
+ buffer[0] = '\0';
+ uint8_t* prev_pc = pc;
+ pc += d.InstructionDecode(buffer, pc);
+ fprintf(f, "%p %08x %s\n", prev_pc,
+ *reinterpret_cast<int32_t*>(prev_pc), buffer.start());
+ }
+}
+
+} // namespace disasm
+} // namespace jit
+} // namespace js
+
+#endif // JS_DISASM_ARM
diff --git a/js/src/jit/arm/disasm/Disasm-arm.h b/js/src/jit/arm/disasm/Disasm-arm.h
new file mode 100644
index 0000000000..8a0dd97c32
--- /dev/null
+++ b/js/src/jit/arm/disasm/Disasm-arm.h
@@ -0,0 +1,141 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ */
+// Copyright 2007-2008 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef jit_arm_disasm_Disasm_arm_h
+#define jit_arm_disasm_Disasm_arm_h
+
+#ifdef JS_DISASM_ARM
+
+# include "mozilla/Assertions.h"
+# include "mozilla/Types.h"
+
+# include <stdio.h>
+# include <string.h>
+
+namespace js {
+namespace jit {
+namespace disasm {
+
+typedef unsigned char byte;
+
+// A reasonable (i.e., safe) buffer size for the disassembly of a single
+// instruction.
+const int ReasonableBufferSize = 256;
+
+// Vector as used by the original V8 code, kept to allow for minimal
+// modification. It functions exactly like a character array with helper
+// methods.
+template <typename T>
+class V8Vector {
+ public:
+ V8Vector() : start_(nullptr), length_(0) {}
+ V8Vector(T* data, int length) : start_(data), length_(length) {
+ MOZ_ASSERT(length == 0 || (length > 0 && data != nullptr));
+ }
+
+ // Returns the length of the vector.
+ int length() const { return length_; }
+
+ // Returns the pointer to the start of the data in the vector.
+ T* start() const { return start_; }
+
+ // Access individual vector elements - checks bounds in debug mode.
+ T& operator[](int index) const {
+ MOZ_ASSERT(0 <= index && index < length_);
+ return start_[index];
+ }
+
+ V8Vector<T> operator+(int offset) const {
+ MOZ_ASSERT(offset < length_);
+ return V8Vector<T>(start_ + offset, length_ - offset);
+ }
+
+ private:
+ T* start_;
+ int length_;
+};
+
+template <typename T, int kSize>
+class EmbeddedVector : public V8Vector<T> {
+ public:
+ EmbeddedVector() : V8Vector<T>(buffer_, kSize) {}
+
+ explicit EmbeddedVector(T initial_value) : V8Vector<T>(buffer_, kSize) {
+ for (int i = 0; i < kSize; ++i) {
+ buffer_[i] = initial_value;
+ }
+ }
+
+  // When copying, make the underlying Vector reference our own buffer.
+  EmbeddedVector(const EmbeddedVector& rhs) : V8Vector<T>(buffer_, kSize) {
+    memcpy(buffer_, rhs.buffer_, sizeof(T) * kSize);
+  }
+
+  EmbeddedVector& operator=(const EmbeddedVector& rhs) {
+    if (this == &rhs) return *this;
+    memcpy(buffer_, rhs.buffer_, sizeof(T) * kSize);
+    return *this;
+  }
+
+ private:
+ T buffer_[kSize];
+};
+
+// Interface and default implementation for converting addresses and
+// register-numbers to text. The default implementation is machine
+// specific.
+class NameConverter {
+ public:
+ virtual ~NameConverter() {}
+ virtual const char* NameOfCPURegister(int reg) const;
+ virtual const char* NameOfByteCPURegister(int reg) const;
+ virtual const char* NameOfXMMRegister(int reg) const;
+ virtual const char* NameOfAddress(byte* addr) const;
+ virtual const char* NameOfConstant(byte* addr) const;
+ virtual const char* NameInCode(byte* addr) const;
+
+ protected:
+ EmbeddedVector<char, 128> tmp_buffer_;
+};
+
+// A generic Disassembler interface
+class Disassembler {
+ public:
+ // Caller deallocates converter.
+ explicit Disassembler(const NameConverter& converter);
+
+ virtual ~Disassembler();
+
+ // Writes one disassembled instruction into 'buffer' (0-terminated).
+ // Returns the length of the disassembled machine instruction in bytes.
+ int InstructionDecode(V8Vector<char> buffer, uint8_t* instruction);
+
+  // Returns the number of entries in the constant pool beginning at
+  // 'instruction', or -1 if the instruction does not mark the beginning of
+  // a constant pool.
+ int ConstantPoolSizeAt(byte* instruction);
+
+ // Write disassembly into specified file 'f' using specified NameConverter
+ // (see constructor).
+ static void Disassemble(FILE* f, uint8_t* begin, uint8_t* end);
+
+ private:
+ const NameConverter& converter_;
+
+ // Disallow implicit constructors.
+ Disassembler() = delete;
+ Disassembler(const Disassembler&) = delete;
+ void operator=(const Disassembler&) = delete;
+};
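+
+// A minimal usage sketch, assuming [begin, end) spans valid ARM code:
+//
+//   disasm::Disassembler::Disassemble(stderr, begin, end);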
+
+} // namespace disasm
+} // namespace jit
+} // namespace js
+
+#endif // JS_DISASM_ARM
+
+#endif // jit_arm_disasm_Disasm_arm_h
diff --git a/js/src/jit/arm/gen-double-encoder-table.py b/js/src/jit/arm/gen-double-encoder-table.py
new file mode 100644
index 0000000000..fd622da82e
--- /dev/null
+++ b/js/src/jit/arm/gen-double-encoder-table.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""Generate tables of immediately-encodable VFP doubles.
+
+DOES NOT get automatically run during the build process. If you need to
+modify this file (which is unlikely), you must re-run this script:
+
+python gen-double-encoder-table.py > $(topsrcdir)/path/to/DoubleEntryTable.tbl
+"""
+
+import operator
+from functools import reduce
+
+
+def rep(bit, count):
+ return reduce(operator.ior, [bit << c for c in range(count)])
+
+
+def encodeDouble(value):
+ """Generate an ARM ARM 'VFP modified immediate constant' with format:
+ aBbbbbbb bbcdefgh 000...
+
+ We will return the top 32 bits of the double; the rest are 0."""
+ assert (0 <= value) and (value <= 255)
+ a = value >> 7
+ b = (value >> 6) & 1
+ B = int(b == 0)
+ cdefgh = value & 0x3F
+ return (a << 31) | (B << 30) | (rep(b, 8) << 22) | cdefgh << 16
+
+
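+# As a worked example of the encoding above: encodeDouble(0x70) yields
+# 0x3ff00000, the high word of the IEEE-754 double 1.0.
+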
+print("/* THIS FILE IS AUTOMATICALLY GENERATED BY gen-double-encode-table.py. */")
+for i in range(256):
+ print(" { 0x%08x, { %d, %d, 0 } }," % (encodeDouble(i), i & 0xF, i >> 4))
diff --git a/js/src/jit/arm/llvm-compiler-rt/arm/aeabi_idivmod.S b/js/src/jit/arm/llvm-compiler-rt/arm/aeabi_idivmod.S
new file mode 100644
index 0000000000..0237f2221d
--- /dev/null
+++ b/js/src/jit/arm/llvm-compiler-rt/arm/aeabi_idivmod.S
@@ -0,0 +1,27 @@
+//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { int quot; int rem; } __aeabi_idivmod(int numerator, int denominator) {
+// int rem, quot;
+// quot = __divmodsi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
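+//
+// On return the AEABI contract places the quotient in r0 and the remainder
+// in r1: __divmodsi4 leaves the quotient in r0, and the remainder is loaded
+// from the stack slot into r1 below.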
+
+ .syntax unified
+ .align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+ push { lr }
+ sub sp, sp, #4
+ mov r2, sp
+ bl SYMBOL_NAME(__divmodsi4)
+ ldr r1, [sp]
+ add sp, sp, #4
+ pop { pc }
diff --git a/js/src/jit/arm/llvm-compiler-rt/arm/aeabi_uidivmod.S b/js/src/jit/arm/llvm-compiler-rt/arm/aeabi_uidivmod.S
new file mode 100644
index 0000000000..f7e1d2ebed
--- /dev/null
+++ b/js/src/jit/arm/llvm-compiler-rt/arm/aeabi_uidivmod.S
@@ -0,0 +1,28 @@
+//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { unsigned quot; unsigned rem; }
+// __aeabi_uidivmod(unsigned numerator, unsigned denominator) {
+// unsigned rem, quot;
+// quot = __udivmodsi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
+
+ .syntax unified
+ .align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+ push { lr }
+ sub sp, sp, #4
+ mov r2, sp
+ bl SYMBOL_NAME(__udivmodsi4)
+ ldr r1, [sp]
+ add sp, sp, #4
+ pop { pc }
diff --git a/js/src/jit/arm/llvm-compiler-rt/assembly.h b/js/src/jit/arm/llvm-compiler-rt/assembly.h
new file mode 100644
index 0000000000..802d1e2870
--- /dev/null
+++ b/js/src/jit/arm/llvm-compiler-rt/assembly.h
@@ -0,0 +1,67 @@
+/* ===-- assembly.h - compiler-rt assembler support macros -----------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file defines macros for use in compiler-rt assembler source.
+ * This file is not part of the interface of this library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifndef COMPILERRT_ASSEMBLY_H
+#define COMPILERRT_ASSEMBLY_H
+
+#if defined(__POWERPC__) || defined(__powerpc__) || defined(__ppc__)
+# define SEPARATOR @
+#else
+# define SEPARATOR ;
+#endif
+
+#if defined(__APPLE__)
+# define HIDDEN_DIRECTIVE .private_extern
+# define LOCAL_LABEL(name) L_##name
+#else
+# define HIDDEN_DIRECTIVE .hidden
+# define LOCAL_LABEL(name) .L_##name
+#endif
+
+#define GLUE2(a, b) a##b
+#define GLUE(a, b) GLUE2(a, b)
+#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
+
+#ifdef VISIBILITY_HIDDEN
+# define DECLARE_SYMBOL_VISIBILITY(name) \
+ HIDDEN_DIRECTIVE SYMBOL_NAME(name) SEPARATOR
+#else
+# define DECLARE_SYMBOL_VISIBILITY(name)
+#endif
+
+#define DEFINE_COMPILERRT_FUNCTION(name) \
+ .globl SYMBOL_NAME(name) \
+ SEPARATOR DECLARE_SYMBOL_VISIBILITY(name) SYMBOL_NAME(name) :
+
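+/* For example, on a typical ELF target where __USER_LABEL_PREFIX__ is empty,
+ * VISIBILITY_HIDDEN is not defined, and SEPARATOR is ';',
+ * DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod) expands to:
+ *
+ *   .globl __aeabi_idivmod ; __aeabi_idivmod :
+ */
+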
+#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name) \
+ .globl SYMBOL_NAME(name) \
+ SEPARATOR HIDDEN_DIRECTIVE SYMBOL_NAME(name) \
+ SEPARATOR SYMBOL_NAME(name) :
+
+#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name) \
+ .globl name SEPARATOR HIDDEN_DIRECTIVE name SEPARATOR name:
+
+#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \
+ .globl SYMBOL_NAME(name) SEPARATOR.set SYMBOL_NAME(name), \
+ SYMBOL_NAME(target) SEPARATOR
+
+#if defined(__ARM_EABI__)
+# define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) \
+ DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name)
+#else
+# define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name)
+#endif
+
+#endif /* COMPILERRT_ASSEMBLY_H */